diff --git a/config.py b/config.py
new file mode 100644
index 0000000..388f1d8
--- /dev/null
+++ b/config.py
@@ -0,0 +1,38 @@
+"""
+    Network configuration.
+"""
+
+# Pre-trained weights
+weight_model_path = './vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
+
+# Input image size: 52*52*3
+input_shape = (52,52,3)
+
+# Leave the list empty to train without a GPU; to use multiple GPUs, list their device IDs
+gpu = []
+# gpu = [0,1,2,3]
+
+# Number of training epochs
+epochs = 3000
+# Batch size
+batch_size = 128
+
+# Learning rate
+lr = 1e-3
+
+# Whether to enable TensorBoard
+tensorboard = True
+
+# TensorBoard log directory
+tensorboard_log_dir = './logs'
+
+# Validation split ratio
+valid_rate = 0.05
+
+sample_path = './sample'
+
+auto_best_checkpoint_path = './checkpoint/best.h5'
+
+auto_epoch_checkpoint_path = './checkpoint/epoch_{epoch:03d}.h5'
+
+model_save_path = './model.h5'
\ No newline at end of file
diff --git a/model.png b/model.png
new file mode 100644
index 0000000..3a199d8
Binary files /dev/null and b/model.png differ
diff --git a/pre.py b/pre.py
new file mode 100644
index 0000000..ef917d2
--- /dev/null
+++ b/pre.py
@@ -0,0 +1,34 @@
+import cv2
+import numpy as np
+from keras.models import load_model
+from keras.layers import Lambda
+from keras import backend as K
+import os
+import random
+from PIL import Image
+
+input1 = input('Enter image filename 1: ')
+input2 = input('Enter image filename 2: ')
+
+# L1-distance Lambda layer, registered below so load_model can deserialize it
+output = Lambda(lambda x: K.abs(x[0] - x[1]))
+weight = "./best.h5"
+# Load the model
+# model = load_model(weight, custom_objects={'contrastive_loss': contrastive_loss, 'binary_accuracy': binary_accuracy})
+model = load_model(weight, custom_objects={'output': output})
+
+
+resize = 52
+img1 = cv2.imread(input1)
+img2 = cv2.imread(input2)
+
+img1 = cv2.resize(img1, (resize, resize)) / 255
+img2 = cv2.resize(img2, (resize, resize)) / 255
+
+img1 = np.expand_dims(img1, axis=0)
+img2 = np.expand_dims(img2, axis=0)
+
+result = model.predict([img1, img2])
+
+print(result)
+
+
diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000..2075b78
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,182 @@
+import cv2
+import numpy as np
+from keras.models import load_model
+from keras.layers import Lambda
+from keras import backend as K
+import os
+import random
+from PIL import Image
+# Image to predict
+# image_path = "./sample/1691156257961.jpg"
+image_path = os.listdir("./data")
+# Randomly pick an image
+# inp = input('Enter image filename: ')
+image_path = "./data/" + random.choice(image_path)
+# image_path = './sample/' + inp
+# print(image_path)
+
+"""
+    YOLOv3 segmentation (character detection) model
+"""
+weight = "./yolov3-tiny_17000.weights"
+cfg = "./yolov3-tiny.cfg"
+
+# Load the model
+net = cv2.dnn.readNet(weight, cfg)
+"""
+    Siamese comparison network
+"""
+resize_height, resize_width, channel = 52, 52, 3
+
+# Custom loss and accuracy (only needed when loading a model trained with contrastive loss)
+output = Lambda(lambda x: K.abs(x[0] - x[1]))
+def contrastive_loss(y_true, y_pred):
+    margin = 1
+    return K.mean(y_true * K.square(y_pred) + (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))
+
+def binary_accuracy(y_true, y_pred):
+    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))
+weight = "./best.h5"
+# Load the model
+# model = load_model(weight, custom_objects={'contrastive_loss': contrastive_loss, 'binary_accuracy': binary_accuracy})
+model = load_model(weight, custom_objects={'output': output})
+
+classes = ["text"]
+
+
+img = cv2.imread(image_path)
+
+cv2.namedWindow('display')
+
+
+"""
+    Detect and crop the characters with YOLO
+"""
+height, width, channels = img.shape
+blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
+net.setInput(blob)
+outs = net.forward(net.getUnconnectedOutLayersNames())
+class_ids = []
+confidences = []
+boxes = []
+
+for out in outs:
+    for detection in out:
+        scores = detection[5:]
+        class_id = np.argmax(scores)
+        confidence = scores[class_id]
+
+        if confidence > 0.1:
+            center_x = int(detection[0] * width)
+            center_y = int(detection[1] * height)
+            w = int(detection[2] * width)
+            h = int(detection[3] * height)
+
+            x = int(center_x - w / 2)
+            y = int(center_y - h / 2)
+
+            boxes.append([x, y, w, h])
+            confidences.append(float(confidence))
+            class_ids.append(class_id)
+
+indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.1, 0.1)
+
+thickness = 2
+color = (0, 255, 0)
+font = cv2.FONT_HERSHEY_PLAIN
+
+new_boxes = []
+
+for i in range(len(boxes)):
+    if i in indexes:
+        x, y, w, h = boxes[i]
+
+        new_boxes.append([x, y, w, h])
+
+total = len(new_boxes)
+
+print('>>> Detected', total, 'characters')
+
+top_total = total / 2
+# If the halved count has a fractional part, detection must be wrong
+if top_total % 1 != 0:
+    print('>>> YOLO segmentation error: the number of prompt characters does not match the number of characters to click')
+    exit()
+
+top_total = int(top_total)
+
+# The widest boxes (the characters to click in the image)
+w_max_boxes = sorted(new_boxes, key=lambda x: x[2], reverse=True)[:top_total]
+
+# The remaining boxes (the prompt characters to compare against)
+w_min_box = sorted(new_boxes, key=lambda x: x[2], reverse=True)[top_total:]
+
+# Sort w_min_box from left to right (the click order)
+w_min_box = sorted(w_min_box, key=lambda x: x[0])
+
+
+w_max_image = []
+w_min_image = []
+
+# Crop out the individual character images
+for i in range(top_total):
+    x, y, w, h = w_max_boxes[i]
+    p = cv2.resize(img[y:y+h, x:x+w], (resize_height, resize_width))
+
+    # cv2.imwrite('./1/1_{}.jpg'.format(i), p)
+
+    w_max_image.append(p)
+
+for i in range(len(w_min_box)):
+    x, y, w, h = w_min_box[i]
+    p = cv2.resize(img[y:y+h, x:x+w], (resize_height, resize_width))
+
+    # cv2.imwrite('./1/2_{}.jpg'.format(i), p)
+
+    w_min_image.append(p)
+
+# print(w_max_boxes)
+
+w_max_image_np = np.array(w_max_image) / 255
+w_min_image_np = np.array(w_min_image) / 255
+
+select_index = []
+
+# Compare each prompt character (in click order) against every clickable
+# character, keeping the index of the most similar one. NOTE: the outer loop
+# runs over the prompts so that select_index indexes into w_max_boxes, which
+# is what the drawing loop below expects.
+for i in range(len(w_min_image_np)):
+    print('>>> Comparing character', i+1)
+    cv2.imwrite('./1/1_{}.jpg'.format(i), w_min_image_np[i] * 255)
+
+    left_x = w_min_image_np[i]
+    num_index = 0
+    cache_rate = 0
+    for k in range(len(w_max_image_np)):
+        left_y = w_max_image_np[k]
+        predict = model.predict([left_x.reshape(1, resize_height, resize_width, channel), left_y.reshape(1, resize_height, resize_width, channel)])
+        rate = predict[0][0]
+        if rate > cache_rate:
+            cv2.imwrite('./1/2_{}.jpg'.format(i), w_max_image_np[k] * 255)
+            num_index = k
+            # Only remember the score when a better match is found
+            cache_rate = rate
+
+    select_index.append(num_index)
+
+print('>>> Comparison finished, result:', select_index)
+
+location = []
+
+for i in range(len(select_index)):
+    x, y, w, h = w_max_boxes[select_index[i]]
+    cv2.rectangle(img, (x, y), (x+w, y+h), color, thickness)
+    cv2.putText(img, str(i+1), (x, y+h), font, 1, color, thickness)
+    # Convert to the box center in image coordinates
+    location.append([x+w/2, y+h/2])
+
+print('>>> Click coordinates:', location)
+
+cv2.imshow('display', img)
+cv2.waitKey(0)
\ No newline at end of file
diff --git a/requirement.txt b/requirement.txt
new file mode 100644
index 0000000..0feace6
Binary files /dev/null and b/requirement.txt differ
diff --git a/siamese.py b/siamese.py
new file mode 100644
index 0000000..522c00b
--- /dev/null
+++ b/siamese.py
@@ -0,0 +1,24 @@
+from tensorflow.keras.layers import Input,Lambda,Dense
+from tensorflow.keras.models import Model
+from vgg16 import VGG16
+
+def siamese(input_shape):
+    # Shared VGG backbone: the two branches share weights, so a single instance is used
+    base_network = VGG16()
+
+    left_input = Input(shape = input_shape)
+    right_input = Input(shape = input_shape)
+
+    left_output = base_network.call(left_input)
+    right_output = base_network.call(right_input)
+
+    # L1 distance
+    output = Lambda(lambda x:abs(x[0]-x[1]))([left_output,right_output])
+
+    # Fully connected layer 1
+    output = Dense(512,activation = 'relu')(output)
+
+    # Fully connected layer 2 (similarity score)
+    output = Dense(1,activation = 'sigmoid')(output)
+
+    return Model([left_input,right_input],output)
\ No newline at end of file
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..92d0420
--- /dev/null
+++ b/train.py
@@ -0,0 +1,159 @@
+
+import os
+import config
+import tensorflow as tf
+from siamese import siamese
+from tensorflow.python.keras.utils.multi_gpu_utils import multi_gpu_model
+from tensorflow.keras.optimizers import SGD
+from tensorflow.keras.callbacks import *
+import cv2
+import random
+import numpy as np
+import utils
+
+
+if len(config.gpu) >= 1:
+    # Select the visible GPUs before TensorFlow initializes them
+    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(i) for i in config.gpu)
+    print('>>> GPU:',os.environ['CUDA_VISIBLE_DEVICES'])
+
+    # Allocate GPU memory on demand instead of all at once
+    gpus = tf.config.experimental.list_physical_devices('GPU')
+    for gpu in gpus:
+        tf.config.experimental.set_memory_growth(gpu, True)
+input_shape = config.input_shape
+
+model = siamese(input_shape=(input_shape[0],input_shape[1],input_shape[2]))
+
+if config.weight_model_path:
+    """
+        by_name=True: weights are matched and loaded by layer name, so only layers
+        whose names match entries in the weight file receive weights; all other
+        layers are ignored. This is useful for fine-tuning, since only the subset
+        of pre-trained weights whose layer names match is loaded.
+
+        skip_mismatch=True: if the number of layers does not match, or a matching
+        layer cannot be found, the error is skipped instead of raised. This helps
+        when the model structure has changed, avoiding load errors from mismatched
+        layers. Note that any unmatched layer simply keeps its initial weights.
+    """
+    model.load_weights(config.weight_model_path,by_name=True,skip_mismatch=True)
+
+# Wrap the model when training on multiple GPUs
+if len(config.gpu) > 1:
+    model = multi_gpu_model(model,gpus=len(config.gpu))
+
+opt = SGD(lr=config.lr,momentum=0.9)
+
+min_lr = config.lr * 0.01
+
+# Scale the learning-rate range with the batch size (reference batch size: 64)
+nbs = 64
+lr_limit_max = 1e-3
+lr_limit_min = 3e-4
+Init_lr_fit = min(max(config.batch_size / nbs * config.lr, lr_limit_min), lr_limit_max)
+Min_lr_fit = min(max(config.batch_size / nbs * min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2)
+
+# Prepare the training data
+sample = os.listdir(config.sample_path)
+data = []
+hasOne = []
+for i in sample:
+    t = i.split('_')[0]
+    if t not in hasOne:
+        hasOne.append(t)
+        data.append([1, config.sample_path + '/' + t + '_1.jpg', config.sample_path + '/' + t + '_2.jpg'])
+        # Copies marked 2 get data augmentation; the label is turned back into 1 later
+        data.append([2, config.sample_path + '/' + t + '_1.jpg', config.sample_path + '/' + t + '_2.jpg'])
+        data.append([2, config.sample_path + '/' + t + '_1.jpg', config.sample_path + '/' + t + '_2.jpg'])
+        data.append([2, config.sample_path + '/' + t + '_1.jpg', config.sample_path + '/' + t + '_2.jpg'])
+
+        # Random negative samples
+        for j in range(3):
+            f = random.choice(sample)
+            while f.split('_')[0] == t:
+                f = random.choice(sample)
+
+            data.append([0, config.sample_path + '/' + t + '_1.jpg', config.sample_path + '/' + f])
+
+# Shuffle the data
+random.shuffle(data)
+
+train_x_left = np.zeros((len(data),input_shape[0],input_shape[1],input_shape[2]))
+train_x_right = np.zeros((len(data),input_shape[0],input_shape[1],input_shape[2]))
+
+train_y = np.zeros((len(data),1))
+
+total = len(data)
+now = 0
+for i in data:
+    now += 1
+    left = cv2.imread(i[1])
+    left = cv2.resize(left,(input_shape[0],input_shape[1]))
+
+    right = cv2.imread(i[2])
+    right = cv2.resize(right,(input_shape[0],input_shape[1]))
+
+    label = i[0]
+
+    # Data augmentation
+    if label == 2:
+        label = 1
+        # Random horizontal flip
+        if random.random() > 0.5:
+            left = cv2.flip(left,1)
+            right = cv2.flip(right,1)
+
+        # Random noise
+        for h in range(5):
+            # Add random lines
+            if random.random() > 0.5:
+                cv2.line(left,(random.randint(0,input_shape[0]),random.randint(0,input_shape[1])),(random.randint(0,input_shape[0]),random.randint(0,input_shape[1])),(0,0,0),random.randint(1,3))
+                cv2.line(right,(random.randint(0,input_shape[0]),random.randint(0,input_shape[1])),(random.randint(0,input_shape[0]),random.randint(0,input_shape[1])),(0,0,0),random.randint(1,3))
+
+            # Add random dots
+            if random.random() > 0.5:
+                cv2.circle(left,(random.randint(0,input_shape[0]),random.randint(0,input_shape[1])),random.randint(1,3),(0,0,0),-1)
+                cv2.circle(right,(random.randint(0,input_shape[0]),random.randint(0,input_shape[1])),random.randint(1,3),(0,0,0),-1)
+
+        # Random hue shift in HSV space
+        if random.random() > 0.5:
+            left = cv2.cvtColor(left,cv2.COLOR_BGR2HSV)
+            right = cv2.cvtColor(right,cv2.COLOR_BGR2HSV)
+
+            left[:,:,0] = left[:,:,0] + random.randint(-10,10)
+            right[:,:,0] = right[:,:,0] + random.randint(-10,10)
+
+            left = cv2.cvtColor(left,cv2.COLOR_HSV2BGR)
+            right = cv2.cvtColor(right,cv2.COLOR_HSV2BGR)
+
+        # Dump one augmented pair for visual inspection
+        if not os.path.exists('./1.jpg'):
+            cv2.imwrite('./1.jpg',left)
+            cv2.imwrite('./2.jpg',right)
+
+
+    # Normalize
+    left = left / 255.0
+    right = right / 255.0
+
+    train_x_left[now-1] = left
+    train_x_right[now-1] = right
+    train_y[now-1] = label
+
+
+
+    print('>>> Processing image',now,'/',total,end='\r')
+
+
+lr_scheduler_func = utils.get_lr_scheduler('cos', Init_lr_fit, Min_lr_fit, config.epochs)
+
+if config.tensorboard:
+    tb = TensorBoard(log_dir=config.tensorboard_log_dir,write_graph=True,write_images=True)
+
+best_checkpoint = ModelCheckpoint(config.auto_best_checkpoint_path,save_weights_only=False,save_best_only=True,verbose=1,period=1)
+epoch_checkpoint = ModelCheckpoint(config.auto_epoch_checkpoint_path,save_weights_only=False,save_best_only=False,verbose=1,period=10)
+lr_scheduler = LearningRateScheduler(lr_scheduler_func, verbose=1)
+
+callback = [best_checkpoint,epoch_checkpoint,lr_scheduler]
+
+if config.tensorboard:
+    callback.append(tb)
+
+
+model.compile(loss='binary_crossentropy',optimizer=opt,metrics=['binary_accuracy'])
+
+
+model.fit([train_x_left,train_x_right],train_y,batch_size=config.batch_size,epochs=config.epochs,validation_split=config.valid_rate,callbacks=callback)
+model.save_weights(config.model_save_path)
\ No newline at end of file
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..d99ad5e
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,41 @@
+import math
+from functools import partial
+
+def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.05, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.05, step_num = 10):
+    def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters):
+        if iters <= warmup_total_iters:
+            # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start
+            lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start
+        elif iters >= total_iters - no_aug_iter:
+            lr = min_lr
+        else:
+            lr = min_lr + 0.5 * (lr - min_lr) * (
+                1.0
+                + math.cos(
+                    math.pi
+                    * (iters - warmup_total_iters)
+                    / (total_iters - warmup_total_iters - no_aug_iter)
+                )
+            )
+        return lr
+
+    def step_lr(lr, decay_rate, step_size, iters):
+        if step_size < 1:
+            raise ValueError("step_size must be at least 1.")
+        n = iters // step_size
+        out_lr = lr * decay_rate ** n
+        return out_lr
+
+    if lr_decay_type == "cos":
+        warmup_total_iters = min(max(warmup_iters_ratio * total_iters, 1), 3)
+        warmup_lr_start = max(warmup_lr_ratio * lr, 1e-6)
+        no_aug_iter = min(max(no_aug_iter_ratio * total_iters, 1), 15)
+        func = partial(yolox_warm_cos_lr, lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter)
+    else:
+        decay_rate = (min_lr / lr) ** (1 / (step_num - 1))
+        step_size = total_iters / step_num
+        func = partial(step_lr, lr, decay_rate, step_size)
+
+    return func
+
diff --git a/vgg16.py b/vgg16.py
new file mode 100644
index 0000000..f0593f9
--- /dev/null
+++ b/vgg16.py
@@ -0,0 +1,72 @@
+from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D
+"""
+    The VGG16 code comes from https://github.com/bubbliiiing/Siamese-keras
+
+    tensorflow.keras.applications.VGG16 could be used instead
+"""
+
+class VGG16:
+    def __init__(self):
+        # First convolutional block
+        # 52, 52, 3 -> 52, 52, 64 -> 26, 26, 64
+        self.block1_conv1 = Conv2D(64, (3,3), activation = 'relu', padding = 'same',name = 'block1_conv1')
+        self.block1_conv2 = Conv2D(64, (3,3), activation = 'relu', padding = 'same',name = 'block1_conv2')
+        self.block1_pool = MaxPooling2D((2,2), strides = (2,2), name = 'block1_pool')
+
+        # Second convolutional block
+        # 26, 26, 64 -> 26, 26, 128 -> 13, 13, 128
+        self.block2_conv1 = Conv2D(128, (3,3), activation = 'relu', padding = 'same',name = 'block2_conv1')
+        self.block2_conv2 = Conv2D(128, (3,3), activation = 'relu', padding = 'same',name = 'block2_conv2')
+        self.block2_pool = MaxPooling2D((2,2), strides = (2,2), name = 'block2_pool')
+
+        # Third convolutional block
+        # 13, 13, 128 -> 13, 13, 256 -> 6, 6, 256
+        self.block3_conv1 = Conv2D(256, (3,3), activation = 'relu', padding = 'same',name = 'block3_conv1')
+        self.block3_conv2 = Conv2D(256, (3,3), activation = 'relu', padding = 'same',name = 'block3_conv2')
+        self.block3_conv3 = Conv2D(256, (3,3), activation = 'relu', padding = 'same',name = 'block3_conv3')
+        self.block3_pool = MaxPooling2D((2,2), strides = (2,2), name = 'block3_pool')
+
+        # Fourth convolutional block
+        # 6, 6, 256 -> 6, 6, 512 -> 3, 3, 512
+        self.block4_conv1 = Conv2D(512, (3,3), activation = 'relu', padding = 'same', name = 'block4_conv1')
+        self.block4_conv2 = Conv2D(512, (3,3), activation = 'relu', padding = 'same', name = 'block4_conv2')
+        self.block4_conv3 = Conv2D(512, (3,3), activation = 'relu', padding = 'same', name = 'block4_conv3')
+        self.block4_pool = MaxPooling2D((2,2), strides = (2,2), name = 'block4_pool')
+
+        # Fifth convolutional block
+        # 3, 3, 512 -> 3, 3, 512 -> 1, 1, 512
+        self.block5_conv1 = Conv2D(512, (3,3), activation = 'relu', padding = 'same', name = 'block5_conv1')
+        self.block5_conv2 = Conv2D(512, (3,3), activation = 'relu', padding = 'same', name = 'block5_conv2')
+        self.block5_conv3 = Conv2D(512, (3,3), activation = 'relu', padding = 'same', name = 'block5_conv3')
+        self.block5_pool = MaxPooling2D((2,2), strides = (2,2), name = 'block5_pool')
+
+        # Flattened feature vector: 1*1*512 = 512 for the 52x52 input used here
+        self.flatten = Flatten(name = 'flatten')
+
+    def call(self, inputs):
+        x = inputs
+        x = self.block1_conv1(x)
+        x = self.block1_conv2(x)
+        x = self.block1_pool(x)
+
+        x = self.block2_conv1(x)
+        x = self.block2_conv2(x)
+        x = self.block2_pool(x)
+
+        x = self.block3_conv1(x)
+        x = self.block3_conv2(x)
+        x = self.block3_conv3(x)
+        x = self.block3_pool(x)
+
+        x = self.block4_conv1(x)
+        x = self.block4_conv2(x)
+        x = self.block4_conv3(x)
+        x = self.block4_pool(x)
+
+        x = self.block5_conv1(x)
+        x = self.block5_conv2(x)
+        x = self.block5_conv3(x)
+        x = self.block5_pool(x)
+
+        outputs = self.flatten(x)
+        return outputs
diff --git a/yolov3-tiny.cfg b/yolov3-tiny.cfg
new file mode 100644
index 0000000..d8a8b02
--- /dev/null
+++ b/yolov3-tiny.cfg
@@ -0,0 +1,182 @@
+[net]
+# Testing
+# batch=1
+# subdivisions=1
+# Training
+batch=64
+subdivisions=8
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=1000
+max_batches = 30000
+policy=steps
+steps=400000,450000
+scales=.1,.1
+
+[convolutional]
+batch_normalize=1
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=1
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+###########
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=18
+activation=linear
+
+
+
+[yolo]
+mask = 3,4,5
+anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
+classes=1
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+[route]
+layers = -4
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[upsample]
+stride=2
+
+[route]
+layers = -1, 8
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=18
+activation=linear
+
+[yolo]
+mask = 0,1,2
+anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
+classes=1
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1