Add files via upload

2025-04-12 11:48:25 +08:00 · 2023-08-17 14:12:08 +08:00 · 2023-08-17 14:12:08 +08:00 · 8f089350df
commit 8f089350df
parent e98cedd149
10 changed files with 732 additions and 0 deletions
--- a/config.py
+++ b/config.py
@ -0,0 +1,38 @@
 """ 
    网络相关配置文件
 """
 # Path of the pre-trained weights file; train.py loads it by layer name unless this is set to False
 weight_model_path = './vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
 # Input image size: 52 x 52 x 3
 input_shape = (52,52,3)
 # GPU ids used for training: leave the list empty to skip GPU setup, list several ids for multi-GPU training
 gpu = []
 # gpu = [0,1,2,3]
 # Number of training epochs
 epochs = 3000
 # Batch size
 batch_size = 128
 # Learning rate
 lr = 1e-3
 # Whether to enable TensorBoard logging
 tensorboard = True
 # TensorBoard log directory
 tensorboard_log_dir = './logs'
 # Fraction of the samples held out for validation
 valid_rate = 0.05
 # Directory holding the training images (train.py expects <id>_1.jpg / <id>_2.jpg pairs)
 sample_path = './sample'
 # Checkpoint written whenever the monitored metric improves
 auto_best_checkpoint_path = './checkpoint/best.h5'
 # Periodic checkpoint; {epoch:03d} is filled in with the epoch number
 auto_epoch_checkpoint_path = './checkpoint/epoch_{epoch:03d}.h5'
 # Where the final weights are saved after training
 model_save_path = './model.h5'
--- a/model.png
+++ b/model.png
--- a/pre.py
+++ b/pre.py
@ -0,0 +1,34 @@
 import cv2
 import numpy as np
 from keras.models import load_model
 from keras.layers import Lambda
 from keras import backend as K
 import os
 import random
 from PIL import Image
 # Compare two user-supplied images with the trained siamese model and print the score.
 input1 = input('请输入图片名称1：')
 input2 = input('请输入图片名称2：')
 output = Lambda(lambda x: K.abs(x[0] - x[1]))
 weight = "./best.h5"
 # Load the trained model; the L1-distance Lambda is handed over as a custom object.
 model = load_model(weight, custom_objects={'output': output})
 # Side length the images are resized to before being fed to the model.
 resize = 52
 img1 = cv2.imread(input1)
 img2 = cv2.imread(input2)
 # cv2.imread reports a missing or unreadable file by returning None rather than
 # raising, which would otherwise surface as an obscure error inside cv2.resize.
 for _path, _img in ((input1, img1), (input2, img2)):
    if _img is None:
        raise FileNotFoundError('cannot read image: ' + _path)
 # Scale pixel values to [0, 1] and add the leading batch axis expected by predict().
 img1 = cv2.resize(img1, (resize, resize)) / 255
 img2 = cv2.resize(img2, (resize, resize)) / 255
 img1 = np.expand_dims(img1, axis=0)
 img2 = np.expand_dims(img2, axis=0)
 result = model.predict([img1, img2])
 print(result)
--- a/predict.py
+++ b/predict.py
@ -0,0 +1,182 @@
 import cv2
 import numpy as np
 from keras.models import load_model
 from keras.layers import Lambda
 from keras import backend as K
 import os
 import random
 from PIL import Image
 # Image to predict on: one file drawn at random from ./data.
 image_path = "./data/" + random.choice(os.listdir("./data"))
 # YOLOv3-tiny model used to segment the characters.
 cfg = "./yolov3-tiny.cfg"
 weight = "./yolov3-tiny_17000.weights"
 # Load the detector from the weights / config pair.
 net = cv2.dnn.readNet(weight, cfg)
 # The siamese comparison model is set up next.
 # Size of the crops fed to the siamese model.
 resize_height, resize_width, channel = 52, 52, 3
 # Custom layer, loss and metric definitions.
 output = Lambda(lambda x: K.abs(x[0] - x[1]))
 def contrastive_loss(y_true, y_pred):
    """Contrastive loss with a margin of 1, averaged over all elements."""
    margin = 1
    same_term = y_true * K.square(y_pred)
    different_term = (1 - y_true) * K.square(K.maximum(margin - y_pred, 0))
    return K.mean(same_term + different_term)
 def binary_accuracy(y_true, y_pred):
    """Mean agreement between y_true and the thresholded test (y_pred < 0.5)."""
    below_half = K.cast(y_pred < 0.5, y_true.dtype)
    return K.mean(K.equal(y_true, below_half))
 weight = "./best.h5"
 # Load the model; only the Lambda is passed as a custom object, the loss and
 # metric defined above are not handed to load_model here.
 model = load_model(weight, custom_objects={'output': output})
 classes = ["text"]
 img = cv2.imread(image_path)
 # cv2.imread returns None (no exception) when the file cannot be decoded,
 # e.g. when ./data contains a non-image file; fail with a clear message.
 if img is None:
    raise FileNotFoundError('cannot read image: ' + image_path)
 cv2.namedWindow('display')
 # Run the YOLO detector to locate the characters.
 height, width, channels = img.shape
 blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
 net.setInput(blob)
 outs = net.forward(net.getUnconnectedOutLayersNames())
 class_ids = []
 confidences = []
 boxes = []
 for out in outs:
    for detection in out:
        # Entries from index 5 on are the per-class scores.
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.1:
            # The first four entries are the box centre and size relative to
            # the image, so scale them to pixels and convert to a top-left box.
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(class_id)
 # Non-maximum suppression with score threshold 0.1 and overlap threshold 0.1.
 indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.1, 0.1)
 # Drawing parameters used when the result is displayed.
 thickness = 2
 color = (0, 255, 0)
 font = cv2.FONT_HERSHEY_PLAIN
 # Keep the boxes that survived NMS, in detection order. NMSBoxes may return an
 # N x 1 array, a flat array or an empty tuple, so flatten before reading it.
 kept_indexes = sorted(set(int(i) for i in np.array(indexes).flatten()))
 new_boxes = [list(boxes[i]) for i in kept_indexes]
 total = len(new_boxes)
 print('>>> 检测出：', total,'个字符')
 # The boxes are split into two equally sized groups, so the count must be even.
 if total % 2 != 0:
    print('>>> YOLO分割有误，顶部字符数量与点选数量不匹配')
    # Same effect as the site-provided exit(), without depending on the site module.
    raise SystemExit
 top_total = total // 2
 by_width = sorted(new_boxes, key=lambda x: x[2], reverse=True)
 # Widest half: the characters that have to be clicked.
 w_max_boxes = by_width[:top_total]
 # Remaining half: the characters to compare against, ordered left to right.
 w_min_box = sorted(by_width[top_total:], key=lambda x: x[0])
 def _crop_box(box):
    """Cut `box` out of img and resize it to the siamese input size."""
    x, y, w, h = box
    # A box hanging over the left/top edge has a negative origin; as a slice
    # start that would count from the far end, so clamp it to the image.
    x0, y0 = max(x, 0), max(y, 0)
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(img[y0:y+h, x0:x+w], (resize_width, resize_height))
 # Cut out the individual character images.
 w_max_image = [_crop_box(box) for box in w_max_boxes]
 w_min_image = [_crop_box(box) for box in w_min_box]
 # Scale pixel values to [0, 1].
 w_max_image_np = np.array(w_max_image) / 255
 w_min_image_np = np.array(w_min_image) / 255
 # For every w_max crop, the index of the w_min crop the model scores highest.
 select_index = []
 for i, left_x in enumerate(w_max_image_np):
    print('>>> 开始对比第', i+1, '个字符')
    # Debug dump of the crop being matched.
    cv2.imwrite('./1/1_{}.jpg'.format(i), left_x * 255)
    # The left input is the same for every candidate, so reshape it once.
    left_batch = left_x.reshape(1, resize_height, resize_width, channel)
    num_index = 0
    cache_rate = 0
    for k, left_y in enumerate(w_min_image_np):
        predict = model.predict([left_batch, left_y.reshape(1, resize_height, resize_width, channel)])
        rate = predict[0][0]
        if rate > cache_rate:
            # Debug dump of the best candidate found so far.
            cv2.imwrite('./1/2_{}.jpg'.format(i), left_y * 255)
            num_index = k
            # Only a new best score may replace the running maximum; updating
            # it on every candidate would compare against the previous score
            # instead of the best one and pick the wrong match.
            cache_rate = rate
    select_index.append(num_index)
 print('>>> 对比完成，结果为：', select_index)
 # Mark every selected box with its sequence number and collect the box centres.
 # NOTE(review): select_index is filled with positions inside w_min_image_np,
 # yet it is used here to index w_max_boxes - confirm this mapping is intended.
 location = []
 for order, picked in enumerate(select_index, start=1):
    x, y, w, h = w_max_boxes[picked]
    bottom_right = (x + w, y + h)
    cv2.rectangle(img, (x, y), bottom_right, color, thickness)
    cv2.putText(img, str(order), (x, y + h), font, 1, color, thickness)
    # Centre of the box in image coordinates.
    location.append([x + w / 2, y + h / 2])
 print('>>> 位置坐标：', location)
 cv2.imshow('display', img)
 cv2.waitKey(0)
--- a/requirement.txt
+++ b/requirement.txt
--- a/siamese.py
+++ b/siamese.py
@ -0,0 +1,24 @@
 from tensorflow.keras.layers import Input,Lambda,Dense
 from tensorflow.keras.models import Model
 from vgg16 import VGG16
 def siamese(input_shape):
    """Build the two-input siamese comparison model.
    Both inputs of shape `input_shape` go through one shared VGG16 stack; the
    element-wise absolute difference of the two feature vectors is passed
    through a 512-unit ReLU layer and a single sigmoid unit.
    Returns the Keras Model taking [left_input, right_input].
    """
    # A single backbone instance, so both branches share their parameters.
    backbone = VGG16()
    left_input = Input(shape=input_shape)
    right_input = Input(shape=input_shape)
    features = [backbone.call(left_input), backbone.call(right_input)]
    # L1 distance between the two feature vectors.
    l1_distance = Lambda(lambda pair: abs(pair[0] - pair[1]))(features)
    # First fully connected layer.
    hidden = Dense(512, activation='relu')(l1_distance)
    # Second fully connected layer: the similarity score.
    score = Dense(1, activation='sigmoid')(hidden)
    return Model(inputs=[left_input, right_input], outputs=score)
--- a/train.py
+++ b/train.py
@ -0,0 +1,159 @@
 import os
 import config
 import tensorflow as tf
 from siamese import siamese
 from tensorflow.python.keras.utils.multi_gpu_utils import multi_gpu_model
 from tensorflow.keras.optimizers import SGD
 from tensorflow.keras.callbacks import *
 import cv2
 import random
 import numpy as np
 import utils
 if config.gpu:
    # CUDA_VISIBLE_DEVICES is only honoured if it is set before TensorFlow
    # enumerates the GPUs, so export it ahead of list_physical_devices().
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(i) for i in config.gpu)
    print('>>> GPU:',os.environ['CUDA_VISIBLE_DEVICES'])
    # Allocate GPU memory on demand instead of reserving all of it up front.
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
 input_shape = config.input_shape
 model = siamese(input_shape=(input_shape[0],input_shape[1],input_shape[2]))
 # Pre-trained weights are optional: False, None or an empty path skips loading.
 if config.weight_model_path:
    # by_name=True matches weights to layers by layer name, so only layers
    # whose names also occur in the weights file are loaded - handy for
    # fine-tuning from a backbone-only file.
    # skip_mismatch=True skips layers that cannot be matched instead of raising,
    # which keeps loading working after the model structure has changed; layers
    # that are skipped keep their initial weights.
    model.load_weights(config.weight_model_path,by_name=True,skip_mismatch=True)
 # Wrap the model for data-parallel training when several GPUs are configured.
 if len(config.gpu) > 1:
    model = multi_gpu_model(model,gpus=len(config.gpu))
 # `learning_rate` is the current keyword; `lr` is only a deprecated alias.
 opt = SGD(learning_rate=config.lr,momentum=0.9)
 # Scale the configured learning rate by batch_size / nbs and clamp it to fixed
 # limits; the minimum rate uses limits one hundred times smaller.
 nbs             = 64
 lr_limit_max    = 1e-3
 lr_limit_min    = 3e-4
 min_lr = config.lr * 0.01
 batch_ratio = config.batch_size / nbs
 Init_lr_fit = max(batch_ratio * config.lr, lr_limit_min)
 Init_lr_fit = min(Init_lr_fit, lr_limit_max)
 Min_lr_fit = max(batch_ratio * min_lr, lr_limit_min * 1e-2)
 Min_lr_fit = min(Min_lr_fit, lr_limit_max * 1e-2)
 # Build the list of training pairs as [label, left_path, right_path].
 sample = os.listdir(config.sample_path)
 data = []
 # Group ids (the part of the file name before the first '_'), de-duplicated
 # in first-seen order.
 hasOne = list(dict.fromkeys(name.split('_')[0] for name in sample))
 # Negative pairs are drawn by rejection sampling from other groups; with
 # fewer than two groups that sampling could never succeed and would spin forever.
 if len(hasOne) < 2:
    raise ValueError('need at least two sample groups in ' + config.sample_path + ' to build negative pairs')
 for t in hasOne:
    anchor = config.sample_path + '/' + t + '_1.jpg'
    partner = config.sample_path + '/' + t + '_2.jpg'
    # Plain positive pair.
    data.append([1, anchor, partner])
    # Label 2 marks positive pairs that get augmented (and relabelled 1) later.
    data.extend([2, anchor, partner] for _ in range(3))
    # Three random negatives: any file belonging to a different group.
    for j in range(3):
        f = random.choice(sample)
        while f.split('_')[0] == t:
            f = random.choice(sample)
        data.append([0, anchor, config.sample_path + '/' + f])
 # Shuffle the pairs.
 random.shuffle(data)
 # input_shape is (rows, cols, channels); OpenCV sizes and points are (x, y).
 height, width = input_shape[0], input_shape[1]
 def _load_resized(path):
    """Read an image and resize it to the network input size."""
    image = cv2.imread(path)
    # cv2.imread returns None instead of raising on a missing or unreadable file.
    if image is None:
        raise FileNotFoundError('cannot read image: ' + path)
    return cv2.resize(image, (width, height))
 def _random_point():
    """Return a random (x, y) pixel position inside the resized image."""
    return (random.randint(0, width - 1), random.randint(0, height - 1))
 def _shift_hue(image, delta):
    """Return the BGR image with its hue channel shifted by delta.
    8-bit OpenCV hue lives in [0, 180); the shift is done in a wider integer
    type and wrapped modulo 180 so a negative or large delta cannot overflow uint8.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hsv[:,:,0] = ((hsv[:,:,0].astype(np.int16) + delta) % 180).astype(np.uint8)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
 train_x_left = np.zeros((len(data),input_shape[0],input_shape[1],input_shape[2]))
 train_x_right = np.zeros((len(data),input_shape[0],input_shape[1],input_shape[2]))
 train_y = np.zeros((len(data),1))
 total = len(data)
 now = 0
 for now, i in enumerate(data, start=1):
    left = _load_resized(i[1])
    right = _load_resized(i[2])
    label = i[0]
    # Label 2 marks a positive pair that is augmented and then trained as label 1.
    if label == 2:
        label = 1
        # Random horizontal flip, applied to both images together.
        if random.random() > 0.5:
            left = cv2.flip(left,1)
            right = cv2.flip(right,1)
        # Random noise: up to five rounds of black lines and dots.
        for h in range(5):
            if random.random() > 0.5:
                cv2.line(left,_random_point(),_random_point(),(0,0,0),random.randint(1,3))
                cv2.line(right,_random_point(),_random_point(),(0,0,0),random.randint(1,3))
            if random.random() > 0.5:
                cv2.circle(left,_random_point(),random.randint(1,3),(0,0,0),-1)
                cv2.circle(right,_random_point(),random.randint(1,3),(0,0,0),-1)
        # Random hue shift, drawn independently for each image.
        if random.random() > 0.5:
            left = _shift_hue(left,random.randint(-10,10))
            right = _shift_hue(right,random.randint(-10,10))
        # Keep one augmented pair on disk for visual inspection.
        if not os.path.exists('./1.jpg'):
            cv2.imwrite('./1.jpg',left)
            cv2.imwrite('./2.jpg',right)
    # Normalise to [0, 1].
    left = left / 255.0
    right = right / 255.0
    train_x_left[now-1] = left
    train_x_right[now-1] = right
    train_y[now-1] = label
    print('>>> 正在处理第',now,'/',total,'张图片',end='\r')
 # Cosine learning-rate schedule over the configured number of epochs.
 lr_scheduler_func = utils.get_lr_scheduler('cos', Init_lr_fit, Min_lr_fit, config.epochs)
 # Make sure the checkpoint directories exist before the first save is attempted.
 for _checkpoint_path in (config.auto_best_checkpoint_path, config.auto_epoch_checkpoint_path):
    _checkpoint_dir = os.path.dirname(_checkpoint_path)
    if _checkpoint_dir:
        os.makedirs(_checkpoint_dir, exist_ok=True)
 # One checkpoint that keeps only the best model, one written every 10 epochs.
 best_checkpoint = ModelCheckpoint(config.auto_best_checkpoint_path,save_weights_only=False,save_best_only=True,verbose=1,period=1)
 epoch_checkpoint = ModelCheckpoint(config.auto_epoch_checkpoint_path,save_weights_only=False,save_best_only=False,verbose=1,period=10)
 lr_scheduler = LearningRateScheduler(lr_scheduler_func, verbose=1)
 callback = [best_checkpoint,epoch_checkpoint,lr_scheduler]
 if config.tensorboard:
    tb = TensorBoard(log_dir=config.tensorboard_log_dir,write_graph=True,write_images=True)
    callback.append(tb)
 model.compile(loss='binary_crossentropy',optimizer=opt,metrics=['binary_accuracy'])
 model.fit([train_x_left,train_x_right],train_y,batch_size=config.batch_size,epochs=config.epochs,validation_split=config.valid_rate,callbacks=callback)
 # Save the final weights once training has finished.
 model.save_weights(config.model_save_path)
--- a/utils.py
+++ b/utils.py
@ -0,0 +1,41 @@
 import math
 from functools import partial
 def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.05, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.05, step_num = 10):
    """Build a function that maps an iteration index to a learning rate.
    lr_decay_type: "cos" selects warm-up + cosine decay; any other value
        selects stepwise exponential decay.
    lr, min_lr: starting and final learning rate.
    total_iters: total number of iterations the schedule spans.
    warmup_iters_ratio, warmup_lr_ratio, no_aug_iter_ratio: cosine schedule
        only - fraction of iterations used for warm-up (clamped to 1..3),
        warm-up start rate as a fraction of lr (at least 1e-6), and fraction
        of final iterations held at min_lr (clamped to 1..15).
    step_num: step schedule only - number of equally long steps.
    Returns a one-argument callable f(iters) -> learning rate. The step
    schedule raises ValueError when called if a step is shorter than one
    iteration.
    """
    def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters):
        if iters <= warmup_total_iters:
            # Quadratic warm-up from warmup_lr_start up to lr.
            lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2
            ) + warmup_lr_start
        elif iters >= total_iters - no_aug_iter:
            # Tail of the schedule: hold the minimum rate.
            lr = min_lr
        else:
            # Half-cosine decay from lr down to min_lr between the two phases.
            lr = min_lr + 0.5 * (lr - min_lr) * (
                1.0
                + math.cos(
                    math.pi
                    * (iters - warmup_total_iters)
                    / (total_iters - warmup_total_iters - no_aug_iter)
                )
            )
        return lr
    def step_lr(lr, decay_rate, step_size, iters):
        if step_size < 1:
            raise ValueError("step_size must above 1.")
        n       = iters // step_size
        out_lr  = lr * decay_rate ** n
        return out_lr
    if lr_decay_type == "cos":
        warmup_total_iters  = min(max(warmup_iters_ratio * total_iters, 1), 3)
        warmup_lr_start     = max(warmup_lr_ratio * lr, 1e-6)
        no_aug_iter         = min(max(no_aug_iter_ratio * total_iters, 1), 15)
        func = partial(yolox_warm_cos_lr ,lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter)
    else:
        # decay_rate is chosen so the last of the step_num steps lands on min_lr.
        # With a single step there is nothing to decay; use a neutral rate
        # rather than dividing by step_num - 1 == 0.
        if step_num > 1:
            decay_rate = (min_lr / lr) ** (1 / (step_num - 1))
        else:
            decay_rate = 1.0
        step_size   = total_iters / step_num
        func = partial(step_lr, lr, decay_rate, step_size)
    return func
--- a/vgg16.py
+++ b/vgg16.py
@ -0,0 +1,72 @@
 from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D
 """ 
    VGG16代码来源于https://github.com/bubbliiiing/Siamese-keras
    也可以使用tensorflow.keras.applications.VGG16实现
 """
 class VGG16:
    """VGG16 convolutional stack (five conv blocks) ending in a Flatten layer.
    Every block is a run of 3x3 'same'-padded ReLU convolutions followed by a
    2x2, stride-2 max pooling, so each block halves the spatial size (rounding
    down). The layers are created once in __init__ and applied by call(), so
    calling the same instance on several inputs reuses the same layers.
    """
    def __init__(self):
        def conv(filters, name):
            return Conv2D(filters, (3,3), activation = 'relu', padding = 'same', name = name)
        def pool(name):
            return MaxPooling2D((2,2), strides = (2,2), name = name)
        # Block 1: 64 filters.
        self.block1_conv1 = conv(64, 'block1_conv1')
        self.block1_conv2 = conv(64, 'block1_conv2')
        self.block1_pool = pool('block1_pool')
        # Block 2: 128 filters.
        self.block2_conv1 = conv(128, 'block2_conv1')
        self.block2_conv2 = conv(128, 'block2_conv2')
        self.block2_pool = pool('block2_pool')
        # Block 3: 256 filters.
        self.block3_conv1 = conv(256, 'block3_conv1')
        self.block3_conv2 = conv(256, 'block3_conv2')
        self.block3_conv3 = conv(256, 'block3_conv3')
        self.block3_pool = pool('block3_pool')
        # Block 4: 512 filters.
        self.block4_conv1 = conv(512, 'block4_conv1')
        self.block4_conv2 = conv(512, 'block4_conv2')
        self.block4_conv3 = conv(512, 'block4_conv3')
        self.block4_pool = pool('block4_pool')
        # Block 5: 512 filters.
        self.block5_conv1 = conv(512, 'block5_conv1')
        self.block5_conv2 = conv(512, 'block5_conv2')
        self.block5_conv3 = conv(512, 'block5_conv3')
        self.block5_pool = pool('block5_pool')
        # Collapse the final feature map into one vector per sample.
        self.flatten = Flatten(name = 'flatten')
    def call(self, inputs):
        """Apply all layers in order to `inputs` and return the flattened features."""
        pipeline = (
            self.block1_conv1, self.block1_conv2, self.block1_pool,
            self.block2_conv1, self.block2_conv2, self.block2_pool,
            self.block3_conv1, self.block3_conv2, self.block3_conv3, self.block3_pool,
            self.block4_conv1, self.block4_conv2, self.block4_conv3, self.block4_pool,
            self.block5_conv1, self.block5_conv2, self.block5_conv3, self.block5_pool,
            self.flatten,
        )
        outputs = inputs
        for layer in pipeline:
            outputs = layer(outputs)
        return outputs
--- a/yolov3-tiny.cfg
+++ b/yolov3-tiny.cfg
@ -0,0 +1,182 @@
 [net]
 # Testing
 # batch=1
 # subdivisions=1
 # Training
 batch=64
 subdivisions=8
 width=416
 height=416
 channels=3
 momentum=0.9
 decay=0.0005
 angle=0
 saturation = 1.5
 exposure = 1.5
 hue=.1
 learning_rate=0.001
 burn_in=1000
 max_batches = 30000
 policy=steps
 steps=400000,450000
 scales=.1,.1
 [convolutional]
 batch_normalize=1
 filters=16
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=32
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=64
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=128
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=2
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [maxpool]
 size=2
 stride=1
 [convolutional]
 batch_normalize=1
 filters=1024
 size=3
 stride=1
 pad=1
 activation=leaky
 ###########
 [convolutional]
 batch_normalize=1
 filters=256
 size=1
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 batch_normalize=1
 filters=512
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 size=1
 stride=1
 pad=1
 filters=18
 activation=linear
 [yolo]
 mask = 3,4,5
 anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
 classes=1
 num=6
 jitter=.3
 ignore_thresh = .7
 truth_thresh = 1
 random=1
 [route]
 layers = -4
 [convolutional]
 batch_normalize=1
 filters=128
 size=1
 stride=1
 pad=1
 activation=leaky
 [upsample]
 stride=2
 [route]
 layers = -1, 8
 [convolutional]
 batch_normalize=1
 filters=256
 size=3
 stride=1
 pad=1
 activation=leaky
 [convolutional]
 size=1
 stride=1
 pad=1
 filters=18
 activation=linear
 [yolo]
 mask = 0,1,2
 anchors = 10,14,  23,27,  37,58,  81,82,  135,169,  344,319
 classes=1
 num=6
 jitter=.3
 ignore_thresh = .7
 truth_thresh = 1
 random=1