Add files via upload

This commit is contained in:
wlkjyy 2023-08-17 14:12:08 +08:00 committed by GitHub
parent e98cedd149
commit 8f089350df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 732 additions and 0 deletions

38
config.py Normal file
View File

@ -0,0 +1,38 @@
"""
网络相关配置文件
"""
# Pretrained weights file; train.py skips loading when this is set to False
weight_model_path = './vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
# Input image size: 52 x 52 x 3
input_shape = (52,52,3)
# GPU ids used for training: leave the list empty when not training on a
# specific GPU, or list several ids for multi-GPU training
gpu = []
# gpu = [0,1,2,3]
# Number of training epochs
epochs = 3000
# Batch size
batch_size = 128
# Learning rate
lr = 1e-3
# Whether to enable TensorBoard logging
tensorboard = True
# TensorBoard log directory
tensorboard_log_dir = './logs'
# Fraction of the data held out for validation
valid_rate = 0.05
# Folder that train.py lists to find the training images
sample_path = './sample'
# Checkpoint written by the save_best_only callback in train.py
auto_best_checkpoint_path = './checkpoint/best.h5'
# Periodic checkpoint; {epoch:03d} is filled in with the epoch number
auto_epoch_checkpoint_path = './checkpoint/epoch_{epoch:03d}.h5'
# Where train.py saves the final weights after fitting
model_save_path = './model.h5'

BIN
model.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 131 KiB

34
pre.py Normal file
View File

@ -0,0 +1,34 @@
import cv2
import numpy as np
from keras.models import load_model
from keras.layers import Lambda
from keras import backend as K
import os
import random
from PIL import Image
# Ask for the two image files that should be compared.
input1 = input('请输入图片名称1')
input2 = input('请输入图片名称2')
# L1-distance layer handed to load_model through custom_objects.
output = Lambda(lambda x: K.abs(x[0] - x[1]))
weight = "./best.h5"
# Load the trained siamese model.
model = load_model(weight, custom_objects={'output': output})
# Side length the network was trained with (52 x 52 inputs).
resize = 52
img1 = cv2.imread(input1)
img2 = cv2.imread(input2)
# cv2.imread reports failure by returning None instead of raising; stop with
# a readable message rather than crashing inside cv2.resize.
for _path, _img in ((input1, img1), (input2, img2)):
    if _img is None:
        raise SystemExit('无法读取图片: {}'.format(_path))
# Resize to the network input size and scale pixel values to [0, 1].
img1 = cv2.resize(img1, (resize, resize)) / 255
img2 = cv2.resize(img2, (resize, resize)) / 255
# Add the batch dimension expected by model.predict.
img1 = np.expand_dims(img1, axis=0)
img2 = np.expand_dims(img2, axis=0)
result = model.predict([img1, img2])
print(result)

182
predict.py Normal file
View File

@ -0,0 +1,182 @@
import cv2
import numpy as np
from keras.models import load_model
from keras.layers import Lambda
from keras import backend as K
import os
import random
from PIL import Image
# Image to run the prediction on
# image_path = "./sample/1691156257961.jpg"
image_path = os.listdir("./data")
# Pick one file from ./data at random (image_path is rebound from the
# directory listing to the chosen file path)
# inp = input('请输入图片名称:')
image_path = "./data/" + random.choice(image_path)
# image_path = './sample/' + inp
# print(image_path)
"""
YOLOv3 分割模型
"""
# YOLOv3 segmentation model: weights file and network config
weight = "./yolov3-tiny_17000.weights"
cfg = "./yolov3-tiny.cfg"
# Load the detector through OpenCV's DNN module
net = cv2.dnn.readNet(weight, cfg)
"""
孪生网络 对比模型
"""
# Siamese network: comparison model. Crops are resized to this size.
resize_height, resize_width,channel = 52,52,3
# Custom layer, loss and accuracy definitions for the comparison model.
# `output` is the absolute-difference Lambda handed to load_model below.
output = Lambda(lambda x: K.abs(x[0] - x[1]))
def contrastive_loss(y_true, y_pred):
    """Return the mean contrastive loss with a margin of 1.

    Per element the loss is
    ``y_true * y_pred**2 + (1 - y_true) * max(1 - y_pred, 0)**2``,
    so the first term applies where ``y_true`` is 1 and the hinge term where
    it is 0.
    """
    matched_term = y_true * K.square(y_pred)
    hinge = K.maximum(1 - y_pred, 0)
    unmatched_term = (1 - y_true) * K.square(hinge)
    return K.mean(matched_term + unmatched_term)
def binary_accuracy(y_true, y_pred):
    """Return the mean agreement between labels and thresholded predictions.

    A prediction below 0.5 is mapped to 1 and anything else to 0 (cast to the
    dtype of ``y_true``) before it is compared with ``y_true``.
    """
    thresholded = K.cast(y_pred < 0.5, y_true.dtype)
    agreement = K.equal(y_true, thresholded)
    return K.mean(agreement)
weight = "./best.h5"
# Load the trained siamese model; `output` is the absolute-difference Lambda
# defined earlier in this script.
model = load_model(weight, custom_objects={'output': output})
classes = ["text"]
img = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    print('>>> 无法读取图片:', image_path)
    raise SystemExit(1)
cv2.namedWindow('display')

# ---- YOLO: locate every character in the image ----
height, width = img.shape[:2]
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(net.getUnconnectedOutLayersNames())
confidences = []
boxes = []
for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.1:
            # Detections are normalised (cx, cy, w, h); convert to pixels.
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            if w <= 0 or h <= 0:
                # A degenerate box cannot be cropped or resized later on.
                continue
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.1, 0.1)
thickness = 2
color = (0, 255, 0)
font = cv2.FONT_HERSHEY_PLAIN
# Depending on the OpenCV version NMSBoxes yields an (N, 1) array, a flat
# array or an empty tuple; flatten it once into a set of kept indices.
kept_indexes = set(np.array(indexes).flatten().tolist())
new_boxes = [list(box) for i, box in enumerate(boxes) if i in kept_indexes]
total = len(new_boxes)
print('>>> 检测出:', total,'个字符')
# Half of the boxes must be prompt glyphs and half click targets, so an odd
# count means the segmentation went wrong.
if total % 2 != 0:
    print('>>> YOLO分割有误顶部字符数量与点选数量不匹配')
    # `exit()` is only injected by the site module; raise SystemExit directly
    # and report the failure through a non-zero exit status.
    raise SystemExit(1)
top_total = total // 2
boxes_by_width = sorted(new_boxes, key=lambda box: box[2], reverse=True)
# The widest boxes are the characters that have to be clicked ...
w_max_boxes = boxes_by_width[:top_total]
# ... the remaining ones are the prompt glyphs, read from left to right.
w_min_box = sorted(boxes_by_width[top_total:], key=lambda box: box[0])


def _crop_box(image, box):
    """Cut `box` out of `image` and resize it to the siamese input size.

    The box is clamped to the image first: a negative origin would otherwise
    be read by NumPy as an index from the far edge and give an empty or
    wrong crop.
    """
    bx, by, bw, bh = box
    x0, y0 = max(bx, 0), max(by, 0)
    x1, y1 = min(bx + bw, image.shape[1]), min(by + bh, image.shape[0])
    # cv2.resize takes dsize as (width, height).
    return cv2.resize(image[y0:y1, x0:x1], (resize_width, resize_height))


# Cut out the individual characters (before anything is drawn on `img`).
w_max_image = [_crop_box(img, box) for box in w_max_boxes]
w_min_image = [_crop_box(img, box) for box in w_min_box]
w_max_image_np = np.array(w_max_image) / 255
w_min_image_np = np.array(w_min_image) / 255

# Debug crops are written below; cv2.imwrite does not create directories.
os.makedirs('./1', exist_ok=True)
select_index = []
# For every prompt glyph, in reading order, pick the most similar click
# target. select_index[i] is therefore an index into w_max_boxes, which is
# what the drawing loop below expects. Previously the roles were swapped and
# prompt indices were used to index w_max_boxes.
for i, prompt in enumerate(w_min_image_np):
    print('>>> 开始对比第', i+1, '个字符')
    cv2.imwrite('./1/1_{}.jpg'.format(i), w_min_image[i])
    # Score the prompt against all click targets in one batch.
    prompt_batch = np.repeat(prompt[np.newaxis], len(w_max_image_np), axis=0)
    rates = model.predict([prompt_batch, w_max_image_np])[:, 0]
    num_index = int(np.argmax(rates))
    cv2.imwrite('./1/2_{}.jpg'.format(i), w_max_image[num_index])
    select_index.append(num_index)
print('>>> 对比完成,结果为:', select_index)

# Draw the click order on the image and collect the click coordinates.
location = []
for i, box_index in enumerate(select_index):
    x, y, w, h = w_max_boxes[box_index]
    cv2.rectangle(img, (x, y), (x+w, y+h), color, thickness)
    cv2.putText(img, str(i+1), (x, y+h), font, 1, color, thickness)
    # Centre point of the box in image coordinates.
    location.append([x+w/2, y+h/2])
print('>>> 位置坐标:', location)
cv2.imshow('display', img)
cv2.waitKey(0)

BIN
requirement.txt Normal file

Binary file not shown.

24
siamese.py Normal file
View File

@ -0,0 +1,24 @@
from tensorflow.keras.layers import Input,Lambda,Dense
from tensorflow.keras.models import Model
from vgg16 import VGG16
def siamese(input_shape):
    """Build the two-input siamese comparison model.

    Both inputs (each of shape ``input_shape``) go through the same VGG16
    instance so the two branches share their weights. The element-wise
    absolute difference of the two feature vectors is fed to a 512-unit ReLU
    layer and a single sigmoid unit.

    Returns a ``Model`` taking ``[left, right]`` and producing the sigmoid
    output.
    """
    # One backbone instance only: calling it twice reuses the same layers.
    backbone = VGG16()
    branch_inputs = [Input(shape = input_shape) for _ in range(2)]
    branch_features = [backbone.call(tensor) for tensor in branch_inputs]
    # L1 distance between the two feature vectors.
    distance = Lambda(lambda x:abs(x[0]-x[1]))(branch_features)
    # Fully connected layer 1
    hidden = Dense(512,activation = 'relu')(distance)
    # Fully connected layer 2
    score = Dense(1,activation = 'sigmoid')(hidden)
    return Model(branch_inputs,score)

159
train.py Normal file
View File

@ -0,0 +1,159 @@
import os
import config
import tensorflow as tf
from siamese import siamese
from tensorflow.python.keras.utils.multi_gpu_utils import multi_gpu_model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import *
import cv2
import random
import numpy as np
import utils
if config.gpu:
    # Restrict the visible devices BEFORE TensorFlow enumerates the GPUs.
    # CUDA reads CUDA_VISIBLE_DEVICES when it is initialised, so setting it
    # after list_physical_devices() has run leaves the selection without
    # effect.
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(i) for i in config.gpu)
    # Allocate GPU memory on demand instead of reserving it all up front.
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    print('>>> GPU:',os.environ['CUDA_VISIBLE_DEVICES'])
input_shape = config.input_shape
model = siamese(input_shape=(input_shape[0],input_shape[1],input_shape[2]))
# Any falsy value (False, None, '') disables loading of pretrained weights;
# comparing with `!= False` let None or '' reach load_weights.
if config.weight_model_path:
    # by_name=True: weights are matched to layers by layer name, so only the
    # layers whose names appear in the weights file are loaded, which is what
    # fine-tuning needs.
    # skip_mismatch=True: layers whose weights do not match are skipped
    # instead of raising, which helps when the architecture has changed.
    model.load_weights(config.weight_model_path,by_name=True,skip_mismatch=True)
# Wrap the model for data-parallel training when several GPUs are configured.
if len(config.gpu) > 1:
    model = multi_gpu_model(model,gpus=len(config.gpu))
# `learning_rate` is the current keyword; `lr` is only a deprecated alias.
opt = SGD(learning_rate=config.lr,momentum=0.9)
# Bounds for the learning-rate schedule: scale the configured rate with the
# batch size relative to a nominal batch size of 64, then clamp it.
min_lr = config.lr * 0.01
nbs = 64
lr_limit_max = 1e-3
lr_limit_min = 3e-4
Init_lr_fit = min(max(config.batch_size / nbs * config.lr, lr_limit_min), lr_limit_max)
Min_lr_fit = min(max(config.batch_size / nbs * min_lr, lr_limit_min * 1e-2), lr_limit_max * 1e-2)
# Build the list of training pairs as [label, left_path, right_path].
# Files are expected to be named <id>_1.jpg / <id>_2.jpg.
#   label 1: positive pair (same id)
#   label 2: positive pair that gets augmented later, where 2 is turned into 1
#   label 0: negative pair (the right image belongs to another id)
sample = os.listdir(config.sample_path)
data = []
# Ids that already produced their pairs; a set keeps the lookup O(1).
hasOne = set()
# Negative sampling below redraws until it finds a file of another id, which
# never terminates when fewer than two ids exist.
if len({name.split('_')[0] for name in sample}) < 2:
    raise ValueError(
        'Need images of at least two different ids in {!r} to build negative pairs'.format(config.sample_path)
    )
for i in sample:
    t = i.split('_')[0]
    if t in hasOne:
        continue
    hasOne.add(t)
    left_path = config.sample_path + '/' + t + '_1.jpg'
    right_path = config.sample_path + '/' + t + '_2.jpg'
    data.append([1, left_path, right_path])
    # Three extra copies of the positive pair, marked for augmentation.
    for _ in range(3):
        data.append([2, left_path, right_path])
    # Three random negative samples.
    for j in range(3):
        f = random.choice(sample)
        while f.split('_')[0] == t:
            f = random.choice(sample)
        data.append([0, left_path, config.sample_path + '/' + f])
# Shuffle the pairs.
random.shuffle(data)
# Pre-allocate the training tensors: two image stacks and one label column.
train_x_left = np.zeros((len(data),input_shape[0],input_shape[1],input_shape[2]))
train_x_right = np.zeros((len(data),input_shape[0],input_shape[1],input_shape[2]))
train_y = np.zeros((len(data),1))
total = len(data)
# input_shape is (height, width, channels); OpenCV sizes and points are
# given as (width, height) / (x, y).
img_h, img_w = input_shape[0], input_shape[1]


def _random_point():
    """Return a random (x, y) position for drawing noise onto an image."""
    return (random.randint(0, img_w), random.randint(0, img_h))


def _shift_hue(image, delta):
    """Shift the hue channel of an 8-bit HSV image in place.

    8-bit OpenCV hue lives in 0..179, so the shift wraps modulo 180. Adding
    directly to the uint8 channel wraps modulo 256 instead and produces
    invalid hue values.
    """
    image[:, :, 0] = (image[:, :, 0].astype(np.int16) + delta) % 180


now = 0
for now, i in enumerate(data, start=1):
    left = cv2.imread(i[1])
    right = cv2.imread(i[2])
    # cv2.imread returns None instead of raising when a file is missing or
    # cannot be decoded.
    if left is None or right is None:
        raise IOError('Cannot read image pair: {} / {}'.format(i[1], i[2]))
    left = cv2.resize(left,(img_w,img_h))
    right = cv2.resize(right,(img_w,img_h))
    label = i[0]
    # Data augmentation: label 2 marks a positive pair to be augmented.
    if label == 2:
        label = 1
        # Random horizontal flip, applied to both images alike.
        if random.random() > 0.5:
            left = cv2.flip(left,1)
            right = cv2.flip(right,1)
        # Random noise
        for _ in range(5):
            # Random black lines
            if random.random() > 0.5:
                cv2.line(left,_random_point(),_random_point(),(0,0,0),random.randint(1,3))
                cv2.line(right,_random_point(),_random_point(),(0,0,0),random.randint(1,3))
            # Random black dots
            if random.random() > 0.5:
                cv2.circle(left,_random_point(),random.randint(1,3),(0,0,0),-1)
                cv2.circle(right,_random_point(),random.randint(1,3),(0,0,0),-1)
        # Colour jitter: shift the hue of each image independently.
        if random.random() > 0.5:
            left = cv2.cvtColor(left,cv2.COLOR_BGR2HSV)
            right = cv2.cvtColor(right,cv2.COLOR_BGR2HSV)
            _shift_hue(left, random.randint(-10,10))
            _shift_hue(right, random.randint(-10,10))
            left = cv2.cvtColor(left,cv2.COLOR_HSV2BGR)
            right = cv2.cvtColor(right,cv2.COLOR_HSV2BGR)
        # Keep one augmented pair on disk for visual inspection.
        if not os.path.exists('./1.jpg'):
            cv2.imwrite('./1.jpg',left)
            cv2.imwrite('./2.jpg',right)
    # Normalise pixel values to [0, 1].
    left = left / 255.0
    right = right / 255.0
    train_x_left[now-1] = left
    train_x_right[now-1] = right
    train_y[now-1] = label
    print('>>> 正在处理第',now,'/',total,'张图片',end='\r')
# Warm-up + cosine learning-rate schedule, evaluated once per epoch.
lr_scheduler_func = utils.get_lr_scheduler('cos', Init_lr_fit, Min_lr_fit, config.epochs)
# Make sure the checkpoint directories exist up front instead of relying on
# the callback / HDF5 writer to create them.
for _checkpoint_path in (config.auto_best_checkpoint_path, config.auto_epoch_checkpoint_path):
    _checkpoint_dir = os.path.dirname(_checkpoint_path)
    if _checkpoint_dir:
        os.makedirs(_checkpoint_dir, exist_ok=True)
# Best model so far (save_best_only) plus a full snapshot every 10 epochs.
best_checkpoint = ModelCheckpoint(config.auto_best_checkpoint_path,save_weights_only=False,save_best_only=True,verbose=1,period=1)
epoch_checkpoint = ModelCheckpoint(config.auto_epoch_checkpoint_path,save_weights_only=False,save_best_only=False,verbose=1,period=10)
lr_scheduler = LearningRateScheduler(lr_scheduler_func, verbose=1)
callback = [best_checkpoint,epoch_checkpoint,lr_scheduler]
# TensorBoard logging is optional; create and register it in one place.
if config.tensorboard:
    tb = TensorBoard(log_dir=config.tensorboard_log_dir,write_graph=True,write_images=True)
    callback.append(tb)
model.compile(loss='binary_crossentropy',optimizer=opt,metrics=['binary_accuracy'])
model.fit([train_x_left,train_x_right],train_y,batch_size=config.batch_size,epochs=config.epochs,validation_split=config.valid_rate,callbacks=callback)
# Final weights after the last epoch.
model.save_weights(config.model_save_path)

41
utils.py Normal file
View File

@ -0,0 +1,41 @@
import math
from functools import partial
def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio = 0.05, warmup_lr_ratio = 0.1, no_aug_iter_ratio = 0.05, step_num = 10):
    """Return a one-argument schedule mapping an iteration index to a rate.

    ``lr_decay_type == "cos"`` selects a quadratic warm-up followed by cosine
    decay and a flat tail at ``min_lr``; any other value selects a step decay
    that goes from ``lr`` to ``min_lr`` in ``step_num`` stages.

    The warm-up length is ``warmup_iters_ratio * total_iters`` clamped to
    [1, 3], the warm-up start rate is ``warmup_lr_ratio * lr`` but at least
    1e-6, and the flat tail lasts ``no_aug_iter_ratio * total_iters`` clamped
    to [1, 15].

    The step schedule raises ``ValueError`` when it is called and
    ``total_iters / step_num`` is below 1.
    """
    def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters):
        # Warm-up: rise quadratically from warmup_lr_start to lr.
        if iters <= warmup_total_iters:
            ramp = pow(iters / float(warmup_total_iters), 2)
            return (lr - warmup_lr_start) * ramp + warmup_lr_start
        # Tail: hold the minimum rate.
        if iters >= total_iters - no_aug_iter:
            return min_lr
        # Middle: half-cosine from lr down to min_lr.
        decay_span = total_iters - warmup_total_iters - no_aug_iter
        angle = math.pi * (iters - warmup_total_iters) / decay_span
        return min_lr + 0.5 * (lr - min_lr) * (1.0 + math.cos(angle))

    def step_lr(lr, decay_rate, step_size, iters):
        if step_size < 1:
            raise ValueError("step_size must above 1.")
        stage = iters // step_size
        return lr * decay_rate ** stage

    if lr_decay_type != "cos":
        # decay_rate is chosen so that the last stage lands on min_lr.
        stage_decay = (min_lr / lr) ** (1 / (step_num - 1))
        stage_length = total_iters / step_num
        return partial(step_lr, lr, stage_decay, stage_length)

    warmup_length = min(max(warmup_iters_ratio * total_iters, 1), 3)
    warmup_start = max(warmup_lr_ratio * lr, 1e-6)
    tail_length = min(max(no_aug_iter_ratio * total_iters, 1), 15)
    return partial(yolox_warm_cos_lr, lr, min_lr, total_iters, warmup_length, warmup_start, tail_length)

72
vgg16.py Normal file
View File

@ -0,0 +1,72 @@
from tensorflow.keras.layers import Conv2D, Flatten, MaxPooling2D
"""
VGG16代码来源于https://github.com/bubbliiiing/Siamese-keras
也可以使用tensorflow.keras.applications.VGG16实现
"""
class VGG16:
    """VGG16 convolutional feature extractor without the dense head.

    Five blocks of 3x3 'same'-padded ReLU convolutions (2, 2, 3, 3 and 3
    layers with 64, 128, 256, 512 and 512 filters), each followed by a 2x2
    max-pooling with stride 2, and a final Flatten. Every layer is also
    exposed as an attribute under its layer name (``block1_conv1``, ...,
    ``block5_pool``, ``flatten``).
    """

    # (filters, number of conv layers) for block1 .. block5
    _BLOCK_SPECS = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    def __init__(self):
        # Layers in the order in which call() applies them.
        self._pipeline = []
        for block_no, (filters, conv_count) in enumerate(self._BLOCK_SPECS, start=1):
            for conv_no in range(1, conv_count + 1):
                conv_name = 'block{}_conv{}'.format(block_no, conv_no)
                self._add(Conv2D(filters, (3,3), activation = 'relu', padding = 'same', name = conv_name))
            pool_name = 'block{}_pool'.format(block_no)
            self._add(MaxPooling2D((2,2), strides = (2,2), name = pool_name))
        self._add(Flatten(name = 'flatten'))

    def _add(self, layer):
        # Register the layer as an attribute named after it and queue it.
        setattr(self, layer.name, layer)
        self._pipeline.append(layer)

    def call(self, inputs):
        """Run ``inputs`` through all layers and return the flattened features."""
        x = inputs
        for layer in self._pipeline:
            x = layer(x)
        return x

182
yolov3-tiny.cfg Normal file
View File

@ -0,0 +1,182 @@
[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=64
subdivisions=8
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 30000
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=1
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
###########
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=18
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=1
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=18
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
classes=1
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1