# dianxuan/predict.py

import cv2
import numpy as np
from keras.models import load_model
from keras.layers import Lambda
from keras import backend as K
import os
import random
from PIL import Image
# Image to run the prediction on: one file picked at random from ./data.
# To test a specific image instead, assign its path to image_path directly
# (e.g. image_path = "./sample/1691156257961.jpg").
data_dir = "./data"
candidates = os.listdir(data_dir)
if not candidates:
    # random.choice() would raise a bare IndexError on an empty list; stop
    # with a readable message ("directory is empty, nothing to predict").
    print('>>> 目录为空，没有可预测的图片：', data_dir)
    raise SystemExit(1)
image_path = data_dir + "/" + random.choice(candidates)

"""
    YOLOv3 segmentation model (locates the characters in the image)
"""
# Model files of the YOLOv3-tiny network (config + weights, going by the
# file names).
cfg = "./yolov3-tiny.cfg"
weight = "./yolov3-tiny_17000.weights"

# Build the OpenCV DNN network from the weights/config pair.
net = cv2.dnn.readNet(weight, cfg)
"""
    Siamese network: comparison (similarity) model
"""
# Patch geometry used with the comparison model: character crops are resized
# with these dimensions and reshaped to (1, 52, 52, 3) before prediction.
resize_height = 52
resize_width = 52
channel = 3

# Custom objects for the Keras model: a Lambda layer returning the
# element-wise absolute difference of its two inputs, followed below by the
# custom loss and accuracy functions.
output = Lambda(lambda pair: K.abs(pair[0] - pair[1]))
def contrastive_loss(y_true, y_pred):
    """Contrastive loss with a fixed margin of 1.

    Elements labelled 1 contribute ``y_pred ** 2``; elements labelled 0
    contribute ``max(margin - y_pred, 0) ** 2``. The loss is the mean of
    these contributions over all elements.
    """
    margin = 1
    labelled_one_term = y_true * K.square(y_pred)
    margin_shortfall = K.maximum(margin - y_pred, 0)
    labelled_zero_term = (1 - y_true) * K.square(margin_shortfall)
    return K.mean(labelled_one_term + labelled_zero_term)

def binary_accuracy(y_true, y_pred):
    """Return the fraction of predictions that agree with the labels.

    A prediction below 0.5 is mapped to 1 and anything else to 0 (cast to the
    dtype of ``y_true``) before being compared with ``y_true``.
    """
    predicted_labels = K.cast(y_pred < 0.5, y_true.dtype)
    agreement = K.equal(y_true, predicted_labels)
    return K.mean(agreement)
# Checkpoint of the comparison model. This rebinds `weight`, which held the
# YOLO weights path up to this point.
weight = "./best.h5"

# Load the Keras model, registering the custom Lambda layer under the name
# 'output'. The custom loss and accuracy functions are not passed in here.
custom_objects = {'output': output}
model = load_model(weight, custom_objects=custom_objects)

# Label list with a single entry; not referenced by the code shown here.
classes = ["text"]


img = cv2.imread(image_path)
if img is None:
    # cv2.imread() returns None instead of raising when the file is missing
    # or cannot be decoded; stop here with a readable message
    # ("cannot read image") rather than failing later on img.shape.
    print('>>> 无法读取图片：', image_path)
    raise SystemExit(1)

cv2.namedWindow('display')

# --- YOLO: run the detector to find the characters in the image ---
height, width, channels = img.shape
# Positional arguments: scale factor (~1/255), network input size, mean to
# subtract (none), swapRB=True (swap the R and B channels).
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(net.getUnconnectedOutLayersNames())
# Detections gathered from all output layers, kept as parallel lists.
class_ids = []
confidences = []
boxes = []

for layer_output in outs:
    for det in layer_output:
        # Elements 0-3 are used as the box centre and size relative to the
        # image; elements from index 5 on are the per-class scores.
        class_scores = det[5:]
        best_class = np.argmax(class_scores)
        score = class_scores[best_class]

        # Drop anything that does not clear the 0.1 score threshold.
        if not score > 0.1:
            continue

        box_w = int(det[2] * width)
        box_h = int(det[3] * height)
        mid_x = int(det[0] * width)
        mid_y = int(det[1] * height)

        # Convert centre/size to a top-left anchored [x, y, w, h] box.
        left = int(mid_x - box_w / 2)
        top = int(mid_y - box_h / 2)

        boxes.append([left, top, box_w, box_h])
        confidences.append(float(score))
        class_ids.append(best_class)

# Non-maximum suppression; the score and NMS thresholds are both 0.1.
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.1, 0.1)

# Drawing style for the result overlay.
font = cv2.FONT_HERSHEY_PLAIN
color = (0, 255, 0)
thickness = 2

# Keep only the boxes whose index survived NMS, preserving their order.
kept_indexes = {int(idx) for idx in np.asarray(indexes).reshape(-1)}
new_boxes = [list(box) for pos, box in enumerate(boxes) if pos in kept_indexes]

total = len(new_boxes)

print('>>> 检测出：', total,'个字符')

# The detections must split evenly into two groups of the same size: the
# characters to click and the characters to compare them with.
if total == 0:
    # Nothing detected ("no characters detected"): nothing to match.
    print('>>> 未检测到任何字符')
    raise SystemExit(1)
if total % 2 != 0:
    print('>>> YOLO分割有误，顶部字符数量与点选数量不匹配')
    # Exit with a non-zero status; the site-provided exit() helper is not
    # guaranteed to exist and would report success.
    raise SystemExit(1)

top_total = total // 2

# Sort once by box width, widest first (sorted() is stable, so ties keep
# their detection order).
boxes_by_width = sorted(new_boxes, key=lambda box: box[2], reverse=True)

# The widest half: the characters that have to be clicked.
w_max_boxes = boxes_by_width[:top_total]

# The remaining half: the characters to compare against, ordered from left
# to right by their x coordinate.
w_min_box = sorted(boxes_by_width[top_total:], key=lambda box: box[0])


def _crop_patch(image, box):
    """Cut ``box`` out of ``image`` and resize it to the comparison patch size.

    ``box`` is ``[x, y, w, h]`` in pixels. The detector can return boxes that
    stick out of the image (negative ``x``/``y`` or an overshooting far edge);
    a negative start index would make the NumPy slice wrap around and come
    back empty, so the box is clamped to the image bounds first.

    Raises:
        ValueError: if nothing of the box lies inside the image.
    """
    img_h, img_w = image.shape[:2]
    x, y, w, h = box
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = min(x + w, img_w), min(y + h, img_h)
    if x1 <= x0 or y1 <= y0:
        raise ValueError('detected box {} lies outside the image'.format(box))
    # cv2.resize expects dsize as (width, height).
    return cv2.resize(image[y0:y1, x0:x1], (resize_width, resize_height))


# Cut out the individual character patches for both groups.
w_max_image = [_crop_patch(img, box) for box in w_max_boxes]
w_min_image = [_crop_patch(img, box) for box in w_min_box]

# Scale pixel values from [0, 255] to [0, 1].
w_max_image_np = np.array(w_max_image) / 255
w_min_image_np = np.array(w_min_image) / 255

# select_index[i] is the index (in left-to-right order, i.e. into w_min_box)
# of the comparison character that best matches click target i.
select_index = []

# Debug crops are written to ./1/; make sure the directory exists so the
# cv2.imwrite calls below have somewhere to write.
os.makedirs('./1', exist_ok=True)

# Compare every click target with every comparison character and keep the
# one with the highest model score.
# NOTE(review): assumes a higher model output means "more similar", as the
# original `rate > cache_rate` comparison did — confirm against the model.
for i, left_x in enumerate(w_max_image_np):
    print('>>> 开始对比第', i+1, '个字符')
    cv2.imwrite('./1/1_{}.jpg'.format(i), left_x * 255)

    # The target patch is the same for the whole inner loop; reshape it once.
    target_batch = left_x.reshape(1, resize_height, resize_width, channel)
    num_index = 0
    best_rate = float('-inf')
    for k, left_y in enumerate(w_min_image_np):
        candidate_batch = left_y.reshape(1, resize_height, resize_width, channel)
        predict = model.predict([target_batch, candidate_batch])
        rate = predict[0][0]
        # Track the best score seen so far. The previous code overwrote the
        # cached score on every iteration, so each score was only compared
        # with the one right before it.
        if rate > best_rate:
            best_rate = rate
            num_index = k

    if len(w_min_image_np):
        cv2.imwrite('./1/2_{}.jpg'.format(i), w_min_image_np[num_index] * 255)

    select_index.append(num_index)

print('>>> 对比完成，结果为：', select_index)

if len(set(select_index)) != len(select_index):
    # Two targets matched the same comparison character ("warning: click
    # order may be unreliable"); the ordering below is then ambiguous.
    print('>>> 警告：多个待点选字符匹配到同一个提示字符，点选顺序可能不可靠')

# The comparison characters are ordered left to right, so the click order is
# the targets sorted by the index of their matched comparison character.
# select_index holds indices into w_min_box and must not be used to index
# w_max_boxes directly.
click_order = sorted(range(len(select_index)), key=lambda target: select_index[target])

location = []

for step, target in enumerate(click_order, start=1):
    x, y, w, h = w_max_boxes[target]
    cv2.rectangle(img, (x, y), (x+w, y+h), color, thickness)
    cv2.putText(img, str(step), (x, y+h), font, 1, color, thickness)
    # Centre point of the box in image coordinates.
    location.append([x+w/2, y+h/2])

print('>>> 位置坐标：', location)

cv2.imshow('display', img)
cv2.waitKey(0)