猿人学第8题-验证码图文点选

This commit is contained in:
aiyingfeng 2023-06-26 14:36:17 +08:00
parent 25e3826852
commit c85edc5847
36 changed files with 169 additions and 17 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.1 KiB

View File

@ -0,0 +1,147 @@
# 知识点:图像识别、去除干扰线、降噪
## 一、解决点选坐标问题
需要破解的验证码图片:
![debugger](./img/2.png)
查看响应内容,可以正常返回明文的数据,那就证明搞定验证码就可以解开这道题目。
![debugger](./img/4.png)
再查看请求内容,存在请求参数`answer`,仔细观察你会发现请求内容`785|466|746|156|`类似于点选验证码的坐标。
![debugger](./img/5.png)
这里我用审查元素查看点选验证码的`html`,不难发现它就是由多个 div 组合成的一个矩阵。
![debugger](./img/1.png)
多次点击验证图片的坐标,做了下面这张图,可以清楚的知道图片具体的坐标范围
![debugger](./img/3.png)
由此推断
第1个图片坐标值可以是 0 到 9
第2个图片坐标值可以是 10 到 19
第3个图片坐标值可以是 20 到 29
第4个图片坐标值可以是 300 到 309
第5个图片坐标值可以是 310 到 319
第6个图片坐标值可以是 320 到 329
第7个图片坐标值可以是 600 到 609
第8个图片坐标值可以是 610 到 619
第9个图片坐标值可以是 620 到 629
## 二、图像去噪点处理
### 选出出现次数最多的2种 RGB 颜色进行降噪处理
def noise_image(img_file):
    """Remove the two most frequent colours from an image.

    The image is opened, its pixels are counted per distinct RGB value, and
    the two values with the highest pixel counts are each handed to
    ``turn_white``.

    Args:
        img_file: Path (or file object) accepted by ``Image.open``.

    Returns:
        The opened image, after ``turn_white`` has been called on it for
        both colours.
    """
    # Force three channels so reshape(-1, 3) really yields one row per pixel,
    # even when the file is stored as palette or RGBA.
    img = Image.open(img_file).convert('RGB')
    # colors: every distinct RGB triple; counts: number of pixels carrying it.
    colors, counts = np.unique(
        np.array(img).reshape(-1, 3), axis=0, return_counts=True
    )
    # argsort yields exactly two indices. Matching on the count *values*
    # instead would also pick every other colour that happens to tie with
    # one of the top two. Sorting the indices keeps the call order ascending
    # by position in `colors`.
    top_two = np.sort(np.argsort(counts, kind='stable')[-2:])
    for index in top_two:
        r, g, b = colors[index]
        # NOTE(review): turn_white is defined outside this block; presumably
        # it repaints pixels of the given colour white in place — confirm.
        turn_white(img, r, g, b)
    return img
![debugger](./img/a-test.png)
### 移除干扰线条
def interference(img_file):
    """Remove interference lines from the captcha image.

    Every colour that occurs inside the fixed horizontal or vertical bands
    listed below is collected, and each non-white one is handed to
    ``turn_white``.

    Args:
        img_file: Path (or file object) accepted by ``Image.open``.

    Returns:
        The opened image, after ``turn_white`` has been called on it for
        every collected colour.
    """
    # getpixel must return (r, g, b) triples, so normalise the mode first.
    img = Image.open(img_file).convert('RGB')
    width, height = img.size

    # A set de-duplicates while scanning, so no per-pixel list is built.
    band_colors = set()
    # One pass over every pixel (width * height iterations).
    for x in range(width):
        in_column_band = 0 < x < 20 or 109 < x < 120 or 209 < x < 220
        for y in range(height):
            in_row_band = (
                0 < y < 10 or 96 < y < 105 or 199 < y < 209 or 292 < y < 299
            )
            if in_column_band or in_row_band:
                band_colors.add(img.getpixel((x, y)))

    # White needs no repainting.
    band_colors.discard((255, 255, 255))
    for r, g, b in band_colors:
        # NOTE(review): turn_white is defined outside this block; presumably
        # it repaints pixels of the given colour white in place — confirm.
        turn_white(img, r, g, b)
    return img
![debugger](./img/b-test.png)
### 灰度转换与二值化
def binary(img_file, standard=205):
    """Convert an image to greyscale and threshold it to black and white.

    Args:
        img_file: Path (or file object) accepted by ``Image.open``.
        standard: Threshold; grey levels strictly above it become 255,
            every other level becomes 0.

    Returns:
        The thresholded mode-``L`` PIL image.
    """
    grey = Image.open(img_file).convert('L')
    # point() applies the mapping to every pixel of the single grey band.
    return grey.point(lambda level: 255 if level > standard else 0)
![debugger](./img/c-test.png)
### 增强字体显示效果
def enhance(img_file):
    """Erode a greyscale image to make the glyphs stand out.

    Args:
        img_file: Path of the image, read in greyscale mode by ``cv2.imread``.

    Returns:
        The array returned by ``cv2.erode`` after two passes with a
        2x3 all-ones kernel.
    """
    grey = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
    structuring_element = np.ones((2, 3), dtype=np.uint8)
    return cv2.erode(grey, structuring_element, iterations=2)
![debugger](./img/d-test.png)
### 图片切割成单个字体便于识别
def image_clip(img_file):
    """Cut the image into a 3x3 grid of tiles, one per character.

    Each tile is also written to ``./img_a/f-<n>.jpg``, numbered from 1 in
    row-major order.

    Args:
        img_file: Path of the image, read in greyscale mode by ``cv2.imread``.

    Returns:
        List of the nine cropped arrays, in the order they were written.
    """
    grey = cv2.imread(img_file, 0)
    # Top-left corners in row-major order; columns start 10 px from the left.
    corners = [
        (top, left)
        for top in range(0, 300, 100)
        for left in range(10, 300, 100)
    ]
    tiles = []
    for index, (top, left) in enumerate(corners, start=1):
        # Array slicing order is [y0:y1, x0:x1].
        tile = grey[top:top + 100, left:left + 100]
        tiles.append(tile)
        cv2.imwrite(f"./img_a/f-{index}.jpg", tile)
    return tiles
图片1
![debugger](./img/f-1.jpg)
图片2
![debugger](./img/f-2.jpg)
## 三、图像识别

View File

@ -1,6 +1,5 @@
from PIL import Image
import numpy as np
import ddddocr
import cv2
@ -85,25 +84,31 @@ def enhance(img_file):
return img
def ocr(img_file):
    """Run ddddocr classification on an image file.

    Args:
        img_file: Path of the image; it is read as raw bytes.

    Returns:
        Whatever ``DdddOcr.classification`` returns for those bytes.
    """
    recognizer = ddddocr.DdddOcr()
    with open(img_file, 'rb') as fh:
        payload = fh.read()
    return recognizer.classification(payload)
def image_clip(img_file):
    """Cut the image into a 3x3 grid of tiles, one per character.

    Each tile is also written to ``./img_a/f-<n>.jpg``, numbered from 1 in
    row-major order.

    Args:
        img_file: Path of the image, read in greyscale mode by ``cv2.imread``.

    Returns:
        List of the nine cropped arrays, in the order they were written.
    """
    grey = cv2.imread(img_file, 0)
    # Top-left corners in row-major order; columns start 10 px from the left.
    corners = [
        (top, left)
        for top in range(0, 300, 100)
        for left in range(10, 300, 100)
    ]
    tiles = []
    for index, (top, left) in enumerate(corners, start=1):
        # Array slicing order is [y0:y1, x0:x1].
        tile = grey[top:top + 100, left:left + 100]
        tiles.append(tile)
        cv2.imwrite(f"./img_a/f-{index}.jpg", tile)
    return tiles
def run():
# Drive the preprocessing stages in order, saving each intermediate image to disk
# so the next stage can reopen it by path.
# NOTE(review): the statements below run the same stage sequence twice, once on
# paths under ./img and once on paths under ./img_a. This looks like the removed
# and added sides of a diff hunk shown together — confirm which half is current.
# --- sequence on ./img/1.png: denoise -> strip lines -> threshold -> erode -> OCR ---
image_a = noise_image('./img/1.png')
image_a.save('./img/1-test.png')
image_b = interference('./img/1-test.png')
image_b.save('./img/2-test.png')
image_c = binary('./img/2-test.png')
image_c.save('./img/3-test.png')
# enhance() returns a cv2 array, so it is written with cv2.imwrite, not .save().
image_d = enhance('./img/3-test.png')
cv2.imwrite('./img/4-test.png', image_d)
res = ocr('./img/4-test.png')
print(res)
# --- sequence on img_a/a.png: denoise -> strip lines -> threshold -> erode -> clip ---
image_a = noise_image('img_a/a.png')
image_a.save('./img_a/a-test.png')
image_b = interference('./img_a/a-test.png')
image_b.save('./img_a/b-test.png')
image_c = binary('./img_a/b-test.png')
image_c.save('./img_a/c-test.png')
image_d = enhance('./img_a/c-test.png')
cv2.imwrite('./img_a/d-test.png', image_d)
# Split the final image into per-character tiles; the returned list is not used here.
image_clip('./img_a/d-test.png')
if __name__ == '__main__':

Binary file not shown.

Before

Width:  |  Height:  |  Size: 49 KiB

After

Width:  |  Height:  |  Size: 180 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 111 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.5 KiB