猿人学第8题-验证码图文点选
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/...\0.jpg
Normal file
After Width: | Height: | Size: 5.6 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/...\1.jpg
Normal file
After Width: | Height: | Size: 4.5 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/...\2.jpg
Normal file
After Width: | Height: | Size: 5.4 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/...\3.jpg
Normal file
After Width: | Height: | Size: 4.9 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/...\4.jpg
Normal file
After Width: | Height: | Size: 5.7 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/...\5.jpg
Normal file
After Width: | Height: | Size: 4.7 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/...\6.jpg
Normal file
After Width: | Height: | Size: 4.9 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/...\7.jpg
Normal file
After Width: | Height: | Size: 5.4 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/...\8.jpg
Normal file
After Width: | Height: | Size: 5.1 KiB |
147
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/README.md
Normal file
@ -0,0 +1,147 @@
|
||||
# 知识点:图像识别、去除干扰线、降噪
|
||||
|
||||
## 一、解决点选坐标问题
|
||||
|
||||
需要破解的验证码图片:
|
||||
|
||||

|
||||
|
||||
查看响应内容,可以正常返回明文的数据,那就证明搞定验证码就可以解开这道题目。
|
||||
|
||||

|
||||
|
||||
再查看请求内容,存在请求参数`answer`,仔细观察你会发现请求内容`785|466|746|156|`类似于点选验证码的坐标。
|
||||
|
||||

|
||||
|
||||
这里我用审查元素查看点选验证码的`html`,不难发现它其实就是由 div 组合成的一个矩阵。
|
||||
|
||||

|
||||
|
||||
多次点击验证码图片并记录坐标,整理成下面这张图,可以清楚地看出每张图片具体的坐标范围。
|
||||
|
||||

|
||||
|
||||
由此推断
|
||||
|
||||
第1个图片坐标值可以是(0~9)
|
||||
第2个图片坐标值可以是(10~19)
|
||||
第3个图片坐标值可以是(20~29)
|
||||
第4个图片坐标值可以是(300~309)
|
||||
第5个图片坐标值可以是(310~319)
|
||||
第6个图片坐标值可以是(320~329)
|
||||
第7个图片坐标值可以是(600~609)
|
||||
第8个图片坐标值可以是(610~619)
|
||||
第9个图片坐标值可以是(620~629)
|
||||
|
||||
## 二、图像去噪点处理
|
||||
|
||||
### 选出出现次数最多的 2 种 RGB 颜色,进行降噪处理
|
||||
|
||||
def noise_image(img_file):
    """Pick the two most frequent colours of an image and pass each to ``turn_white``.

    Args:
        img_file: Image location handed straight to ``Image.open``.

    Returns:
        The opened PIL image, returned after ``turn_white`` has been called
        once per selected colour.
    """
    img = Image.open(img_file)
    # Count colours on an RGB view so every row of the reshaped array is
    # exactly one pixel, whatever mode the file was stored in.
    pixels = np.array(img.convert('RGB')).reshape(-1, 3)
    # colors: distinct RGB triples; counts: number of pixels of each triple.
    colors, counts = np.unique(pixels, axis=0, return_counts=True)
    # Select by index of the two largest counts. Matching on "count value is
    # one of the top two values" would pick extra colours whenever several
    # colours share the same count.
    for index in np.argsort(counts, kind='stable')[-2:]:
        r, g, b = colors[index]
        # turn_white is defined elsewhere in the project; presumably it
        # repaints pixels of this colour white in place - confirm there.
        turn_white(img, r, g, b)
    return img
|
||||
|
||||

|
||||
|
||||
### 移除干扰线条
|
||||
|
||||
def interference(img_file):
    """Remove interference lines from an image.

    Every colour that occurs inside the fixed gap bands between the
    characters is collected, and each non-white one is passed to
    ``turn_white``.

    Args:
        img_file: Image location handed straight to ``Image.open``.

    Returns:
        The opened PIL image after the ``turn_white`` calls.
    """
    img = Image.open(img_file)
    height, width = img.height, img.width

    def in_row_gap(y):
        # Horizontal bands (by row) where no character is expected.
        return 0 < y < 10 or 96 < y < 105 or 199 < y < 209 or 292 < y < 299

    def in_col_gap(x):
        # Vertical bands (by column) where no character is expected.
        return 0 < x < 20 or 109 < x < 120 or 209 < x < 220

    # Distinct colours seen inside any gap band.
    band_colors = set()
    for x in range(width):
        column_hit = in_col_gap(x)
        for y in range(height):
            # Every pixel is read and unpacked as a 3-tuple, so the image is
            # expected to yield (r, g, b) pixels.
            r, g, b = img.getpixel((x, y))
            if column_hit or in_row_gap(y):
                band_colors.add((r, g, b))

    # Clear each collected colour, leaving pure white alone.
    for r, g, b in np.array(list(band_colors)):
        if r != 255 or g != 255 or b != 255:
            turn_white(img, r, g, b)
    return img
|
||||
|
||||

|
||||
|
||||
### 灰度转换并二值化
|
||||
|
||||
def binary(img_file, standard=205):
    """Convert an image to greyscale and binarise it against a threshold.

    Args:
        img_file: Image location handed straight to ``Image.open``.
        standard: Threshold; levels strictly above it become 255, all
            others become 0.

    Returns:
        A mode ``'L'`` PIL image containing only the values 0 and 255.
    """
    grey = Image.open(img_file).convert('L')
    # point() applies the mapping to every pixel of the single band.
    return grey.point(lambda level: 255 if level > standard else 0)
|
||||
|
||||

|
||||
|
||||
### 增强字体显示效果
|
||||
|
||||
def enhance(img_file):
    """Make the characters stand out by eroding the image.

    Args:
        img_file: Path of the image to load with ``cv2.imread``.

    Returns:
        The eroded image array produced by ``cv2.erode``.
    """
    # Flag 0 makes OpenCV load the file as a single-channel greyscale image.
    grey = cv2.imread(img_file, 0)
    # 2x3 structuring element of ones, applied for two iterations.
    structuring = np.ones((2, 3), 'uint8')
    return cv2.erode(grey, structuring, iterations=2)
|
||||
|
||||

|
||||
|
||||
### 图片切割成单个字体便于识别
|
||||
|
||||
def image_clip(img_file):
    """Cut the image into a 3x3 grid of tiles so each character stands alone.

    Tiles are up to 100x100 pixels, taken row by row starting at column 10.
    Each tile is also written to ``./img_a/f-<n>.jpg`` with n counting from 1.

    Args:
        img_file: Path of the image to load with ``cv2.imread``.

    Returns:
        List of the cropped arrays in row-major order.
    """
    img = cv2.imread(img_file, 0)
    # (top, left) corner of every tile, rows first.
    origins = [
        (top, left)
        for top in range(0, 300, 100)
        for left in range(10, 300, 100)
    ]
    clip_imgs = []
    for num, (top, left) in enumerate(origins, start=1):
        # Array slicing order is [row_start:row_end, col_start:col_end].
        tile = img[top:top + 100, left:left + 100]
        clip_imgs.append(tile)
        cv2.imwrite(f"./img_a/f-{num}.jpg", tile)
    return clip_imgs
|
||||
|
||||
图片1
|
||||
|
||||

|
||||
|
||||
图片2
|
||||
|
||||

|
||||
|
||||
## 三、图像识别
|
@ -1,6 +1,5 @@
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import ddddocr
|
||||
import cv2
|
||||
|
||||
|
||||
@ -85,25 +84,31 @@ def enhance(img_file):
|
||||
return img
|
||||
|
||||
|
||||
def ocr(img_file):
    """Recognise the text in an image file with ddddocr.

    Args:
        img_file: Path of the image; it is read as raw bytes.

    Returns:
        Whatever ``DdddOcr.classification`` returns for those bytes.
    """
    # Named differently from the function so the local does not shadow it.
    recognizer = ddddocr.DdddOcr()
    with open(img_file, 'rb') as fh:
        payload = fh.read()
    return recognizer.classification(payload)
|
||||
def image_clip(img_file):
    """Cut the image into a 3x3 grid of tiles so each character stands alone.

    Tiles are up to 100x100 pixels, taken row by row starting at column 10.
    Each tile is also written to ``./img_a/f-<n>.jpg`` with n counting from 1.

    Args:
        img_file: Path of the image to load with ``cv2.imread``.

    Returns:
        List of the cropped arrays in row-major order.
    """
    img = cv2.imread(img_file, 0)
    # (top, left) corner of every tile, rows first.
    origins = [
        (top, left)
        for top in range(0, 300, 100)
        for left in range(10, 300, 100)
    ]
    clip_imgs = []
    for num, (top, left) in enumerate(origins, start=1):
        # Array slicing order is [row_start:row_end, col_start:col_end].
        tile = img[top:top + 100, left:left + 100]
        clip_imgs.append(tile)
        cv2.imwrite(f"./img_a/f-{num}.jpg", tile)
    return clip_imgs
|
||||
|
||||
|
||||
def run():
    """Run the clean-up pipeline on two sample images.

    ``./img/1.png`` is denoised, stripped of interference lines, binarised,
    eroded and finally passed to ``ocr`` (result printed). ``img_a/a.png``
    goes through the same four steps and is then split with ``image_clip``.
    Every intermediate image is written to disk and re-read by the next step.
    """
    # First sample: denoise -> remove lines -> binarise -> erode -> OCR.
    noise_image('./img/1.png').save('./img/1-test.png')
    interference('./img/1-test.png').save('./img/2-test.png')
    binary('./img/2-test.png').save('./img/3-test.png')
    cv2.imwrite('./img/4-test.png', enhance('./img/3-test.png'))
    print(ocr('./img/4-test.png'))

    # Second sample: same four steps, then cut into per-character tiles.
    noise_image('img_a/a.png').save('./img_a/a-test.png')
    interference('./img_a/a-test.png').save('./img_a/b-test.png')
    binary('./img_a/b-test.png').save('./img_a/c-test.png')
    cv2.imwrite('./img_a/d-test.png', enhance('./img_a/c-test.png'))
    image_clip('./img_a/d-test.png')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
Before Width: | Height: | Size: 49 KiB After Width: | Height: | Size: 180 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img/2.png
Normal file
After Width: | Height: | Size: 49 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img/3.png
Normal file
After Width: | Height: | Size: 111 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img/4.png
Normal file
After Width: | Height: | Size: 40 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img/5.png
Normal file
After Width: | Height: | Size: 20 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img/a-test.png
Normal file
After Width: | Height: | Size: 27 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img/b-test.png
Normal file
After Width: | Height: | Size: 22 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img/c-test.png
Normal file
After Width: | Height: | Size: 8.8 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img/d-test.png
Normal file
After Width: | Height: | Size: 5.3 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img/f-1.jpg
Normal file
After Width: | Height: | Size: 4.7 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img/f-2.jpg
Normal file
After Width: | Height: | Size: 5.1 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/a-test.png
Normal file
After Width: | Height: | Size: 27 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/a.png
Normal file
After Width: | Height: | Size: 49 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/b-test.png
Normal file
After Width: | Height: | Size: 22 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/c-test.png
Normal file
After Width: | Height: | Size: 8.8 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/d-test.png
Normal file
After Width: | Height: | Size: 5.3 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-1.jpg
Normal file
After Width: | Height: | Size: 4.7 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-2.jpg
Normal file
After Width: | Height: | Size: 5.1 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-3.jpg
Normal file
After Width: | Height: | Size: 4.5 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-4.jpg
Normal file
After Width: | Height: | Size: 3.9 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-5.jpg
Normal file
After Width: | Height: | Size: 4.3 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-6.jpg
Normal file
After Width: | Height: | Size: 5.2 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-7.jpg
Normal file
After Width: | Height: | Size: 5.5 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-8.jpg
Normal file
After Width: | Height: | Size: 4.3 KiB |
BIN
猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-9.jpg
Normal file
After Width: | Height: | Size: 4.5 KiB |