mirror of
https://github.com/luzhisheng/js_reverse.git
synced 2025-04-20 03:59:57 +08:00
17验证码图片验证码-识别
This commit is contained in:
parent
58cc3e96d6
commit
2876e0582e
34
猿人学练习/17验证码图片验证码-识别/README.md
Normal file
34
猿人学练习/17验证码图片验证码-识别/README.md
Normal file
@ -0,0 +1,34 @@
|
||||
# 知识点: headers反爬,图片验证码,机器学习,图片验证码去噪点
|
||||
|
||||
## 解题思路
|
||||
|
||||
先来看一下图片验证码,为了提高识别的准确度就需要去掉噪点,用windows的画图工具打开
|
||||
|
||||

|
||||
|
||||
你会发现大部分噪点的r, g, b值都是0,0,0,也就是黑色
|
||||
|
||||

|
||||
|
||||
这就好处理了,将黑色部分全部变成白色
|
||||
|
||||
# 将部分像素值变为纯白色
|
||||
if r == 0 and g == 0 and b == 0:
|
||||
img.putpixel((i, j), (255, 255, 255))
|
||||
|
||||
再将图片灰度转换,二值化
|
||||
|
||||
img = img.convert('L')
|
||||
pixels = img.load()
|
||||
for x in range(img.width):
|
||||
for y in range(img.height):
|
||||
if pixels[x, y] > standard:
|
||||
pixels[x, y] = 255
|
||||
else:
|
||||
pixels[x, y] = 0
|
||||
|
||||
看一下效果图
|
||||
|
||||

|
||||
|
||||
接下来就和18题一样,利用百度ocr进行文字识别
|
74
猿人学练习/17验证码图片验证码-识别/main.py
Normal file
74
猿人学练习/17验证码图片验证码-识别/main.py
Normal file
@ -0,0 +1,74 @@
|
||||
import 百度手写文字识别
|
||||
import 去除干扰线
|
||||
import requests
|
||||
from urllib.parse import quote
|
||||
|
||||
|
||||
def code_value():
    """Download the challenge-17 captcha, denoise it and OCR it.

    The image returned by the verify endpoint is written to ``img/1.png``.
    ``去除干扰线.run()`` and ``百度手写文字识别.run()`` are then called in that
    order, and the ``words`` entries of the OCR response's ``words_result``
    list are concatenated.

    :return: the recognised text, or ``''`` when any step fails (the
        exception is printed, not raised, so the caller can simply retry).
    """
    url = "https://www.python-spider.com/api/challenge17/verify"
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,'
                  '*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'cookie': 'sessionid=你的sessionid;',
        'sec-ch-ua': '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
                      ' Chrome/112.0.0.0 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers)
        # Do not save an HTTP error body as if it were the captcha image.
        response.raise_for_status()
        with open('img/1.png', 'wb') as f:
            f.write(response.content)
        去除干扰线.run()
        response_json = 百度手写文字识别.run()
        # A response without 'words_result' (e.g. an error payload) is
        # treated as "nothing recognised" instead of iterating over None.
        words_result = response_json.get('words_result') or []
        words = ''.join(word.get('words') or '' for word in words_result)
        print(f"识别的文字是{words}")
        return words
    except Exception as e:
        # Deliberately broad: any failure yields an empty code, which the
        # server rejects and the caller answers with another attempt.
        print(e)
        return ''
|
||||
|
||||
|
||||
def challenge18(page):
    """POST one page request to the challenge-17 API and return its JSON.

    For every page except the first, a captcha is solved through
    ``code_value()`` and sent percent-encoded in the ``code`` form field;
    page 1 is requested with an empty code.

    :param page: page number to request.
    :return: the decoded JSON body of the response.
    """
    if page != 1:
        # The second positional parameter of quote() is ``safe``, not an
        # encoding. The former 'utf-8' argument only declared characters
        # that are never escaped anyway, so safe='' yields the same output
        # while stating the intent: escape everything, including '/'.
        code = quote(code_value(), safe='')
    else:
        code = ''
    url = "https://www.python-spider.com/api/challenge17"
    payload = f"page={page}&code={code}"
    headers = {
        'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'cookie': 'sessionid=你的sessionid;'
    }
    response = requests.post(url, headers=headers, data=payload)
    return response.json()
|
||||
|
||||
|
||||
def run():
    """Walk pages 1 to 100 of the challenge and print the running total.

    Each page is fetched through ``challenge18``. A response whose
    ``message`` equals ``'verify_failed'`` causes the same page to be
    requested again; otherwise the integer ``value`` of every entry in the
    response's ``data`` list is added to the total, which is printed after
    each accepted page.
    """
    total = 0
    page = 1
    while page < 101:
        res_dict = challenge18(page)

        if res_dict.get('message') == 'verify_failed':
            # Captcha rejected: stay on this page and try again.
            print(f"验证码没有通过{res_dict}-{page}")
            continue
        print(f"验证码通过{res_dict}-{page}")

        total += sum(int(entry.get('value')) for entry in res_dict.get('data'))
        print(total)
        page += 1
|
||||
|
||||
|
||||
# Script entry point: run the page walk only when executed directly.
if __name__ == "__main__":
    run()
|
32
猿人学练习/17验证码图片验证码-识别/去除干扰线.py
Normal file
32
猿人学练习/17验证码图片验证码-识别/去除干扰线.py
Normal file
@ -0,0 +1,32 @@
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def processing_image(img_file, standard=205):
    """Denoise a captcha image and binarise it.

    Pure-black pixels ``(0, 0, 0)`` are painted white, the image is
    converted to greyscale, and every pixel brighter than ``standard``
    becomes 255 (white) while all others become 0 (black).

    :param img_file: image source handed to ``Image.open``.
    :param standard: greyscale threshold separating white from black.
    :return: the processed image in mode ``'L'``.
    """
    # Normalise to RGB so every pixel is an (r, g, b) triple. RGBA, palette
    # or greyscale PNGs would otherwise break the three-way comparison.
    with Image.open(img_file) as source:
        img = source.convert('RGB')

    pixels = img.load()
    width, height = img.size
    for x in range(width):
        for y in range(height):
            # Turn the pure-black noise dots white.
            if pixels[x, y] == (0, 0, 0):
                pixels[x, y] = (255, 255, 255)

    # Greyscale conversion followed by thresholding. point() applies the
    # mapping through a lookup table instead of a per-pixel Python loop.
    return img.convert('L').point(lambda level: 255 if level > standard else 0)
|
||||
|
||||
|
||||
def run():
    """Denoise ``./img/1.png`` and save the result as ``./img/1-test.png``."""
    processing_image('./img/1.png').save('./img/1-test.png')
|
||||
|
||||
|
||||
# Script entry point: process the captcha only when executed directly.
if __name__ == "__main__":
    run()
|
49
猿人学练习/17验证码图片验证码-识别/百度手写文字识别.py
Normal file
49
猿人学练习/17验证码图片验证码-识别/百度手写文字识别.py
Normal file
@ -0,0 +1,49 @@
|
||||
from urllib.parse import quote
|
||||
import base64
|
||||
import urllib
|
||||
import requests
|
||||
|
||||
API_KEY = ""
|
||||
SECRET_KEY = ""
|
||||
|
||||
|
||||
def get_file_content_as_base64(path, urlencoded=False):
    """
    Read a file and return its content as Base64 text.

    :param path: path of the file to read
    :param urlencoded: when true, additionally escape the Base64 text with
        ``urllib.parse.quote_plus``
    :return: the Base64 text, URL-encoded if requested
    """
    with open(path, "rb") as source:
        raw = source.read()
    encoded = base64.b64encode(raw).decode("utf8")
    return urllib.parse.quote_plus(encoded) if urlencoded else encoded
|
||||
|
||||
|
||||
def get_access_token():
    """
    Request an access token from the OAuth endpoint using API_KEY and
    SECRET_KEY.

    :return: the ``access_token`` field of the JSON response passed through
        ``str``; when the field is absent this is the literal text ``'None'``
    """
    token_url = "https://aip.baidubce.com/oauth/2.0/token"
    credentials = {
        "grant_type": "client_credentials",
        "client_id": API_KEY,
        "client_secret": SECRET_KEY,
    }
    reply = requests.post(token_url, params=credentials)
    token = reply.json().get("access_token")
    return str(token)
|
||||
|
||||
|
||||
def run():
    """Send ``./img/1-test.png`` to the handwriting OCR endpoint.

    The image is Base64-encoded and URL-escaped, posted as the ``image``
    form field, and the decoded JSON response is printed and returned.

    :return: the decoded JSON body of the OCR response.
    """
    # Let the helper do the escaping. The former quote(text, 'utf-8') passed
    # 'utf-8' as the ``safe`` argument; for Base64 text, quote_plus produces
    # the same output ('+', '/' and '=' are all escaped).
    image_param = get_file_content_as_base64('./img/1-test.png', urlencoded=True)
    url = "https://aip.baidubce.com/rest/2.0/ocr/v1/handwriting?access_token=" + get_access_token()
    payload = f'image={image_param}'
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': 'application/json'
    }
    response = requests.post(url, headers=headers, data=payload)
    # Decode the body once and reuse it for both the log line and the result.
    result = response.json()
    print(result)
    return result
|
||||
|
||||
|
||||
# Script entry point: run one OCR request only when executed directly.
if __name__ == "__main__":
    run()
|
Loading…
x
Reference in New Issue
Block a user