猿人学第8题-验证码图文点选

2025-04-19 18:24:51 +08:00 · 2023-06-27 19:51:36 +08:00 · 2023-06-27 19:51:36 +08:00 · ab69bcc0ed
commit ab69bcc0ed
parent 311b13605b
16 changed files with 61 additions and 19 deletions
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/gan_rao_xian.py
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/gan_rao_xian.py
@ -1,4 +1,7 @@
+import random
+
 from PIL import Image
+from xin_fei_ocr import run_ocr
 import numpy as np
 import cv2

@ -87,18 +90,34 @@ def enhance(img_file):
 def image_clip(img_file):
-    """图片切割成单个字体便于识别"""
-    img = cv2.imread(img_file, 0)
-    clip_imgs = []
+    """Cut the captcha image into nine tiles and OCR each one.
+
+    The image is read in grayscale (flag ``0``) and cropped into 100x100
+    tiles at y = 0/100/200 and x = 10/110/210. Each tile is written to
+    ``./img_a/f-<num>.jpg`` and that file is passed to ``run_ocr``.
+
+    Returns:
+        dict mapping the right-stripped OCR result of a tile to
+        ``coordinate(num)`` for that tile. Tiles whose OCR results are
+        equal share one key, so the later tile overwrites the earlier one.
+
+    Raises:
+        OSError: if ``cv2.imread`` cannot read ``img_file``.
+    """
+    img = cv2.imread(img_file, 0)
+    if img is None:
+        # cv2.imread reports failure by returning None instead of raising.
+        raise OSError(f"cannot read image: {img_file}")
+    clip_imgs = {}
     num = 1
     for y in range(0, 300, 100):
         for x in range(10, 300, 100):
-            # 裁剪坐标为[y0:y1, x0:x1]
+            # Crop indices are ordered [y0:y1, x0:x1].
             cropped = img[y:y + 100, x:x + 100]
-            clip_imgs.append(cropped)
-            cv2.imwrite(f"./img_a/f-{num}.jpg", cropped)
+            tile_path = f"./img_a/f-{num}.jpg"
+            cv2.imwrite(tile_path, cropped)
+            res = run_ocr(tile_path)
+            clip_imgs[res.rstrip()] = coordinate(num)
             num += 1
     return clip_imgs


+# Lowest value of the ten-value range assigned to each tile number (1-9).
+_TILE_BASES = {
+    1: 0, 2: 10, 3: 20,
+    4: 300, 5: 310, 6: 320,
+    7: 600, 8: 610, 9: 620,
+}
+
+
+def coordinate(num):
+    """Return a random integer from the range assigned to tile ``num``.
+
+    Tile ``num`` (1-9) maps to the inclusive range ``[base, base + 9]``,
+    e.g. 1 -> 0..9 and 4 -> 300..309. Any other ``num`` yields ``None``.
+    NOTE(review): presumably these values encode a click position for the
+    answer API - confirm against the challenge page.
+    """
+    base = _TILE_BASES.get(num)
+    if base is None:
+        return None
+    # Draw once for the requested tile only, not once per table entry.
+    return random.randint(base, base + 9)
+
+
 def run():
    image_a = noise_image('img_a/a.png')
    image_a.save('./img_a/a-test.png')
@ -108,8 +127,10 @@ def run():
    image_c.save('./img_a/c-test.png')
    image_d = enhance('./img_a/c-test.png')
    cv2.imwrite('./img_a/d-test.png', image_d)
-    image_clip('./img_a/d-test.png')
+    text_dict = image_clip('./img_a/d-test.png')
+    return text_dict


 if __name__ == '__main__':
-    run()
+    res = run()
+    print(res)
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/a-test.png
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/a-test.png
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/a.png
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/a.png
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/b-test.png
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/b-test.png
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/c-test.png
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/c-test.png
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/d-test.png
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/d-test.png
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-1.jpg
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-1.jpg
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-2.jpg
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-2.jpg
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-3.jpg
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-3.jpg
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-4.jpg
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-4.jpg
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-5.jpg
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-5.jpg
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-6.jpg
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-6.jpg
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-7.jpg
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-7.jpg
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-8.jpg
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-8.jpg
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-9.jpg
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/img_a/f-9.jpg
--- a/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/main.py
+++ b/猿人学Web端爬虫攻防刷题平台/猿人学第8题-验证码图文点选/main.py
@ -1,9 +1,7 @@
-from PIL import Image
+import gan_rao_xian
+import requests
 import re
 import base64
-import requests
-import time
-import json


 class YuanrenXuan(object):
@ -12,24 +10,47 @@ class YuanrenXuan(object):
        self.url = "https://match.yuanrenxue.cn/api/match/8_verify"
        self.sum_value = 0

-    def get_task(self, i):
-        t = int(time.time()) * 1000
-        req = requests.get(self.url)
+    def get_task(self):
+        """Fetch one captcha, save its image and return the target texts.
+
+        Requests ``self.url``, reads the ``html`` field of the JSON reply,
+        extracts the texts to click (in order) and the base64 JPEG payload,
+        and writes the decoded image to ``img_a/a.png``.
+
+        Returns:
+            list of the target texts, in the order given by the page.
+
+        Raises:
+            IndexError: if either pattern is not found in the HTML.
+        """
+        # A timeout keeps the retry loop in run() from hanging forever.
+        req = requests.get(self.url, timeout=10)
+        # Decode the JSON body once and reuse the HTML for both patterns.
+        html = req.json().get('html')
+        text = re.findall(r'请依次点击：---<p>(.*)</p>--- <br>提示', html)[0]
+        text_list = text.split('</p>---<p>')
-        img = re.findall(r'<img src="(.*)" alt="">', req.json().get('html'))[0]
+        img = re.findall(r'<img src="(.*)" alt="">', html)[0]
         img = img.replace('data:image/jpeg;base64,', '')
-        print(img)
         page_content = base64.b64decode(img)
-        with open('img/1.png', 'wb') as f:
+        with open('img_a/a.png', 'wb') as f:
             f.write(page_content)
-        exit()
-        return req.text
+        return text_list
+
+    def get_match(self, page, answer):
+        """Submit ``answer`` for ``page`` and return the decoded JSON reply.
+
+        Args:
+            page: page number sent as the ``page`` query parameter.
+            answer: answer string sent as the ``answer`` query parameter.
+
+        Returns:
+            The parsed JSON body of the response (also printed).
+        """
+        url = "https://match.yuanrenxue.cn/api/match/8"
+        # NOTE(review): the session cookie is hard-coded; it should come from
+        # configuration and not be committed to the repository.
+        headers = {
+            'cookie': 'sessionid=iikaj9bo7vzqv4mz1xvryl13o7z98l13;'
+        }
+        # Let requests build and percent-encode the query string; a GET
+        # needs no request body.
+        response = requests.get(
+            url,
+            params={'page': page, 'answer': answer},
+            headers=headers,
+            timeout=10,
+        )
+        result = response.json()
+        print(result)
+        return result

    def run(self):
-        for i in range(1, 6):
-            res_dict = json.loads(self.get_task(i))
-            for j in res_dict.get('data'):
-                self.sum_value += j.get('value')
-        print(self.sum_value)
+        """Solve captchas until an answer has been submitted for pages 1-5.
+
+        Each round fetches a captcha, OCRs its tiles via
+        ``gan_rao_xian.run()`` and looks up every target text. A round in
+        which any target is missing from the OCR result is discarded and a
+        new captcha is fetched without advancing the page.
+        """
+        num = 1
+        # Cover pages 1..5, like the previous range(1, 6) loop.
+        while num <= 5:
+            text_list = self.get_task()
+            print(text_list)
+            text_dict = gan_rao_xian.run()
+            print(text_dict)
+            answer_list = [text_dict.get(text) for text in text_list]
+            if any(value is None for value in answer_list):
+                # OCR missed at least one target; retry with a new captcha.
+                continue
+            answer = '|'.join(str(value) for value in answer_list)
+            print(num, answer)
+            self.get_match(num, answer)
+            num += 1


 if __name__ == '__main__':