Change keyboard module

2025-04-22 04:35:26 +08:00 · 2023-07-09 09:22:40 +08:00 · 2023-07-09 09:22:40 +08:00 · ae07c91cf1
commit ae07c91cf1
parent bb6e2b2881
9 changed files with 64 additions and 42 deletions
--- a/.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py
+++ b/.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py
@@ -12,8 +12,8 @@ import sys
 # import base64
 # import hashlib
 import time
-# import keyboard
 import requests
+from urllib.parse import urljoin
 from lxml import etree
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.common.keys import Keys
@@ -39,14 +39,14 @@ import os
 from commandline_config import Config
 import pytesseract
 from PIL import Image
+from pynput.keyboard import Key, Listener
 # import uuid
 from threading import Thread, Event
 from myChrome import MyChrome, MyUCChrome
-from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, write_to_csv, write_to_excel
+from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press, on_release_creator, write_to_csv, write_to_excel
 desired_capabilities = DesiredCapabilities.CHROME
 desired_capabilities["pageLoadStrategy"] = "none"

-
 class BrowserThread(Thread):
    def __init__(self, browser_t, id, service, version, event, saveName, config):
        Thread.__init__(self)
@@ -1172,21 +1172,14 @@ class BrowserThread(Thread):
                        continue
                    # p["relativeXPath"] = p["relativeXPath"].lower()
                    # p["relativeXPath"] = lowercase_tags_in_xpath(p["relativeXPath"])
-                    if p["nodeType"] == 2:
-                        if p["relativeXPath"].find("/@href") >= 0:
+                    # 已经有text()或@href了，不需要再加
+                    if p["relativeXPath"].find("/@href") >= 0 or p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0:
                        xpath = p["relativeXPath"]
-                        else:
+                    elif p["nodeType"] == 2:
                        xpath = p["relativeXPath"] + "/@href"
                    elif p["contentType"] == 1:
-                        # 已经有text()了，不需要再加
-                        if p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0:
-                            xpath = p["relativeXPath"]
-                        else:
                        xpath = p["relativeXPath"] + "/text()"
                    elif p["contentType"] == 0:
-                        if p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0:
-                            xpath = p["relativeXPath"]
-                        else:
                        xpath = p["relativeXPath"] + "//text()"
                    if p["relative"]:
                        # if p["relativeXPath"] == "":
@@ -1210,6 +1203,9 @@ class BrowserThread(Thread):
                        # 拼接所有文本内容并去掉两边的空白
                        content = ' '.join(result.strip()
                                           for result in content if result.strip())
+                        if p["nodeType"] == 2:
+                            base_url = self.browser.current_url
+                            content = urljoin(base_url, content) # 合并链接相对路径为绝对路径
                    else:
                        content = p["default"]
                        if not self.dataNotFoundKeys[p["name"]]:
@@ -1508,12 +1504,21 @@ if __name__ == '__main__':
        print("Thread with task id: ", i, " is created")
        threads.append(thread)
        thread.start()
-        Thread(target=check_pause, args=("p", event)).start()
-        time.sleep(5)
+        # Set the pause operation
+        # if sys.platform != "linux": 
+        #     Thread(target=check_pause, args=("p", event)).start()
+        # else:
+        time.sleep(3)
        print("\n\n----------------------------------")
-        print("正在运行任务，长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码；如果想恢复任务的执行，请再次长按p键。")
+        print("正在运行任务，按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码；如果想恢复任务的执行，请再次按p键。")
        print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
        print("----------------------------------\n\n")
+        # 使用监听器监听键盘输入
+        with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener:
+            listener.join()
+            
+        
+        
 	
    for thread in threads:
        thread.join()
--- a/.temp_to_pub/EasySpider_windows_x64/Code/utils.py
+++ b/.temp_to_pub/EasySpider_windows_x64/Code/utils.py
@@ -7,7 +7,7 @@ import os
 import re
 import time
 import uuid
-import keyboard
+# import keyboard
 from openpyxl import Workbook, load_workbook
 import requests
 from urllib.parse import urlparse
@@ -24,20 +24,41 @@ def is_valid_url(url):
 def lowercase_tags_in_xpath(xpath):
    return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath)
    
-def check_pause(key, event):
-    while True:
-        if keyboard.is_pressed(key):  # 按下p键，暂停程序
+def on_release_creator(event):
+    def on_release(key):
+        try:
+            if key.char == 'p':  # toggle pause/resume when the 'p' key is released (does not stop the listener)
                if event._flag == False:
-                print("任务执行中，长按p键暂停执行。")
+                    print("任务执行中，按p键暂停执行。")
                    print("Task is running, long press 'p' to pause.")
                    # 设置Event的值为True，使得线程b可以继续执行
                    event.set()
                else:
                    # 设置Event的值为False，使得线程b暂停执行
-                print("任务已暂停，长按p键继续执行...")
+                    print("任务已暂停，按p键继续执行...")
                    print("Task paused, press 'p' to continue...")
                    event.clear()
-        time.sleep(1)  # 每秒检查一次
+        except:
+            pass
+    return on_release
+
+def on_press(key):
+    pass
+
+# def check_pause(key, event):
+#     while True:
+#         if keyboard.is_pressed(key):  # 按下p键，暂停程序
+#             if event._flag == False:
+#                 print("任务执行中，长按p键暂停执行。")
+#                 print("Task is running, long press 'p' to pause.")
+#                 # 设置Event的值为True，使得线程b可以继续执行
+#                 event.set()
+#             else:
+#                 # 设置Event的值为False，使得线程b暂停执行
+#                 print("任务已暂停，长按p键继续执行...")
+#                 print("Task paused, press 'p' to continue...")
+#                 event.clear()
+#         time.sleep(1)  # 每秒检查一次


 def download_image(url, save_directory):
--- a/.temp_to_pub/EasySpider_windows_x64/execution_instances/0.json
+++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/0.json
--- a/.temp_to_pub/EasySpider_windows_x64/execution_instances/1.json
+++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/1.json
--- a/.temp_to_pub/EasySpider_windows_x64/execution_instances/2.json
+++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/2.json
--- a/.temp_to_pub/EasySpider_windows_x64/execution_instances/3.json
+++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/3.json
--- a/ElectronJS/EasySpider_en.crx
+++ b/ElectronJS/EasySpider_en.crx
--- a/ElectronJS/EasySpider_zh.crx
+++ b/ElectronJS/EasySpider_zh.crx
--- a/ExecuteStage/easyspider_executestage.py
+++ b/ExecuteStage/easyspider_executestage.py
@@ -1511,7 +1511,7 @@ if __name__ == '__main__':
        time.sleep(3)
        print("\n\n----------------------------------")
        print("正在运行任务，按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码；如果想恢复任务的执行，请再次按p键。")
-        print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
+        print("Running task, press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please press 'p' again.")
        print("----------------------------------\n\n")
        # 使用监听器监听键盘输入
        with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener: