Long Press p

2025-04-20 04:39:57 +08:00 · 2023-07-09 20:36:02 +08:00 · 2023-07-09 20:36:02 +08:00 · 812faf52cc
commit 812faf52cc
parent 79dbf60ef9
11 changed files with 88 additions and 45 deletions
--- a/.temp_to_pub/EasySpider_windows_x64/execution_instances/1.json
+++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/1.json
--- a/.temp_to_pub/EasySpider_windows_x64/execution_instances/2.json
+++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/2.json
--- a/.temp_to_pub/EasySpider_windows_x64/execution_instances/3.json
+++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/3.json
--- a/.temp_to_pub/EasySpider_windows_x64/execution_instances/4.json
+++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/4.json
--- a/.temp_to_pub/EasySpider_windows_x64/tasks/122.json
+++ b/.temp_to_pub/EasySpider_windows_x64/tasks/122.json
--- a/ElectronJS/src/taskGrid/FlowChart.html
+++ b/ElectronJS/src/taskGrid/FlowChart.html
@@ -431,7 +431,7 @@
                      </select>
                    <div v-if='parseInt(loopType) < 2'>
                        <label>XPath: <span style="font-size: 30px!important;" title="Relative XPATH writing: start with /, e.g. the loop item XPATH is /html/body/div[1], your input is /*[@id='tab-customer'], then the final addressed xpath is: /html/body/div[1]/*[@id='tab-customer']">☺</span></label>
-                        <textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["xpath"]'></textarea>
+                        <textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["xpath"]' placeholder="You cannot use expressions like @href or text() within loops in XPath. You can only locate elements, but cannot fetch attribute values. Declaratives like @href and text() are only supported in data extraction operations. Moreover, it is not recommended to use them. Instead, it's suggested choosing the node type and type of the content to collect directly within data extraction operations."></textarea>
                        <p><button type="button" data-toggle="modal" data-target="#myModal_XPath" @click="changeXPaths(nowNode.parameters['allXPaths'])" class="btn btn-primary" style="margin-top: 10px">(Testing feature) Click here to view other possible XPath expressions</button></p>
                    </div>
                    <div v-if='parseInt(loopType) == 2'>
@@ -440,7 +440,7 @@
                    </div>
                    <div v-else-if='parseInt(loopType) < 5'>
                        <label>Content List (Use Field["FieldName"] to input the last extracted value of a field):</label>
-                        <textarea onkeydown="inputDelete(event)" class="form-control" rows="3" placeholder="One text/URL per line" v-model='nowNode["parameters"]["textList"]'></textarea>
+                        <textarea onkeydown="inputDelete(event)" class="form-control" rows="3" placeholder="One text/URL per line. You cannot use expressions like @href or text() within loops in XPath. You can only locate elements, but cannot fetch attribute values. Declaratives like @href and text() are only supported in data extraction operations. Moreover, it is not recommended to use them. Instead, it's suggested choosing the node type and type of the content to collect directly within data extraction operations." v-model='nowNode["parameters"]["textList"]'></textarea>
                    </div>
                    <div v-else-if='parseInt(loopType) < 7'>
                        <label>Code (Use Field["FieldName"] to input the last extracted value of a field):</label>
--- a/ElectronJS/src/taskGrid/FlowChart_CN.html
+++ b/ElectronJS/src/taskGrid/FlowChart_CN.html
@@ -431,12 +431,12 @@
                      </select>
                    <div v-if='parseInt(loopType) < 2'>
                        <label>XPath： <span style="font-size: 30px!important;" title="相对XPATH写法：以/开头，如循环项XPATH为/html/body/div[1],您的输入为/*[@id='tab-customer'],则最终寻址的xpath为：/html/body/div[1]/*[@id='tab-customer']">☺</span></label>
-                        <textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["xpath"]'></textarea>
+                        <textarea onkeydown="inputDelete(event)" class="form-control" rows="2" placeholder="循环里的XPath不能用@href或者text()这种写法，只能定位元素不能取属性值，即@href和text()这种写法只在提取数据操作中支持，并且不推荐，建议直接在提取数据操作中选择节点类型和采集内容类型。" v-model='nowNode["parameters"]["xpath"]'></textarea>
                        <p><button type="button" data-toggle="modal" data-target="#myModal_XPath" @click="changeXPaths(nowNode.parameters['allXPaths'])" class="btn btn-primary" style="margin-top: 10px">（测试功能）点此查看其他可能的XPath写法</button></p>
                    </div>
                    <div v-else-if='parseInt(loopType) == 2'>
                        <label>XPath列表：</label>
-                        <textarea onkeydown="inputDelete(event)" class="form-control" rows="3" placeholder="每行一个XPath"  v-model='nowNode["parameters"]["pathList"]'></textarea>
+                        <textarea onkeydown="inputDelete(event)" class="form-control" rows="3" placeholder="每行一个XPath，循环里的XPath不能用@href或者text()这种写法，只能定位元素不能取属性值，即@href和text()这种写法只在提取数据操作中支持，并且不推荐，建议直接在提取数据操作中选择节点类型和采集内容类型。"  v-model='nowNode["parameters"]["pathList"]'></textarea>
                    </div>
                    <div v-else-if='parseInt(loopType) < 5'>
                        <label>内容列表（用Field["字段名"]来输入某字段提取到的最新值）：</label>
--- a/ElectronJS/src/taskGrid/invokeTask.html
+++ b/ElectronJS/src/taskGrid/invokeTask.html
@@ -213,7 +213,7 @@
                <input type="text" class="form-control" v-model="mysql_config_path"></input>
            </div>
        </form>
-        <label style="display: block">{{"Click the button below to execute the task. Click p on the keyboard to pause the task. Manual intervention is possible during the task execution process, ~点击以下按钮执行任务，任务执行过程中可以按p键暂停任务的执行以便" | lang }}<b>{{"~人工干预，" | lang}}</b>{{"such as manually input a password or captcha: ~如手动输入密码，验证码等。" | lang}}</label>
+        <label style="display: block">{{"Click the button below to execute the task. Long press p on the keyboard to pause the task. Manual intervention is possible during the task execution process, ~点击以下按钮执行任务，任务执行过程中可以长按p键暂停任务的执行以便" | lang }}<b>{{"~人工干预，" | lang}}</b>{{"such as manually input a password or captcha: ~如手动输入密码，验证码等。" | lang}}</label>
        <button class="btn btn-primary" v-on:click="localExecuteInstant(false)">{{"Directly Run Locally (Clean Mode)~本地直接执行（纯净模式）" |
            lang}}
        </button>
--- a/ExecuteStage/.vscode/launch.json
+++ b/ExecuteStage/.vscode/launch.json
@@ -12,7 +12,7 @@
            "justMyCode": true,
            //  "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
            // "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
-            "args": ["--id", "[35]", "--headless", "0", "--user_data", "1"]
+            "args": ["--id", "[4]", "--headless", "0", "--user_data", "1"]
        }
    ]
 }
--- a/ExecuteStage/easyspider_executestage.py
+++ b/ExecuteStage/easyspider_executestage.py
@@ -15,6 +15,8 @@ import time
 import requests
 from urllib.parse import urljoin
 from lxml import etree
+import undetected_chromedriver as uc
+from pynput.keyboard import Key, Listener
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.common.action_chains import ActionChains
@@ -41,7 +43,7 @@ from PIL import Image
 # import uuid
 from threading import Thread, Event
 from myChrome import MyChrome, MyUCChrome
-from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press, on_release_creator, write_to_csv, write_to_excel
+from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel
 desired_capabilities = DesiredCapabilities.CHROME
 desired_capabilities["pageLoadStrategy"] = "none"

@@ -1429,6 +1431,9 @@ if __name__ == '__main__':
        option.add_argument(
            f'--user-data-dir={absolute_user_data_folder}')  # TMALL 反扒
        option.add_argument("--profile-directory=Default")
+        options.add_argument(
+            f'--user-data-dir={absolute_user_data_folder}')  # TMALL 反扒
+        options.add_argument("--profile-directory=Default")

    if c.headless:
        print("Headless mode")
@@ -1445,7 +1450,7 @@ if __name__ == '__main__':

    threads = []
    for i in c.id:
-        print(options)
+        # print(options)
        print("id: ", i)
        if c.read_type == "remote":
            print("remote")
@@ -1493,7 +1498,6 @@ if __name__ == '__main__':
            browser_t = MyChrome(
                options=options, chrome_options=option, executable_path=driver_path)
        elif cloudflare == 1:
-            import undetected_chromedriver as uc
            browser_t = MyUCChrome(
                options=options, chrome_options=option, executable_path=driver_path)
            print("Pass Cloudflare Mode")
@@ -1507,24 +1511,29 @@ if __name__ == '__main__':
        thread.start()
        # Set the pause operation
        # if sys.platform != "linux": 
+        #     time.sleep(3)
+        #     print("\n\n----------------------------------")
+        #     print("正在运行任务，长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码；如果想恢复任务的执行，请再次长按p键。")
+        #     print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
+        #     print("----------------------------------\n\n")
        #     Thread(target=check_pause, args=("p", event)).start()
        # else:
        time.sleep(3)
+        press_time = {"duration": 0, "is_pressed": False}
        print("\n\n----------------------------------")
-        print("正在运行任务，按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码；如果想恢复任务的执行，请再次按p键。")
-        print("Running task, press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please press 'p' again.")
+        print("正在运行任务，长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码；如果想恢复任务的执行，请再次长按p键。")
+        print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
        print("----------------------------------\n\n")
        # 使用监听器监听键盘输入
        try:
-            from pynput.keyboard import Key, Listener
-            with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener:
+            with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:
                listener.join()
        except:
            print("您的操作系统不支持暂停功能。")
            print("Your operating system does not support the pause function.")
            
        
-    print("线程长度：", len(threads) )
+    # print("线程长度：", len(threads) )
 	
    for thread in threads:
        print()
--- a/ExecuteStage/utils.py
+++ b/ExecuteStage/utils.py
@@ -7,7 +7,7 @@ import os
 import re
 import time
 import uuid
-# import keyboard
+import keyboard
 from openpyxl import Workbook, load_workbook
 import requests
 from urllib.parse import urlparse
@@ -23,42 +23,72 @@ def is_valid_url(url):

 def lowercase_tags_in_xpath(xpath):
    return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath)
-    
-def on_release_creator(event):
+
+
+def on_press_creator(press_time, event):
+    def on_press(key):
+        try:
+            if key.char == 'p':
+                if press_time["is_pressed"] == False: # 没按下p键时，记录按下p键的时间
+                    press_time["duration"] = time.time()
+                    press_time["is_pressed"] = True
+                else: # 按下p键时，判断按下p键的时间是否超过2.5秒
+                    duration = time.time() - press_time["duration"]
+                    if duration > 2.5:
+                        if event._flag == False:
+                            print("任务执行中，按p键暂停执行。")
+                            print("Task is running, press 'p' to pause.")
+                            # 设置Event的值为True，使得线程b可以继续执行
+                            event.set()
+                        else:
+                            # 设置Event的值为False，使得线程b暂停执行
+                            print("任务已暂停，按p键继续执行...")
+                            print("Task paused, press 'p' to continue...")
+                            event.clear()
+                        press_time["duration"] = time.time()
+                        press_time["is_pressed"] = False
+                    # print("按下p键时间：", press_time["duration"])
+        except:
+            pass
+    return on_press
+
+def on_release_creator(event, press_time):
    def on_release(key):
        try:
-            if key.char == 'p':  # 当按下esc键时，退出监听
-                if event._flag == False:
-                    print("任务执行中，按p键暂停执行。")
-                    print("Task is running, press 'p' to pause.")
-                    # 设置Event的值为True，使得线程b可以继续执行
-                    event.set()
-                else:
-                    # 设置Event的值为False，使得线程b暂停执行
-                    print("任务已暂停，按p键继续执行...")
-                    print("Task paused, press 'p' to continue...")
-                    event.clear()
+            # duration = time.time() - press_time["duration"]
+            # # print("松开p键时间：", time.time(), "Duration: ", duration)
+            # if duration > 2.5 and key.char == 'p':
+            #     if event._flag == False:
+            #         print("任务执行中，按p键暂停执行。")
+            #         print("Task is running, press 'p' to pause.")
+            #         # 设置Event的值为True，使得线程b可以继续执行
+            #         event.set()
+            #     else:
+            #         # 设置Event的值为False，使得线程b暂停执行
+            #         print("任务已暂停，按p键继续执行...")
+            #         print("Task paused, press 'p' to continue...")
+            #         event.clear()
+            #     press_time["duration"] = time.time()
+            press_time["is_pressed"] = False
        except:
            pass
    return on_release

-def on_press(key):
-    pass

-# def check_pause(key, event):
-#     while True:
-#         if keyboard.is_pressed(key):  # 按下p键，暂停程序
-#             if event._flag == False:
-#                 print("任务执行中，长按p键暂停执行。")
-#                 print("Task is running, long press 'p' to pause.")
-#                 # 设置Event的值为True，使得线程b可以继续执行
-#                 event.set()
-#             else:
-#                 # 设置Event的值为False，使得线程b暂停执行
-#                 print("任务已暂停，长按p键继续执行...")
-#                 print("Task paused, press 'p' to continue...")
-#                 event.clear()
-#         time.sleep(1)  # 每秒检查一次
+def check_pause(key, event):
+    while True:
+        if keyboard.is_pressed(key):  # 按下p键，暂停程序
+            if event._flag == False:
+                print("任务执行中，长按p键暂停执行。")
+                print("Task is running, long press 'p' to pause.")
+                # 设置Event的值为True，使得线程b可以继续执行
+                event.set()
+            else:
+                # 设置Event的值为False，使得线程b暂停执行
+                print("任务已暂停，长按p键继续执行...")
+                print("Task paused, press 'p' to continue...")
+                event.clear()
+        time.sleep(1)  # 每秒检查一次


 def download_image(url, save_directory):