diff --git a/.temp_to_pub/.gitignore b/.temp_to_pub/.gitignore index 1009635..ccdd823 100644 --- a/.temp_to_pub/.gitignore +++ b/.temp_to_pub/.gitignore @@ -1,8 +1,10 @@ EasySpider_MacOS_all_arch/easyspider_executestage +EasySpider_Linux64_x64/user_data +EasySpider_windows_x86/user_data EasySpider EasySpider.app/ EasySpider_windows_x64/user_data *.tmp *.7z* config.json -mysql_config.json \ No newline at end of file +mysql_config.json diff --git a/.temp_to_pub/EasySpider_Linux_x64/Code/easyspider_executestage.py b/.temp_to_pub/EasySpider_Linux_x64/Code/easyspider_executestage.py index 920e254..0bf2879 100644 --- a/.temp_to_pub/EasySpider_Linux_x64/Code/easyspider_executestage.py +++ b/.temp_to_pub/EasySpider_Linux_x64/Code/easyspider_executestage.py @@ -15,6 +15,8 @@ import time import requests from urllib.parse import urljoin from lxml import etree +import undetected_chromedriver as uc +from pynput.keyboard import Key, Listener from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.action_chains import ActionChains @@ -29,7 +31,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from selenium.webdriver.support.ui import Select from selenium.webdriver import ActionChains from selenium.webdriver.common.by import By -import undetected_chromedriver as uc import random # import pandas as pd from openpyxl import load_workbook, Workbook @@ -42,7 +43,7 @@ from PIL import Image # import uuid from threading import Thread, Event from myChrome import MyChrome, MyUCChrome -from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press, on_release_creator, write_to_csv, write_to_excel +from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel desired_capabilities = DesiredCapabilities.CHROME desired_capabilities["pageLoadStrategy"] = "none" @@ -1326,6 +1327,8 @@ class BrowserThread(Thread): if __name__ == '__main__': + from multiprocessing import freeze_support + freeze_support() # 防止无限死循环多开 config = { "id": [0], "saved_file_name": "", @@ -1367,16 +1370,19 @@ if __name__ == '__main__': driver_path = os.path.join( os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe") option.add_extension("EasySpider/resources/app/XPathHelper.crx") + options.add_extension("EasySpider/resources/app/XPathHelper.crx") elif sys.platform == "win32" and platform.architecture()[0] == "64bit": options.binary_location = os.path.join( os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe") driver_path = os.path.join( os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe") option.add_extension("EasySpider/resources/app/XPathHelper.crx") + options.add_extension("EasySpider/resources/app/XPathHelper.crx") elif sys.platform == "linux" and platform.architecture()[0] == "64bit": options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome" driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64" option.add_extension("EasySpider/resources/app/XPathHelper.crx") + options.add_extension("EasySpider/resources/app/XPathHelper.crx") else: print("Unsupported platform") sys.exit() @@ -1428,6 +1434,9 @@ if __name__ == '__main__': option.add_argument( f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒 option.add_argument("--profile-directory=Default") + options.add_argument( + f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒 + options.add_argument("--profile-directory=Default") if c.headless: print("Headless mode") @@ -1444,7 +1453,7 @@ if __name__ == '__main__': threads = [] for i in c.id: - print(options) + # print(options) print("id: ", i) if c.read_type == "remote": print("remote") @@ -1504,21 +1513,33 @@ if __name__ == '__main__': threads.append(thread) thread.start() # Set the pause operation - if sys.platform != "linux": - Thread(target=check_pause, args=("p", event)).start() - else: - from pynput.keyboard import Key, Listener - # 使用监听器监听键盘输入 - with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener: - listener.join() - - time.sleep(5) + # if sys.platform != "linux": + # time.sleep(3) + # print("\n\n----------------------------------") + # print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。") + # print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.") + # print("----------------------------------\n\n") + # Thread(target=check_pause, args=("p", event)).start() + # else: + time.sleep(3) + press_time = {"duration": 0, "is_pressed": False} print("\n\n----------------------------------") print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。") print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.") print("----------------------------------\n\n") + # 使用监听器监听键盘输入 + try: + with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener: + listener.join() + except: + print("您的操作系统不支持暂停功能。") + print("Your operating system does not support the pause function.") + + + # print("线程长度:", len(threads) ) for thread in threads: + print() thread.join() for thread in threads: diff --git a/.temp_to_pub/EasySpider_Linux_x64/Code/utils.py b/.temp_to_pub/EasySpider_Linux_x64/Code/utils.py index fe4ab6f..67f139e 100644 --- a/.temp_to_pub/EasySpider_Linux_x64/Code/utils.py +++ b/.temp_to_pub/EasySpider_Linux_x64/Code/utils.py @@ -24,27 +24,57 @@ def is_valid_url(url): def lowercase_tags_in_xpath(xpath): return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath) -def on_press(key): - pass - -def on_release_creator(event): + +def on_press_creator(press_time, event): + def on_press(key): + try: + if key.char == 'p': + if press_time["is_pressed"] == False: # 没按下p键时,记录按下p键的时间 + press_time["duration"] = time.time() + press_time["is_pressed"] = True + else: # 按下p键时,判断按下p键的时间是否超过2.5秒 + duration = time.time() - press_time["duration"] + if duration > 2: + if event._flag == False: + print("任务执行中,长按p键暂停执行。") + print("Task is running, long press 'p' to pause.") + # 设置Event的值为True,使得线程b可以继续执行 + event.set() + else: + # 设置Event的值为False,使得线程b暂停执行 + print("任务已暂停,长按p键继续执行...") + print("Task paused, long press 'p' to continue...") + event.clear() + press_time["duration"] = time.time() + press_time["is_pressed"] = False + # print("按下p键时间:", press_time["duration"]) + except: + pass + return on_press + +def on_release_creator(event, press_time): def on_release(key): try: - if key.char == 'p': # 当按下esc键时,退出监听 - if event._flag == False: - print("任务执行中,长按p键暂停执行。") - print("Task is running, long press 'p' to pause.") - # 设置Event的值为True,使得线程b可以继续执行 - event.set() - else: - # 设置Event的值为False,使得线程b暂停执行 - print("任务已暂停,长按p键继续执行...") - print("Task paused, press 'p' to continue...") - event.clear() + # duration = time.time() - press_time["duration"] + # # print("松开p键时间:", time.time(), "Duration: ", duration) + # if duration > 2.5 and key.char == 'p': + # if event._flag == False: + # print("任务执行中,按p键暂停执行。") + # print("Task is running, press 'p' to pause.") + # # 设置Event的值为True,使得线程b可以继续执行 + # event.set() + # else: + # # 设置Event的值为False,使得线程b暂停执行 + # print("任务已暂停,按p键继续执行...") + # print("Task paused, press 'p' to continue...") + # event.clear() + # press_time["duration"] = time.time() + press_time["is_pressed"] = False except: pass return on_release + def check_pause(key, event): while True: if keyboard.is_pressed(key): # 按下p键,暂停程序 diff --git a/.temp_to_pub/EasySpider_Linux_x64/tasks/6.json b/.temp_to_pub/EasySpider_Linux_x64/tasks/6.json index 93842bf..18b527e 100644 --- a/.temp_to_pub/EasySpider_Linux_x64/tasks/6.json +++ b/.temp_to_pub/EasySpider_Linux_x64/tasks/6.json @@ -1 +1 @@ -{"id":6,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/9/2023, 8:26:20 AM","update_time":"7/9/2023, 8:26:20 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":4,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"452","value":"452"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/手机/数码"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://channel.jd.com/mensshoes.html"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4,5],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"alr5ipqth9iljup1kld","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"452","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}},{"id":5,"index":5,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":2,"contentType":0,"relative":false,"name":"参数2_链接地址","desc":"","relativeXPath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[8]/a[3]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[8]/a[3]","//a[contains(., '男鞋')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-5]/a"],"exampleValues":[{"num":0,"value":"https://channel.jd.com/mensshoes.html"}],"unique_index":"upvimqodqwljup1tlv","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file +{"id":6,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/9/2023, 8:26:20 AM","update_time":"7/9/2023, 10:07:35 PM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":4,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"452","value":"452"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/手机/数码"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://channel.jd.com/mensshoes.html"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4,5],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"alr5ipqth9iljup1kld","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"452","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}},{"id":5,"index":5,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":2,"contentType":0,"relative":false,"name":"参数2_链接地址","desc":"","relativeXPath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[8]/a[3]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[8]/a[3]","//a[contains(., '男鞋')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-5]/a"],"exampleValues":[{"num":0,"value":"https://channel.jd.com/mensshoes.html"}],"unique_index":"upvimqodqwljup1tlv","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/ElectronJS/EasySpider_en.crx b/ElectronJS/EasySpider_en.crx deleted file mode 100644 index 4b05fc6..0000000 Binary files a/ElectronJS/EasySpider_en.crx and /dev/null differ diff --git a/ElectronJS/EasySpider_zh.crx b/ElectronJS/EasySpider_zh.crx deleted file mode 100644 index 9d8e3ba..0000000 Binary files a/ElectronJS/EasySpider_zh.crx and /dev/null differ diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index 19fd21a..0bf2879 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -1370,16 +1370,19 @@ if __name__ == '__main__': driver_path = os.path.join( os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe") option.add_extension("EasySpider/resources/app/XPathHelper.crx") + options.add_extension("EasySpider/resources/app/XPathHelper.crx") elif sys.platform == "win32" and platform.architecture()[0] == "64bit": options.binary_location = os.path.join( os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe") driver_path = os.path.join( os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe") option.add_extension("EasySpider/resources/app/XPathHelper.crx") + options.add_extension("EasySpider/resources/app/XPathHelper.crx") elif sys.platform == "linux" and platform.architecture()[0] == "64bit": options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome" driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64" option.add_extension("EasySpider/resources/app/XPathHelper.crx") + options.add_extension("EasySpider/resources/app/XPathHelper.crx") else: print("Unsupported platform") sys.exit()