Linux update

This commit is contained in:
Naibo Wang 2023-07-09 22:27:32 +08:00
parent fb99764222
commit edc33cd2e0
7 changed files with 85 additions and 29 deletions

View File

@ -1,4 +1,6 @@
EasySpider_MacOS_all_arch/easyspider_executestage EasySpider_MacOS_all_arch/easyspider_executestage
EasySpider_Linux64_x64/user_data
EasySpider_windows_x86/user_data
EasySpider EasySpider
EasySpider.app/ EasySpider.app/
EasySpider_windows_x64/user_data EasySpider_windows_x64/user_data

View File

@ -15,6 +15,8 @@ import time
import requests import requests
from urllib.parse import urljoin from urllib.parse import urljoin
from lxml import etree from lxml import etree
import undetected_chromedriver as uc
from pynput.keyboard import Key, Listener
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.action_chains import ActionChains
@ -29,7 +31,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import Select from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
import undetected_chromedriver as uc
import random import random
# import pandas as pd # import pandas as pd
from openpyxl import load_workbook, Workbook from openpyxl import load_workbook, Workbook
@ -42,7 +43,7 @@ from PIL import Image
# import uuid # import uuid
from threading import Thread, Event from threading import Thread, Event
from myChrome import MyChrome, MyUCChrome from myChrome import MyChrome, MyUCChrome
from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press, on_release_creator, write_to_csv, write_to_excel from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel
desired_capabilities = DesiredCapabilities.CHROME desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none" desired_capabilities["pageLoadStrategy"] = "none"
@ -1326,6 +1327,8 @@ class BrowserThread(Thread):
if __name__ == '__main__': if __name__ == '__main__':
from multiprocessing import freeze_support
freeze_support() # 防止无限死循环多开
config = { config = {
"id": [0], "id": [0],
"saved_file_name": "", "saved_file_name": "",
@ -1367,16 +1370,19 @@ if __name__ == '__main__':
driver_path = os.path.join( driver_path = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe") os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe")
option.add_extension("EasySpider/resources/app/XPathHelper.crx") option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
elif sys.platform == "win32" and platform.architecture()[0] == "64bit": elif sys.platform == "win32" and platform.architecture()[0] == "64bit":
options.binary_location = os.path.join( options.binary_location = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe") os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
driver_path = os.path.join( driver_path = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe") os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe")
option.add_extension("EasySpider/resources/app/XPathHelper.crx") option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
elif sys.platform == "linux" and platform.architecture()[0] == "64bit": elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome" options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64" driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64"
option.add_extension("EasySpider/resources/app/XPathHelper.crx") option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
else: else:
print("Unsupported platform") print("Unsupported platform")
sys.exit() sys.exit()
@ -1428,6 +1434,9 @@ if __name__ == '__main__':
option.add_argument( option.add_argument(
f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒 f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒
option.add_argument("--profile-directory=Default") option.add_argument("--profile-directory=Default")
options.add_argument(
f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒
options.add_argument("--profile-directory=Default")
if c.headless: if c.headless:
print("Headless mode") print("Headless mode")
@ -1444,7 +1453,7 @@ if __name__ == '__main__':
threads = [] threads = []
for i in c.id: for i in c.id:
print(options) # print(options)
print("id: ", i) print("id: ", i)
if c.read_type == "remote": if c.read_type == "remote":
print("remote") print("remote")
@ -1504,21 +1513,33 @@ if __name__ == '__main__':
threads.append(thread) threads.append(thread)
thread.start() thread.start()
# Set the pause operation # Set the pause operation
if sys.platform != "linux": # if sys.platform != "linux":
Thread(target=check_pause, args=("p", event)).start() # time.sleep(3)
else: # print("\n\n----------------------------------")
from pynput.keyboard import Key, Listener # print("正在运行任务长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码如果想恢复任务的执行请再次长按p键。")
# 使用监听器监听键盘输入 # print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener: # print("----------------------------------\n\n")
listener.join() # Thread(target=check_pause, args=("p", event)).start()
# else:
time.sleep(5) time.sleep(3)
press_time = {"duration": 0, "is_pressed": False}
print("\n\n----------------------------------") print("\n\n----------------------------------")
print("正在运行任务长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码如果想恢复任务的执行请再次长按p键。") print("正在运行任务长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码如果想恢复任务的执行请再次长按p键。")
print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.") print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
print("----------------------------------\n\n") print("----------------------------------\n\n")
# 使用监听器监听键盘输入
try:
with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:
listener.join()
except:
print("您的操作系统不支持暂停功能。")
print("Your operating system does not support the pause function.")
# print("线程长度:", len(threads) )
for thread in threads: for thread in threads:
print()
thread.join() thread.join()
for thread in threads: for thread in threads:

View File

@ -24,13 +24,17 @@ def is_valid_url(url):
def lowercase_tags_in_xpath(xpath): def lowercase_tags_in_xpath(xpath):
return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath) return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath)
def on_press(key):
pass
def on_release_creator(event): def on_press_creator(press_time, event):
def on_release(key): def on_press(key):
try: try:
if key.char == 'p': # 当按下esc键时退出监听 if key.char == 'p':
if press_time["is_pressed"] == False: # 没按下p键时记录按下p键的时间
press_time["duration"] = time.time()
press_time["is_pressed"] = True
else: # 按下p键时判断按下p键的时间是否超过2.5秒
duration = time.time() - press_time["duration"]
if duration > 2:
if event._flag == False: if event._flag == False:
print("任务执行中长按p键暂停执行。") print("任务执行中长按p键暂停执行。")
print("Task is running, long press 'p' to pause.") print("Task is running, long press 'p' to pause.")
@ -39,12 +43,38 @@ def on_release_creator(event):
else: else:
# 设置Event的值为False使得线程b暂停执行 # 设置Event的值为False使得线程b暂停执行
print("任务已暂停长按p键继续执行...") print("任务已暂停长按p键继续执行...")
print("Task paused, press 'p' to continue...") print("Task paused, long press 'p' to continue...")
event.clear() event.clear()
press_time["duration"] = time.time()
press_time["is_pressed"] = False
# print("按下p键时间", press_time["duration"])
except:
pass
return on_press
def on_release_creator(event, press_time):
def on_release(key):
try:
# duration = time.time() - press_time["duration"]
# # print("松开p键时间", time.time(), "Duration: ", duration)
# if duration > 2.5 and key.char == 'p':
# if event._flag == False:
# print("任务执行中按p键暂停执行。")
# print("Task is running, press 'p' to pause.")
# # 设置Event的值为True使得线程b可以继续执行
# event.set()
# else:
# # 设置Event的值为False使得线程b暂停执行
# print("任务已暂停按p键继续执行...")
# print("Task paused, press 'p' to continue...")
# event.clear()
# press_time["duration"] = time.time()
press_time["is_pressed"] = False
except: except:
pass pass
return on_release return on_release
def check_pause(key, event): def check_pause(key, event):
while True: while True:
if keyboard.is_pressed(key): # 按下p键暂停程序 if keyboard.is_pressed(key): # 按下p键暂停程序

View File

@ -1 +1 @@
{"id":6,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/9/2023, 8:26:20 AM","update_time":"7/9/2023, 8:26:20 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":4,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"452","value":"452"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/手机/数码"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://channel.jd.com/mensshoes.html"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4,5],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"alr5ipqth9iljup1kld","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"452","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}},{"id":5,"index":5,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":2,"contentType":0,"relative":false,"name":"参数2_链接地址","desc":"","relativeXPath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[8]/a[3]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[8]/a[3]","//a[contains(., '男鞋')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-5]/a"],"exampleValues":[{"num":0,"value":"https://channel.jd.com/mensshoes.html"}],"unique_index":"upvimqodqwljup1tlv","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} {"id":6,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/9/2023, 8:26:20 AM","update_time":"7/9/2023, 10:07:35 PM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":4,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"452","value":"452"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/手机/数码"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://channel.jd.com/mensshoes.html"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4,5],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"alr5ipqth9iljup1kld","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"452","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}},{"id":5,"index":5,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":2,"contentType":0,"relative":false,"name":"参数2_链接地址","desc":"","relativeXPath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[8]/a[3]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[8]/a[3]","//a[contains(., '男鞋')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-5]/a"],"exampleValues":[{"num":0,"value":"https://channel.jd.com/mensshoes.html"}],"unique_index":"upvimqodqwljup1tlv","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]}

Binary file not shown.

Binary file not shown.

View File

@ -1370,16 +1370,19 @@ if __name__ == '__main__':
driver_path = os.path.join( driver_path = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe") os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe")
option.add_extension("EasySpider/resources/app/XPathHelper.crx") option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
elif sys.platform == "win32" and platform.architecture()[0] == "64bit": elif sys.platform == "win32" and platform.architecture()[0] == "64bit":
options.binary_location = os.path.join( options.binary_location = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe") os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
driver_path = os.path.join( driver_path = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe") os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe")
option.add_extension("EasySpider/resources/app/XPathHelper.crx") option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
elif sys.platform == "linux" and platform.architecture()[0] == "64bit": elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome" options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64" driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64"
option.add_extension("EasySpider/resources/app/XPathHelper.crx") option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
else: else:
print("Unsupported platform") print("Unsupported platform")
sys.exit() sys.exit()