mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-22 12:09:35 +08:00
Linux update
This commit is contained in:
parent
fb99764222
commit
edc33cd2e0
2
.temp_to_pub/.gitignore
vendored
2
.temp_to_pub/.gitignore
vendored
@ -1,4 +1,6 @@
|
||||
EasySpider_MacOS_all_arch/easyspider_executestage
|
||||
EasySpider_Linux64_x64/user_data
|
||||
EasySpider_windows_x86/user_data
|
||||
EasySpider
|
||||
EasySpider.app/
|
||||
EasySpider_windows_x64/user_data
|
||||
|
@ -15,6 +15,8 @@ import time
|
||||
import requests
|
||||
from urllib.parse import urljoin
|
||||
from lxml import etree
|
||||
import undetected_chromedriver as uc
|
||||
from pynput.keyboard import Key, Listener
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
@ -29,7 +31,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||
from selenium.webdriver.support.ui import Select
|
||||
from selenium.webdriver import ActionChains
|
||||
from selenium.webdriver.common.by import By
|
||||
import undetected_chromedriver as uc
|
||||
import random
|
||||
# import pandas as pd
|
||||
from openpyxl import load_workbook, Workbook
|
||||
@ -42,7 +43,7 @@ from PIL import Image
|
||||
# import uuid
|
||||
from threading import Thread, Event
|
||||
from myChrome import MyChrome, MyUCChrome
|
||||
from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press, on_release_creator, write_to_csv, write_to_excel
|
||||
from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel
|
||||
desired_capabilities = DesiredCapabilities.CHROME
|
||||
desired_capabilities["pageLoadStrategy"] = "none"
|
||||
|
||||
@ -1326,6 +1327,8 @@ class BrowserThread(Thread):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from multiprocessing import freeze_support
|
||||
freeze_support() # 防止无限死循环多开
|
||||
config = {
|
||||
"id": [0],
|
||||
"saved_file_name": "",
|
||||
@ -1367,16 +1370,19 @@ if __name__ == '__main__':
|
||||
driver_path = os.path.join(
|
||||
os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe")
|
||||
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
elif sys.platform == "win32" and platform.architecture()[0] == "64bit":
|
||||
options.binary_location = os.path.join(
|
||||
os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
|
||||
driver_path = os.path.join(
|
||||
os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe")
|
||||
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
|
||||
options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
|
||||
driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64"
|
||||
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
else:
|
||||
print("Unsupported platform")
|
||||
sys.exit()
|
||||
@ -1428,6 +1434,9 @@ if __name__ == '__main__':
|
||||
option.add_argument(
|
||||
f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒
|
||||
option.add_argument("--profile-directory=Default")
|
||||
options.add_argument(
|
||||
f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒
|
||||
options.add_argument("--profile-directory=Default")
|
||||
|
||||
if c.headless:
|
||||
print("Headless mode")
|
||||
@ -1444,7 +1453,7 @@ if __name__ == '__main__':
|
||||
|
||||
threads = []
|
||||
for i in c.id:
|
||||
print(options)
|
||||
# print(options)
|
||||
print("id: ", i)
|
||||
if c.read_type == "remote":
|
||||
print("remote")
|
||||
@ -1504,21 +1513,33 @@ if __name__ == '__main__':
|
||||
threads.append(thread)
|
||||
thread.start()
|
||||
# Set the pause operation
|
||||
if sys.platform != "linux":
|
||||
Thread(target=check_pause, args=("p", event)).start()
|
||||
else:
|
||||
from pynput.keyboard import Key, Listener
|
||||
# 使用监听器监听键盘输入
|
||||
with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener:
|
||||
listener.join()
|
||||
|
||||
time.sleep(5)
|
||||
# if sys.platform != "linux":
|
||||
# time.sleep(3)
|
||||
# print("\n\n----------------------------------")
|
||||
# print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。")
|
||||
# print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
|
||||
# print("----------------------------------\n\n")
|
||||
# Thread(target=check_pause, args=("p", event)).start()
|
||||
# else:
|
||||
time.sleep(3)
|
||||
press_time = {"duration": 0, "is_pressed": False}
|
||||
print("\n\n----------------------------------")
|
||||
print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。")
|
||||
print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
|
||||
print("----------------------------------\n\n")
|
||||
# 使用监听器监听键盘输入
|
||||
try:
|
||||
with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:
|
||||
listener.join()
|
||||
except:
|
||||
print("您的操作系统不支持暂停功能。")
|
||||
print("Your operating system does not support the pause function.")
|
||||
|
||||
|
||||
# print("线程长度:", len(threads) )
|
||||
|
||||
for thread in threads:
|
||||
print()
|
||||
thread.join()
|
||||
|
||||
for thread in threads:
|
||||
|
@ -24,27 +24,57 @@ def is_valid_url(url):
|
||||
def lowercase_tags_in_xpath(xpath):
    """Lower-case the upper-case letter runs that sit in tag position.

    A run of ``A-Z`` characters is rewritten only when it is immediately
    followed by ``[``, ``]``, ``/`` or the end of the string; every other
    character of ``xpath`` is returned unchanged.
    """
    # The lookahead keeps the delimiter itself out of the replaced text.
    tag_pattern = r"([A-Z]+)(?=[\[\]//]|$)"

    def _to_lower(match):
        return match.group(0).lower()

    return re.sub(tag_pattern, _to_lower, xpath)
|
||||
|
||||
def on_press(key):
|
||||
pass
|
||||
|
||||
def on_release_creator(event):
|
||||
def on_press_creator(press_time, event, hold_seconds=2):
    """Build a key-press callback that toggles ``event`` on a long press of 'p'.

    Args:
        press_time: Mutable dict shared with the release callback. This
            function reads and writes its ``"duration"`` key (timestamp of
            the press that started the current hold) and its
            ``"is_pressed"`` key (whether a hold is being timed).
        event: Object with ``is_set()``, ``set()`` and ``clear()`` (for
            example ``threading.Event``) whose state is flipped after a
            long press.
        hold_seconds: How long, in seconds, 'p' must be held before the
            state is flipped. Defaults to 2, the previously hard-coded value.

    Returns:
        A one-argument callable ``on_press(key)`` that always returns None.
    """

    def on_press(key):
        try:
            # Keys without a ``char`` attribute raise AttributeError here and
            # are ignored by the handler below, exactly as before.
            if key.char != 'p':
                return
            if not press_time["is_pressed"]:
                # First press event of a hold: remember when it started.
                press_time["duration"] = time.time()
                press_time["is_pressed"] = True
                return
            # Later press events while a hold is being timed: check its length.
            held_for = time.time() - press_time["duration"]
            if held_for <= hold_seconds:
                return
            if not event.is_set():
                print("任务执行中,长按p键暂停执行。")
                print("Task is running, long press 'p' to pause.")
                event.set()
            else:
                print("任务已暂停,长按p键继续执行...")
                print("Task paused, long press 'p' to continue...")
                event.clear()
            # Restart the timing so one hold produces one toggle per period.
            press_time["duration"] = time.time()
            press_time["is_pressed"] = False
        except Exception:
            # Best-effort, as in the original: the callback never raises.
            # Unlike the former bare ``except``, KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            pass

    return on_press
|
||||
|
||||
def on_release_creator(event, press_time):
|
||||
def on_release(key):
|
||||
try:
|
||||
if key.char == 'p': # 当按下esc键时,退出监听
|
||||
if event._flag == False:
|
||||
print("任务执行中,长按p键暂停执行。")
|
||||
print("Task is running, long press 'p' to pause.")
|
||||
# 设置Event的值为True,使得线程b可以继续执行
|
||||
event.set()
|
||||
else:
|
||||
# 设置Event的值为False,使得线程b暂停执行
|
||||
print("任务已暂停,长按p键继续执行...")
|
||||
print("Task paused, press 'p' to continue...")
|
||||
event.clear()
|
||||
# duration = time.time() - press_time["duration"]
|
||||
# # print("松开p键时间:", time.time(), "Duration: ", duration)
|
||||
# if duration > 2.5 and key.char == 'p':
|
||||
# if event._flag == False:
|
||||
# print("任务执行中,按p键暂停执行。")
|
||||
# print("Task is running, press 'p' to pause.")
|
||||
# # 设置Event的值为True,使得线程b可以继续执行
|
||||
# event.set()
|
||||
# else:
|
||||
# # 设置Event的值为False,使得线程b暂停执行
|
||||
# print("任务已暂停,按p键继续执行...")
|
||||
# print("Task paused, press 'p' to continue...")
|
||||
# event.clear()
|
||||
# press_time["duration"] = time.time()
|
||||
press_time["is_pressed"] = False
|
||||
except:
|
||||
pass
|
||||
return on_release
|
||||
|
||||
|
||||
def check_pause(key, event):
|
||||
while True:
|
||||
if keyboard.is_pressed(key): # 按下p键,暂停程序
|
||||
|
@ -1 +1 @@
|
||||
{"id":6,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/9/2023, 8:26:20 AM","update_time":"7/9/2023, 8:26:20 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":4,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"452","value":"452"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/手机/数码"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://channel.jd.com/mensshoes.html"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4,5],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode"
:0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"alr5ipqth9iljup1kld","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"452","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text 
defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}},{"id":5,"index":5,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":2,"contentType":0,"relative":false,"name":"参数2_链接地址","desc":"","relativeXPath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[8]/a[3]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[8]/a[3]","//a[contains(., '男鞋')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-5]/a"],"exampleValues":[{"num":0,"value":"https://channel.jd.com/mensshoes.html"}],"unique_index":"upvimqodqwljup1tlv","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]}
|
||||
{"id":6,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/9/2023, 8:26:20 AM","update_time":"7/9/2023, 10:07:35 PM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":4,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"452","value":"452"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/手机/数码"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://channel.jd.com/mensshoes.html"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4,5],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode
":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"alr5ipqth9iljup1kld","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"452","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text 
defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}},{"id":5,"index":5,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":2,"contentType":0,"relative":false,"name":"参数2_链接地址","desc":"","relativeXPath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[8]/a[3]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[8]/a[3]","//a[contains(., '男鞋')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-5]/a"],"exampleValues":[{"num":0,"value":"https://channel.jd.com/mensshoes.html"}],"unique_index":"upvimqodqwljup1tlv","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]}
|
Binary file not shown.
Binary file not shown.
@ -1370,16 +1370,19 @@ if __name__ == '__main__':
|
||||
driver_path = os.path.join(
|
||||
os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe")
|
||||
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
elif sys.platform == "win32" and platform.architecture()[0] == "64bit":
|
||||
options.binary_location = os.path.join(
|
||||
os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
|
||||
driver_path = os.path.join(
|
||||
os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe")
|
||||
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
|
||||
options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
|
||||
driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64"
|
||||
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
else:
|
||||
print("Unsupported platform")
|
||||
sys.exit()
|
||||
|
Loading…
x
Reference in New Issue
Block a user