diff --git a/.temp_to_pub/.gitignore b/.temp_to_pub/.gitignore index 77e2f3e..f703a93 100644 --- a/.temp_to_pub/.gitignore +++ b/.temp_to_pub/.gitignore @@ -5,6 +5,7 @@ EasySpider EasySpider.app/ EasySpider_windows_x64/user_data *.tmp +*.tar.gz *.7z* config.json mysql_config.json diff --git a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py index 6f66ab4..c1aa0ec 100644 --- a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py +++ b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py @@ -15,6 +15,8 @@ import time import requests from urllib.parse import urljoin from lxml import etree +# import undetected_chromedriver as uc +from pynput.keyboard import Key, Listener from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.action_chains import ActionChains @@ -29,7 +31,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from selenium.webdriver.support.ui import Select from selenium.webdriver import ActionChains from selenium.webdriver.common.by import By -import undetected_chromedriver as uc import random # import pandas as pd from openpyxl import load_workbook, Workbook @@ -41,8 +42,10 @@ import pytesseract from PIL import Image # import uuid from threading import Thread, Event -from myChrome import MyChrome, MyUCChrome -from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press, on_release_creator, write_to_csv, write_to_excel +from myChrome import MyChrome +if sys.platform != "darwin": + from myChrome import MyUCChrome +from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel desired_capabilities = DesiredCapabilities.CHROME desired_capabilities["pageLoadStrategy"] = "none" @@ -1326,6 +1329,8 @@ class BrowserThread(Thread): if __name__ == '__main__': + # from multiprocessing import freeze_support + # freeze_support() # 防止无限死循环多开 config = { "id": [0], "saved_file_name": "", @@ -1358,6 +1363,9 @@ if __name__ == '__main__': # option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome" # driver_path = os.getcwd()+ "/chromedriver_mac64" print(driver_path) + if c.config_folder == "": + c.config_folder = os.path.expanduser("~/Library/Application Support/EasySpider/") + # print("Config folder for MacOS:", c.config_folder) elif os.path.exists(os.getcwd()+"/EasySpider/resources"): # 打包后的路径 print("Finding chromedriver in EasySpider", os.getcwd()+"/EasySpider") @@ -1367,16 +1375,19 @@ if __name__ == '__main__': driver_path = os.path.join( os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe") option.add_extension("EasySpider/resources/app/XPathHelper.crx") + options.add_extension("EasySpider/resources/app/XPathHelper.crx") elif sys.platform == "win32" and platform.architecture()[0] == "64bit": options.binary_location = os.path.join( os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe") driver_path = os.path.join( os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe") option.add_extension("EasySpider/resources/app/XPathHelper.crx") + options.add_extension("EasySpider/resources/app/XPathHelper.crx") elif sys.platform == "linux" and platform.architecture()[0] == "64bit": options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome" driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64" option.add_extension("EasySpider/resources/app/XPathHelper.crx") + options.add_extension("EasySpider/resources/app/XPathHelper.crx") else: print("Unsupported platform") sys.exit() @@ -1419,6 +1430,7 @@ if __name__ == '__main__': try: with open(c.config_folder + c.config_file_name, "r", encoding='utf-8') as f: config = json.load(f) + print("Config file path: " + c.config_folder + c.config_file_name) absolute_user_data_folder = config["absolute_user_data_folder"] print("\nAbsolute_user_data_folder:", absolute_user_data_folder, "\n") @@ -1428,6 +1440,9 @@ if __name__ == '__main__': option.add_argument( f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒 option.add_argument("--profile-directory=Default") + options.add_argument( + f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒 + options.add_argument("--profile-directory=Default") if c.headless: print("Headless mode") @@ -1444,7 +1459,7 @@ if __name__ == '__main__': threads = [] for i in c.id: - print(options) + # print(options) print("id: ", i) if c.read_type == "remote": print("remote") @@ -1492,10 +1507,15 @@ if __name__ == '__main__': browser_t = MyChrome( options=options, chrome_options=option, executable_path=driver_path) elif cloudflare == 1: - browser_t = MyUCChrome( - options=options, chrome_options=option, executable_path=driver_path) - print("Pass Cloudflare Mode") - print("过Cloudflare验证模式") + if sys.platform != "darwin": + browser_t = MyUCChrome( + options=options, chrome_options=option, driver_executable_path=driver_path) + print("Pass Cloudflare Mode") + print("过Cloudflare验证模式") + else: + print("Not support Cloudflare Mode on MacOS") + print("MacOS不支持Cloudflare验证模式") + sys.exit() event = Event() event.set() thread = BrowserThread(browser_t, i, service, @@ -1505,26 +1525,33 @@ if __name__ == '__main__': thread.start() # Set the pause operation # if sys.platform != "linux": + # time.sleep(3) + # print("\n\n----------------------------------") + # print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。") + # print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.") + # print("----------------------------------\n\n") # Thread(target=check_pause, args=("p", event)).start() # else: time.sleep(3) + press_time = {"duration": 0, "is_pressed": False} print("\n\n----------------------------------") - print("正在运行任务,按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次按p键。") - print("Running task, press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please press 'p' again.") + print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。") + print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.") print("----------------------------------\n\n") # 使用监听器监听键盘输入 try: - from pynput.keyboard import Key, Listener - with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener: + with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener: listener.join() except: - print("您的操作系统不支持暂停功能。") - print("Your operating system does not support the pause function.") + pass + # print("您的操作系统不支持暂停功能。") + # print("Your operating system does not support the pause function.") - + # print("线程长度:", len(threads) ) for thread in threads: + print() thread.join() for thread in threads: diff --git a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py index f54f0a5..27b55e2 100644 --- a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py +++ b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py @@ -12,7 +12,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from selenium.webdriver.support.ui import Select from selenium.webdriver import ActionChains from selenium.webdriver.common.by import By -import undetected_chromedriver as uc desired_capabilities = DesiredCapabilities.CHROME desired_capabilities["pageLoadStrategy"] = "none" @@ -89,77 +88,80 @@ class MyChrome(webdriver.Chrome): raise NoSuchElementException else: return super().find_elements(by=by, value=value) - -class MyUCChrome(uc.Chrome): +import sys +if sys.platform != "darwin": # MacOS不支持Cloudflare + import undetected_chromedriver_ES as uc - def __init__(self, *args, **kwargs): - self.iframe_env = False # 现在的环境是root还是iframe - super().__init__(*args, **kwargs) # 调用父类的 __init__ + class MyUCChrome(uc.Chrome): - def find_element(self, by=By.ID, value=None, iframe=False): - # 在这里改变查找元素的行为 - if self.iframe_env: - super().switch_to.default_content() - self.iframe_env = False - if iframe: - # 获取所有的 iframe - try: - iframes = super().find_elements(By.CSS_SELECTOR, "iframe") - except Exception as e: - print(e) - find_element = False - # 遍历所有的 iframe 并点击里面的元素 - for iframe in iframes: - # 切换到 iframe + def __init__(self, *args, **kwargs): + self.iframe_env = False # 现在的环境是root还是iframe + super().__init__(*args, **kwargs) # 调用父类的 __init__ + + def find_element(self, by=By.ID, value=None, iframe=False): + # 在这里改变查找元素的行为 + if self.iframe_env: super().switch_to.default_content() - super().switch_to.frame(iframe) - self.iframe_env = True - try: - # 在 iframe 中查找并点击元素 - # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 - element = super().find_element(by=by, value=value) - find_element = True - except: - print("No such element found in the iframe") - # 完成操作后切回主文档 - # super().switch_to.default_content() - if find_element: - return element - if not find_element: - raise NoSuchElementException - else: - return super().find_element(by=by, value=value) - - def find_elements(self, by=By.ID, value=None, iframe=False): - # 在这里改变查找元素的行为 - if self.iframe_env: - super().switch_to.default_content() - self.iframe_env = False - if iframe: - # 获取所有的 iframe - iframes = super().find_elements(By.CSS_SELECTOR, "iframe") - find_element = False - # 遍历所有的 iframe 并点击里面的元素 - for iframe in iframes: - # 切换到 iframe + self.iframe_env = False + if iframe: + # 获取所有的 iframe try: + iframes = super().find_elements(By.CSS_SELECTOR, "iframe") + except Exception as e: + print(e) + find_element = False + # 遍历所有的 iframe 并点击里面的元素 + for iframe in iframes: + # 切换到 iframe super().switch_to.default_content() super().switch_to.frame(iframe) self.iframe_env = True - # 在 iframe 中查找并点击元素 - # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 - elements = super().find_elements(by=by, value=value) - if len(elements) > 0: + try: + # 在 iframe 中查找并点击元素 + # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 + element = super().find_element(by=by, value=value) find_element = True + except: + print("No such element found in the iframe") # 完成操作后切回主文档 # super().switch_to.default_content() if find_element: - return elements - except: - print("No such element found in the iframe") - if not find_element: - raise NoSuchElementException - else: - return super().find_elements(by=by, value=value) + return element + if not find_element: + raise NoSuchElementException + else: + return super().find_element(by=by, value=value) + + def find_elements(self, by=By.ID, value=None, iframe=False): + # 在这里改变查找元素的行为 + if self.iframe_env: + super().switch_to.default_content() + self.iframe_env = False + if iframe: + # 获取所有的 iframe + iframes = super().find_elements(By.CSS_SELECTOR, "iframe") + find_element = False + # 遍历所有的 iframe 并点击里面的元素 + for iframe in iframes: + # 切换到 iframe + try: + super().switch_to.default_content() + super().switch_to.frame(iframe) + self.iframe_env = True + # 在 iframe 中查找并点击元素 + # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 + elements = super().find_elements(by=by, value=value) + if len(elements) > 0: + find_element = True + # 完成操作后切回主文档 + # super().switch_to.default_content() + if find_element: + return elements + except: + print("No such element found in the iframe") + if not find_element: + raise NoSuchElementException + else: + return super().find_elements(by=by, value=value) diff --git a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py index c499055..bc788ca 100644 --- a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py +++ b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py @@ -4,6 +4,7 @@ import csv import datetime import json import os +import sys import re import time import uuid @@ -23,27 +24,57 @@ def is_valid_url(url): def lowercase_tags_in_xpath(xpath): return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath) - -def on_release_creator(event): + + +def on_press_creator(press_time, event): + def on_press(key): + try: + if key.char == 'p': + if press_time["is_pressed"] == False: # 没按下p键时,记录按下p键的时间 + press_time["duration"] = time.time() + press_time["is_pressed"] = True + else: # 按下p键时,判断按下p键的时间是否超过2.5秒 + duration = time.time() - press_time["duration"] + if duration > 2: + if event._flag == False: + print("任务执行中,长按p键暂停执行。") + print("Task is running, long press 'p' to pause.") + # 设置Event的值为True,使得线程b可以继续执行 + event.set() + else: + # 设置Event的值为False,使得线程b暂停执行 + print("任务已暂停,长按p键继续执行...") + print("Task paused, long press 'p' to continue...") + event.clear() + press_time["duration"] = time.time() + press_time["is_pressed"] = False + # print("按下p键时间:", press_time["duration"]) + except: + pass + return on_press + +def on_release_creator(event, press_time): def on_release(key): try: - if key.char == 'p': # 当按下esc键时,退出监听 - if event._flag == False: - print("任务执行中,按p键暂停执行。") - print("Task is running, press 'p' to pause.") - # 设置Event的值为True,使得线程b可以继续执行 - event.set() - else: - # 设置Event的值为False,使得线程b暂停执行 - print("任务已暂停,按p键继续执行...") - print("Task paused, press 'p' to continue...") - event.clear() + # duration = time.time() - press_time["duration"] + # # print("松开p键时间:", time.time(), "Duration: ", duration) + # if duration > 2.5 and key.char == 'p': + # if event._flag == False: + # print("任务执行中,按p键暂停执行。") + # print("Task is running, press 'p' to pause.") + # # 设置Event的值为True,使得线程b可以继续执行 + # event.set() + # else: + # # 设置Event的值为False,使得线程b暂停执行 + # print("任务已暂停,按p键继续执行...") + # print("Task paused, press 'p' to continue...") + # event.clear() + # press_time["duration"] = time.time() + press_time["is_pressed"] = False except: pass return on_release -def on_press(key): - pass # def check_pause(key, event): # while True: @@ -189,16 +220,22 @@ class myMySQL: def __init__(self, config_file="mysql_config.json"): # 读取配置文件 try: + if sys.platform == "darwin": + if config_file.find("./") >= 0: + config_file = config_file.replace("./", "") + config_file = os.path.expanduser("~/Library/Application Support/EasySpider/" + config_file) + print("MySQL config file path: ", config_file) with open(config_file, 'r') as f: config = json.load(f) host = config["host"] port = config["port"] - user = config["user"] + user = config["username"] passwd = config["password"] db = config["database"] - except: + except Exception as e: print("读取配置文件失败,请检查配置文件:"+config_file+"是否存在。") print("Failed to read configuration file, please check if the configuration file: "+config_file+" exists.") + print(e) try: self.conn = pymysql.connect( host=host, port=port, user=user, passwd=passwd, db=db) diff --git a/.temp_to_pub/compress.cmd b/.temp_to_pub/compress.cmd old mode 100644 new mode 100755 index 135a26d..a5e8251 --- a/.temp_to_pub/compress.cmd +++ b/.temp_to_pub/compress.cmd @@ -1 +1 @@ -python compress.py \ No newline at end of file +python3 compress.py diff --git a/.temp_to_pub/compress.py b/.temp_to_pub/compress.py index 7393827..6dc31b4 100644 --- a/.temp_to_pub/compress.py +++ b/.temp_to_pub/compress.py @@ -45,7 +45,10 @@ def compress_folder_to_7z_split(folder_path, output_file): try: subprocess.call(["7z", "a", "-v95m", output_file, folder_path]) except: - subprocess.call(["7za", "a", "-v95m", output_file, folder_path]) + try: + subprocess.call(["7za", "a", "-v95m", output_file, folder_path]) + except: + subprocess.call(["7zz", "a", "-v95m", output_file, folder_path]) easyspider_version = "0.3.5" @@ -104,5 +107,11 @@ if __name__ == "__main__": subprocess.call(["tar", "-Jcvf", file_name, "./EasySpider_Linux_x64"]) print(f"Compress {file_name} successfully!") elif sys.platform == "darwin" and platform.architecture()[0] == "64bit": - pass + file_name = f"EasySpider_{easyspider_version}_MacOS_all_arch.tar.gz" + if os.path.exists("./EasySpider_MacOS_all_arch/Data"): + shutil.rmtree("./EasySpider_MacOS_all_arch/Data") + os.mkdir("./EasySpider_MacOS_all_arch/Data") + subprocess.call(["tar", "-zcvf", file_name, "./EasySpider_MacOS_all_arch"]) + subprocess.call(["7zz", "a", "-v95m", file_name.replace(".tar.gz", ".7z"), file_name, "请继续解压EasySpider_MacOS_all_arch.tar.gz使用.txt"]) + print(f"Compress {file_name} successfully!") diff --git a/.temp_to_pub/请继续解压EasySpider_MacOS_all_arch.tar.gz使用.txt b/.temp_to_pub/请继续解压EasySpider_MacOS_all_arch.tar.gz使用.txt new file mode 100644 index 0000000..4feeecf --- /dev/null +++ b/.temp_to_pub/请继续解压EasySpider_MacOS_all_arch.tar.gz使用.txt @@ -0,0 +1 @@ +请继续解压.tar.gz文件以使用易采集。 diff --git a/.temp_to_pub/请继续解压zip文件以使用EasySpider.txt b/.temp_to_pub/请继续解压zip文件以使用EasySpider.txt deleted file mode 100644 index e69de29..0000000 diff --git a/ElectronJS/EasySpider_en.crx b/ElectronJS/EasySpider_en.crx new file mode 100644 index 0000000..175b4a4 Binary files /dev/null and b/ElectronJS/EasySpider_en.crx differ diff --git a/ElectronJS/EasySpider_zh.crx b/ElectronJS/EasySpider_zh.crx new file mode 100644 index 0000000..330c910 Binary files /dev/null and b/ElectronJS/EasySpider_zh.crx differ diff --git a/ElectronJS/config.json b/ElectronJS/config.json index 4cc4016..35cd969 100644 --- a/ElectronJS/config.json +++ b/ElectronJS/config.json @@ -1 +1 @@ -{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"} \ No newline at end of file +{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data1","debug":false,"mysql_config_path":"/Users/naibowang/Documents/EasySpider/ElectronJS/mysql_config.json","absolute_user_data_folder":"/Users/naibowang/Documents/EasySpider/ElectronJS/user_data1"} \ No newline at end of file diff --git a/ElectronJS/main.js b/ElectronJS/main.js index 7731b4b..dce33e1 100644 --- a/ElectronJS/main.js +++ b/ElectronJS/main.js @@ -324,7 +324,9 @@ async function beginInvoke(msg, ws) { config.absolute_user_data_folder = user_data_folder_path; fs.writeFileSync(path.join(task_server.getDir(), "config.json"), JSON.stringify(config)); } - config.mysql_config_path = msg.message.mysql_config_path; + if(msg.message.mysql_config_path != "-1"){ + config.mysql_config_path = msg.message.mysql_config_path; + } fs.writeFileSync(path.join(task_server.getDir(), "config.json"), JSON.stringify(config)); // child('Chrome/easyspider_executestage.exe', parameters, function(err,stdout, stderr) { // console.log(stdout); diff --git a/ElectronJS/package_macos.sh b/ElectronJS/package_macos.sh index b2cf8b8..dad66ec 100755 --- a/ElectronJS/package_macos.sh +++ b/ElectronJS/package_macos.sh @@ -23,4 +23,4 @@ cp ../ExecuteStage/easyspider_executestage.py ../.temp_to_pub/EasySpider_MacOS_a cp ../ExecuteStage/myChrome.py ../.temp_to_pub/EasySpider_MacOS_all_arch/Code cp ../ExecuteStage/utils.py ../.temp_to_pub/EasySpider_MacOS_all_arch/Code cp ../ExecuteStage/requirements.txt ../.temp_to_pub/EasySpider_MacOS_all_arch/Code -cp -Rf ../undetected_chromedriver_ES ../.temp_to_pub/EasySpider_MacOS_all_arch/Code +cp -Rf ../ExecuteStage/undetected_chromedriver_ES ../.temp_to_pub/EasySpider_MacOS_all_arch/Code diff --git a/ElectronJS/src/taskGrid/FlowChart.html b/ElectronJS/src/taskGrid/FlowChart.html index bfc7c0e..0837b53 100644 --- a/ElectronJS/src/taskGrid/FlowChart.html +++ b/ElectronJS/src/taskGrid/FlowChart.html @@ -563,7 +563,7 @@ Is it an extreme anti-scraping website like Cloudflare? No - Yes + Yes (Not support on MacOS, unless compile by yourself) Browser Emulation Type: diff --git a/ElectronJS/src/taskGrid/FlowChart_CN.html b/ElectronJS/src/taskGrid/FlowChart_CN.html index 0b77836..57a0d7a 100644 --- a/ElectronJS/src/taskGrid/FlowChart_CN.html +++ b/ElectronJS/src/taskGrid/FlowChart_CN.html @@ -563,7 +563,7 @@ 是否为Cloudflare等极端反爬网站(查看Cloudflare设计和执行教程): 否 - 是 + 是(MacOS不支持直接运行,但可以自行编译) 浏览器模拟类型: diff --git a/ElectronJS/src/taskGrid/invokeTask.html b/ElectronJS/src/taskGrid/invokeTask.html index ceff3e8..1f5428c 100644 --- a/ElectronJS/src/taskGrid/invokeTask.html +++ b/ElectronJS/src/taskGrid/invokeTask.html @@ -209,7 +209,7 @@ - {{"MySQL configuration file Path:~MySQL配置文件路径:" | lang}} + {{"MySQL configuration file Path, relative to this folder:~MySQL配置文件路径,路径相对此文件夹:" | lang}} {{config_folder}} @@ -485,13 +485,23 @@ ws.onopen = function () { // Web Socket 已连接上,使用 send() 方法发送数据 console.log("Connected"); - message = { + let message = { type: 0, //消息类型,0代表链接操作 message: { id: 1, //socket id } }; this.send(JSON.stringify(message)); + message = { //显示flowchart + type: 5, //消息类型,调用执行程序 + message: { + "id": -1, + "user_data_folder": "", + "mysql_config_path": "-1", + "execute_type": 1, + } + }; + this.send(JSON.stringify(message)); }; ws.onmessage = function(message){ message = JSON.parse(message.data); diff --git a/ElectronJS/tasks/157.json b/ElectronJS/tasks/157.json index 3357b1e..a84df4d 100644 --- a/ElectronJS/tasks/157.json +++ b/ElectronJS/tasks/157.json @@ -1 +1 @@ -{"id":157,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"2023/7/9 10:41:47","update_time":"2023/7/9 10:41:47","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"urlList_1","nodeId":2,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/"},{"id":1,"name":"参数2_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"手机"},{"id":2,"name":"参数3_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://shouji.jd.com/"},{"id":3,"name":"参数4_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/"},{"id":4,"name":"参数5_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"数码"},{"id":5,"name":"参数6_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://shuma.jd.com/"},{"id":6,"name":"参数7_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/"},{"id":7,"name":"参数8_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"厨具"},{"id":8,"name":"参数9_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://channel.jd.com/kitchenware.html"},{"id":9,"name":"参数10_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/"},{"id":10,"name":"参数11_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"工业品"},{"id":11,"name":"参数12_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://pro.jd.com/mall/active/2u2DR1dUiK34csAE3DqmcG8aXvUK/index.html"},{"id":12,"name":"参数13_图片地址","desc":"","type":"text","recordASField":1,"exampleValue":"//m.360buyimg.com/babel/s1125x600_jfs/t1/156011/19/36990/85599/646c850aF5e22eaa0/87641bfb5cf707ba.jpg!q70.dpg"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":1,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":3,"index":3,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[4],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"]}},{"id":4,"index":4,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":1,"relative":true,"name":"参数1_文本","desc":"","relativeXPath":"/span[1]","allXPaths":["/span[1]","//span[contains(., '/')]","//SPAN[@class='LeftSide_cate_menu_line__vzQu9 LeftSide_fore0__r2Yrl']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/span[last()-1]"],"exampleValues":[{"num":0,"value":"/"}],"unique_index":"/span[1]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":1,"contentType":0,"relative":true,"name":"参数2_链接文本","desc":"","relativeXPath":"/a[1]","allXPaths":["/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"],"exampleValues":[{"num":0,"value":"手机"}],"unique_index":"/a[1]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数3_链接地址","desc":"","relativeXPath":"/a[1]","allXPaths":["/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"],"exampleValues":[{"num":0,"value":"https://shouji.jd.com/"}],"unique_index":"/a[1]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":1,"relative":true,"name":"参数4_文本","desc":"","relativeXPath":"/span[2]","allXPaths":["/span[2]","//span[contains(., '/')]","//SPAN[@class='LeftSide_cate_menu_line__vzQu9 undefined']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/span"],"exampleValues":[{"num":0,"value":"/"}],"unique_index":"/span[2]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":1,"contentType":0,"relative":true,"name":"参数5_链接文本","desc":"","relativeXPath":"/a[2]","allXPaths":["/a[2]","//a[contains(., '数码')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a"],"exampleValues":[{"num":0,"value":"数码"}],"unique_index":"/a[2]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数6_链接地址","desc":"","relativeXPath":"/a[2]","allXPaths":["/a[2]","//a[contains(., '数码')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a"],"exampleValues":[{"num":0,"value":"https://shuma.jd.com/"}],"unique_index":"/a[2]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":1,"relative":true,"name":"参数7_文本","desc":"","relativeXPath":"/span[3]","allXPaths":["/span[3]","//span[contains(., '/')]","//SPAN[@class='LeftSide_cate_menu_line__vzQu9 undefined']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-9]/span"],"exampleValues":[{"num":3,"value":"/"}],"unique_index":"/span[3]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":1,"contentType":0,"relative":true,"name":"参数8_链接文本","desc":"","relativeXPath":"/a[3]","allXPaths":["/a[3]","//a[contains(., '厨具')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-9]/a"],"exampleValues":[{"num":3,"value":"厨具"}],"unique_index":"/a[3]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数9_链接地址","desc":"","relativeXPath":"/a[3]","allXPaths":["/a[3]","//a[contains(., '厨具')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-9]/a"],"exampleValues":[{"num":3,"value":"https://channel.jd.com/kitchenware.html"}],"unique_index":"/a[3]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":1,"relative":true,"name":"参数10_文本","desc":"","relativeXPath":"/span[4]","allXPaths":["/span[4]","//span[contains(., '/')]","//SPAN[@class='LeftSide_cate_menu_line__vzQu9 undefined']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-8]/span"],"exampleValues":[{"num":4,"value":"/"}],"unique_index":"/span[4]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":1,"contentType":0,"relative":true,"name":"参数11_链接文本","desc":"","relativeXPath":"/a[4]","allXPaths":["/a[4]","//a[contains(., '工业品')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-8]/a"],"exampleValues":[{"num":4,"value":"工业品"}],"unique_index":"/a[4]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数12_链接地址","desc":"","relativeXPath":"/a[4]","allXPaths":["/a[4]","//a[contains(., '工业品')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-8]/a"],"exampleValues":[{"num":4,"value":"https://pro.jd.com/mall/active/2u2DR1dUiK34csAE3DqmcG8aXvUK/index.html"}],"unique_index":"/a[4]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":4,"contentType":0,"relative":false,"name":"参数13_图片地址","desc":"","extractType":0,"relativeXPath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[3]/div[1]/div[1]/a[1]/img[1]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[3]/div[1]/div[1]/a[1]/img[1]","//img[contains(., '')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-1]/div/div[last()-1]/div/div[last()-1]/div/div[last()-6]/div/div/a/img"],"exampleValues":[{"num":0,"value":"//m.360buyimg.com/babel/s1125x600_jfs/t1/156011/19/36990/85599/646c850aF5e22eaa0/87641bfb5cf707ba.jpg!q70.dpg"}],"unique_index":"65z1z1niylfljutw14e","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]} \ No newline at end of file +{"id":157,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"2023/7/9 10:41:47","update_time":"2023/7/10 04:49:49","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"mysql","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"urlList_1","nodeId":2,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/"},{"id":1,"name":"参数2_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"手机"},{"id":2,"name":"参数3_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://shouji.jd.com/"},{"id":3,"name":"参数4_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/"},{"id":4,"name":"参数5_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"数码"},{"id":5,"name":"参数6_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://shuma.jd.com/"},{"id":6,"name":"参数7_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/"},{"id":7,"name":"参数8_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"厨具"},{"id":8,"name":"参数9_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://channel.jd.com/kitchenware.html"},{"id":9,"name":"参数10_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/"},{"id":10,"name":"参数11_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"工业品"},{"id":11,"name":"参数12_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://pro.jd.com/mall/active/2u2DR1dUiK34csAE3DqmcG8aXvUK/index.html"},{"id":12,"name":"参数13_图片地址","desc":"","type":"text","recordASField":1,"exampleValue":"//m.360buyimg.com/babel/s1125x600_jfs/t1/156011/19/36990/85599/646c850aF5e22eaa0/87641bfb5cf707ba.jpg!q70.dpg"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":1,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":3,"index":3,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[4],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"]}},{"id":4,"index":4,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":1,"relative":true,"name":"参数1_文本","desc":"","relativeXPath":"/span[1]","allXPaths":["/span[1]","//span[contains(., '/')]","//SPAN[@class='LeftSide_cate_menu_line__vzQu9 LeftSide_fore0__r2Yrl']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/span[last()-1]"],"exampleValues":[{"num":0,"value":"/"}],"unique_index":"/span[1]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":1,"contentType":0,"relative":true,"name":"参数2_链接文本","desc":"","relativeXPath":"/a[1]","allXPaths":["/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"],"exampleValues":[{"num":0,"value":"手机"}],"unique_index":"/a[1]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数3_链接地址","desc":"","relativeXPath":"/a[1]","allXPaths":["/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"],"exampleValues":[{"num":0,"value":"https://shouji.jd.com/"}],"unique_index":"/a[1]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":1,"relative":true,"name":"参数4_文本","desc":"","relativeXPath":"/span[2]","allXPaths":["/span[2]","//span[contains(., '/')]","//SPAN[@class='LeftSide_cate_menu_line__vzQu9 undefined']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/span"],"exampleValues":[{"num":0,"value":"/"}],"unique_index":"/span[2]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":1,"contentType":0,"relative":true,"name":"参数5_链接文本","desc":"","relativeXPath":"/a[2]","allXPaths":["/a[2]","//a[contains(., '数码')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a"],"exampleValues":[{"num":0,"value":"数码"}],"unique_index":"/a[2]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数6_链接地址","desc":"","relativeXPath":"/a[2]","allXPaths":["/a[2]","//a[contains(., '数码')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a"],"exampleValues":[{"num":0,"value":"https://shuma.jd.com/"}],"unique_index":"/a[2]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":1,"relative":true,"name":"参数7_文本","desc":"","relativeXPath":"/span[3]","allXPaths":["/span[3]","//span[contains(., '/')]","//SPAN[@class='LeftSide_cate_menu_line__vzQu9 undefined']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-9]/span"],"exampleValues":[{"num":3,"value":"/"}],"unique_index":"/span[3]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":1,"contentType":0,"relative":true,"name":"参数8_链接文本","desc":"","relativeXPath":"/a[3]","allXPaths":["/a[3]","//a[contains(., '厨具')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-9]/a"],"exampleValues":[{"num":3,"value":"厨具"}],"unique_index":"/a[3]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数9_链接地址","desc":"","relativeXPath":"/a[3]","allXPaths":["/a[3]","//a[contains(., '厨具')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-9]/a"],"exampleValues":[{"num":3,"value":"https://channel.jd.com/kitchenware.html"}],"unique_index":"/a[3]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":1,"relative":true,"name":"参数10_文本","desc":"","relativeXPath":"/span[4]","allXPaths":["/span[4]","//span[contains(., '/')]","//SPAN[@class='LeftSide_cate_menu_line__vzQu9 undefined']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-8]/span"],"exampleValues":[{"num":4,"value":"/"}],"unique_index":"/span[4]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":1,"contentType":0,"relative":true,"name":"参数11_链接文本","desc":"","relativeXPath":"/a[4]","allXPaths":["/a[4]","//a[contains(., '工业品')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-8]/a"],"exampleValues":[{"num":4,"value":"工业品"}],"unique_index":"/a[4]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数12_链接地址","desc":"","relativeXPath":"/a[4]","allXPaths":["/a[4]","//a[contains(., '工业品')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-8]/a"],"exampleValues":[{"num":4,"value":"https://pro.jd.com/mall/active/2u2DR1dUiK34csAE3DqmcG8aXvUK/index.html"}],"unique_index":"/a[4]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":4,"contentType":0,"relative":false,"name":"参数13_图片地址","desc":"","extractType":0,"relativeXPath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[3]/div[1]/div[1]/a[1]/img[1]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[3]/div[1]/div[1]/a[1]/img[1]","//img[contains(., '')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-1]/div/div[last()-1]/div/div[last()-1]/div/div[last()-6]/div/div/a/img"],"exampleValues":[{"num":0,"value":"//m.360buyimg.com/babel/s1125x600_jfs/t1/156011/19/36990/85599/646c850aF5e22eaa0/87641bfb5cf707ba.jpg!q70.dpg"}],"unique_index":"65z1z1niylfljutw14e","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]} \ No newline at end of file diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index 8a9d6a1..c1aa0ec 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -15,7 +15,7 @@ import time import requests from urllib.parse import urljoin from lxml import etree -import undetected_chromedriver as uc +# import undetected_chromedriver as uc from pynput.keyboard import Key, Listener from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.keys import Keys @@ -42,7 +42,9 @@ import pytesseract from PIL import Image # import uuid from threading import Thread, Event -from myChrome import MyChrome, MyUCChrome +from myChrome import MyChrome +if sys.platform != "darwin": + from myChrome import MyUCChrome from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel desired_capabilities = DesiredCapabilities.CHROME desired_capabilities["pageLoadStrategy"] = "none" @@ -1327,8 +1329,8 @@ class BrowserThread(Thread): if __name__ == '__main__': - from multiprocessing import freeze_support - freeze_support() # 防止无限死循环多开 + # from multiprocessing import freeze_support + # freeze_support() # 防止无限死循环多开 config = { "id": [0], "saved_file_name": "", @@ -1361,6 +1363,9 @@ if __name__ == '__main__': # option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome" # driver_path = os.getcwd()+ "/chromedriver_mac64" print(driver_path) + if c.config_folder == "": + c.config_folder = os.path.expanduser("~/Library/Application Support/EasySpider/") + # print("Config folder for MacOS:", c.config_folder) elif os.path.exists(os.getcwd()+"/EasySpider/resources"): # 打包后的路径 print("Finding chromedriver in EasySpider", os.getcwd()+"/EasySpider") @@ -1425,6 +1430,7 @@ if __name__ == '__main__': try: with open(c.config_folder + c.config_file_name, "r", encoding='utf-8') as f: config = json.load(f) + print("Config file path: " + c.config_folder + c.config_file_name) absolute_user_data_folder = config["absolute_user_data_folder"] print("\nAbsolute_user_data_folder:", absolute_user_data_folder, "\n") @@ -1501,13 +1507,15 @@ if __name__ == '__main__': browser_t = MyChrome( options=options, chrome_options=option, executable_path=driver_path) elif cloudflare == 1: - if sys.platform == "linux": - import ssl - ssl._create_default_https_context = ssl._create_unverified_context # 忽略证书验证 - browser_t = MyUCChrome( + if sys.platform != "darwin": + browser_t = MyUCChrome( options=options, chrome_options=option, driver_executable_path=driver_path) - print("Pass Cloudflare Mode") - print("过Cloudflare验证模式") + print("Pass Cloudflare Mode") + print("过Cloudflare验证模式") + else: + print("Not support Cloudflare Mode on MacOS") + print("MacOS不支持Cloudflare验证模式") + sys.exit() event = Event() event.set() thread = BrowserThread(browser_t, i, service, diff --git a/ExecuteStage/myChrome.py b/ExecuteStage/myChrome.py index 9926368..27b55e2 100644 --- a/ExecuteStage/myChrome.py +++ b/ExecuteStage/myChrome.py @@ -12,7 +12,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from selenium.webdriver.support.ui import Select from selenium.webdriver import ActionChains from selenium.webdriver.common.by import By -import undetected_chromedriver_ES as uc desired_capabilities = DesiredCapabilities.CHROME desired_capabilities["pageLoadStrategy"] = "none" @@ -89,77 +88,80 @@ class MyChrome(webdriver.Chrome): raise NoSuchElementException else: return super().find_elements(by=by, value=value) - -class MyUCChrome(uc.Chrome): +import sys +if sys.platform != "darwin": # MacOS不支持Cloudflare + import undetected_chromedriver_ES as uc - def __init__(self, *args, **kwargs): - self.iframe_env = False # 现在的环境是root还是iframe - super().__init__(*args, **kwargs) # 调用父类的 __init__ + class MyUCChrome(uc.Chrome): - def find_element(self, by=By.ID, value=None, iframe=False): - # 在这里改变查找元素的行为 - if self.iframe_env: - super().switch_to.default_content() - self.iframe_env = False - if iframe: - # 获取所有的 iframe - try: - iframes = super().find_elements(By.CSS_SELECTOR, "iframe") - except Exception as e: - print(e) - find_element = False - # 遍历所有的 iframe 并点击里面的元素 - for iframe in iframes: - # 切换到 iframe + def __init__(self, *args, **kwargs): + self.iframe_env = False # 现在的环境是root还是iframe + super().__init__(*args, **kwargs) # 调用父类的 __init__ + + def find_element(self, by=By.ID, value=None, iframe=False): + # 在这里改变查找元素的行为 + if self.iframe_env: super().switch_to.default_content() - super().switch_to.frame(iframe) - self.iframe_env = True - try: - # 在 iframe 中查找并点击元素 - # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 - element = super().find_element(by=by, value=value) - find_element = True - except: - print("No such element found in the iframe") - # 完成操作后切回主文档 - # super().switch_to.default_content() - if find_element: - return element - if not find_element: - raise NoSuchElementException - else: - return super().find_element(by=by, value=value) - - def find_elements(self, by=By.ID, value=None, iframe=False): - # 在这里改变查找元素的行为 - if self.iframe_env: - super().switch_to.default_content() - self.iframe_env = False - if iframe: - # 获取所有的 iframe - iframes = super().find_elements(By.CSS_SELECTOR, "iframe") - find_element = False - # 遍历所有的 iframe 并点击里面的元素 - for iframe in iframes: - # 切换到 iframe + self.iframe_env = False + if iframe: + # 获取所有的 iframe try: + iframes = super().find_elements(By.CSS_SELECTOR, "iframe") + except Exception as e: + print(e) + find_element = False + # 遍历所有的 iframe 并点击里面的元素 + for iframe in iframes: + # 切换到 iframe super().switch_to.default_content() super().switch_to.frame(iframe) self.iframe_env = True - # 在 iframe 中查找并点击元素 - # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 - elements = super().find_elements(by=by, value=value) - if len(elements) > 0: + try: + # 在 iframe 中查找并点击元素 + # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 + element = super().find_element(by=by, value=value) find_element = True + except: + print("No such element found in the iframe") # 完成操作后切回主文档 # super().switch_to.default_content() if find_element: - return elements - except: - print("No such element found in the iframe") - if not find_element: - raise NoSuchElementException - else: - return super().find_elements(by=by, value=value) + return element + if not find_element: + raise NoSuchElementException + else: + return super().find_element(by=by, value=value) + + def find_elements(self, by=By.ID, value=None, iframe=False): + # 在这里改变查找元素的行为 + if self.iframe_env: + super().switch_to.default_content() + self.iframe_env = False + if iframe: + # 获取所有的 iframe + iframes = super().find_elements(By.CSS_SELECTOR, "iframe") + find_element = False + # 遍历所有的 iframe 并点击里面的元素 + for iframe in iframes: + # 切换到 iframe + try: + super().switch_to.default_content() + super().switch_to.frame(iframe) + self.iframe_env = True + # 在 iframe 中查找并点击元素 + # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 + elements = super().find_elements(by=by, value=value) + if len(elements) > 0: + find_element = True + # 完成操作后切回主文档 + # super().switch_to.default_content() + if find_element: + return elements + except: + print("No such element found in the iframe") + if not find_element: + raise NoSuchElementException + else: + return super().find_elements(by=by, value=value) diff --git a/ExecuteStage/utils.py b/ExecuteStage/utils.py index 0d70081..bc788ca 100644 --- a/ExecuteStage/utils.py +++ b/ExecuteStage/utils.py @@ -4,6 +4,7 @@ import csv import datetime import json import os +import sys import re import time import uuid @@ -219,6 +220,11 @@ class myMySQL: def __init__(self, config_file="mysql_config.json"): # 读取配置文件 try: + if sys.platform == "darwin": + if config_file.find("./") >= 0: + config_file = config_file.replace("./", "") + config_file = os.path.expanduser("~/Library/Application Support/EasySpider/" + config_file) + print("MySQL config file path: ", config_file) with open(config_file, 'r') as f: config = json.load(f) host = config["host"]