diff --git a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py index b78fe99..5f6c067 100644 --- a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py +++ b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/easyspider_executestage.py @@ -1,6 +1,12 @@ # -*- coding: utf-8 -*- # import atexit -from utils import download_image, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, replace_field_values, write_to_csv, write_to_excel, write_to_json +import atexit +import copy +import shutil +import string +import undetected_chromedriver as uc +from utils import download_image, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, \ + on_press_creator, on_release_creator, readCode, replace_field_values, write_to_csv, write_to_excel, write_to_json from myChrome import MyChrome from threading import Thread, Event from PIL import Image @@ -22,6 +28,7 @@ from selenium import webdriver from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.keys import Keys from selenium.webdriver.chrome.options import Options +from selenium.webdriver.chrome.service import Service from pynput.keyboard import Key, Listener from datetime import datetime import io # 遇到错误退出时应执行的代码 @@ -40,8 +47,8 @@ from ddddocr import DdddOcr from urllib.parse import urljoin from lxml import etree import onnxruntime + onnxruntime.set_default_logger_severity(3) # 隐藏onnxruntime的日志 -import undetected_chromedriver as uc # import pandas as pd # import numpy # import pytesseract @@ -53,7 +60,7 @@ desired_capabilities["pageLoadStrategy"] = "none" class BrowserThread(Thread): - def __init__(self, browser_t, id, service, version, event, saveName, config): + def __init__(self, browser_t, id, service, version, event, saveName, config, option): Thread.__init__(self) self.logs = io.StringIO() try: @@ -61,6 +68,7 @@ class BrowserThread(Thread): except: self.log = True self.browser = browser_t + self.option = option self.config = config self.version = version self.totalSteps = 0 @@ -76,25 +84,32 @@ class BrowserThread(Thread): self.SAVED = False self.BREAK = False self.CONTINUE = False + try: + maximizeWindow = service["maximizeWindow"] + except: + maximizeWindow = 0 + if maximizeWindow == 1: + self.browser.maximize_window() # 名称设定 if saveName != "": # 命令行覆盖保存名称 self.saveName = saveName # 保存文件的名字 now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S") self.saveName = self.saveName.replace("current_time", now) - self.print_and_log("任务ID", i, "的保存文件名为:", self.saveName) - self.print_and_log("Save Name for task ID", i, "is:", self.saveName) - if not os.path.exists("Data/Task_" + str(i)): - os.mkdir("Data/Task_" + str(i)) - if not os.path.exists("Data/Task_" + str(i) + "/" + self.saveName): - os.mkdir("Data/Task_" + str(i) + "/" + + self.print_and_log("任务ID", id, "的保存文件名为:", self.saveName) + self.print_and_log("Save Name for task ID", id, "is:", self.saveName) + if not os.path.exists("Data/Task_" + str(id)): + os.mkdir("Data/Task_" + str(id)) + if not os.path.exists("Data/Task_" + str(id) + "/" + self.saveName): + os.mkdir("Data/Task_" + str(id) + "/" + self.saveName) # 创建保存文件夹用来保存截图 self.getDataStep = 0 self.startSteps = 0 try: startFromExit = service["startFromExit"] # 从上次退出的步骤开始 if startFromExit == 1: - with open("Data/Task_" + str(self.id) + "/" + self.saveName + '_steps.txt', 'r', encoding='utf-8-sig') as file_obj: + with open("Data/Task_" + str(self.id) + "/" + self.saveName + '_steps.txt', 'r', + encoding='utf-8-sig') as file_obj: self.startSteps = int(file_obj.read()) # 读取已执行步数 except: pass @@ -114,8 +129,13 @@ class BrowserThread(Thread): js = f.read() self.print_and_log("Loading stealth.min.js") self.browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', { - 'source': js}) # TMALL 反扒 + 'source': js}) # TMALL 反扒 WebDriverWait(self.browser, 10) + self.browser.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command') + path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(self.id)) + self.paramss = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': path}} + + self.browser.execute("send_command", self.paramss) # 下载地址改变 # self.browser.get('about:blank') self.procedure = service["graph"] # 程序执行流程 try: @@ -148,8 +168,11 @@ class BrowserThread(Thread): self.save_threshold = service["saveThreshold"] # 保存最低阈值 except: self.save_threshold = 10 - self.links = list( - filter(isnotnull, service["links"].split("\n"))) # 要执行的link的列表 + try: + self.links = list( + filter(isnotnull, service["links"].split("\n"))) # 要执行的link的列表 + except: + self.links = list(filter(isnotnull, service["url"])) # 要执行的link self.OUTPUT = [] # 采集的数据 self.writeMode = 1 # 写入模式,0为新建,1为追加 if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx": @@ -157,7 +180,7 @@ class BrowserThread(Thread): self.OUTPUT.append([]) # 添加表头 self.writeMode = 0 elif self.outputFormat == "json": - self.writeMode = 3 # JSON模式无需判断是否存在文件 + self.writeMode = 3 # JSON模式无需判断是否存在文件 elif self.outputFormat == "mysql": self.mysql = myMySQL(config["mysql_config_path"]) self.mysql.create_table(self.saveName, service["outputParameters"]) @@ -228,7 +251,11 @@ class BrowserThread(Thread): cookies = node["parameters"]["cookies"] except: node["parameters"]["cookies"] = "" - if node["option"] == 2: # 点击操作 + elif node["option"] == 2: # 点击操作 + try: + alertHandleType = node["parameters"]["alertHandleType"] + except: + node["parameters"]["alertHandleType"] = 0 if node["parameters"]["useLoop"]: if self.task_version <= "0.3.5": # 0.3.5及以下版本的EasySpider下的循环点击不支持相对XPath @@ -263,8 +290,10 @@ class BrowserThread(Thread): if para["contentType"] == 8: self.print_and_log( "默认的ddddocr识别功能如果觉得不好用,可以自行修改源码get_content函数->contentType == 8的位置换成自己想要的OCR模型然后自己编译运行;或者可以先设置采集内容类型为“元素截图”把图片保存下来,然后用自定义操作调用自己写的程序,程序的功能是读取这个最新生成的图片,然后用好用的模型,如PaddleOCR把图片识别出来,然后把返回值返回给程序作为参数输出。") - self.print_and_log("If you think the default ddddocr function is not good enough, you can modify the source code get_content function -> contentType == 8 position to your own OCR model and then compile and run it; or you can first set the content type of the crawler to \"Element Screenshot\" to save the picture, and then call your own program with custom operations. The function of the program is to read the latest generated picture, then use a good model, such as PaddleOCR to recognize the picture, and then return the return value as a parameter output to the program.") - if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2: + self.print_and_log( + "If you think the default ddddocr function is not good enough, you can modify the source code get_content function -> contentType == 8 position to your own OCR model and then compile and run it; or you can first set the content type of the crawler to \"Element Screenshot\" to save the picture, and then call your own program with custom operations. The function of the program is to read the latest generated picture, then use a good model, such as PaddleOCR to recognize the picture, and then return the return value as a parameter output to the program.") + if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para[ + "nodeType"] <= 2: para["optimizable"] = True else: para["optimizable"] = False @@ -289,6 +318,13 @@ class BrowserThread(Thread): node["parameters"]["xpath"] = "" self.print_and_log("您的任务版本号为" + self.task_version + ",循环点击不支持相对XPath写法,已自动切换为纯循环的XPath") + elif node["option"] == 8: # 循环操作 + try: + exitElement = node["parameters"]["exitElement"] + if exitElement == "": + node["parameters"]["exitElement"] = "//body" + except: + node["parameters"]["exitElement"] = "//body" self.print_and_log("预处理完成|Preprocess completed") def readFromExcel(self): @@ -299,8 +335,9 @@ class BrowserThread(Thread): except: self.print_and_log("读取Excel失败,将会使用默认参数执行任务,请检查文件路径是否正确:", os.path.abspath(self.inputExcel)) - self.print_and_log("Failed to read Excel, will execute the task with default parameters, please check if the file path is correct: ", - os.path.abspath(self.inputExcel)) + self.print_and_log( + "Failed to read Excel, will execute the task with default parameters, please check if the file path is correct: ", + os.path.abspath(self.inputExcel)) time.sleep(5) return 0 @@ -329,7 +366,7 @@ class BrowserThread(Thread): if "urlList_0" in data.keys(): self.links = data["urlList_0"] except: - pass + self.links = "about:blank" task = self.service for key, value in data.items(): for i in range(len(task["inputParameters"])): @@ -367,7 +404,21 @@ class BrowserThread(Thread): self.saveData(exit=True) if self.outputFormat == "mysql": self.mysql.close() - + try: + quitWaitTime = self.service["quitWaitTime"] + except: + quitWaitTime = 60 + self.print_and_log(f"任务执行完毕,将在{quitWaitTime}秒后自动退出浏览器并清理临时用户目录,等待时间可在保存任务对话框中设置。") + self.print_and_log(f"The task is completed, the browser will exit automatically and the temporary user directory will be cleaned up after {quitWaitTime} seconds, the waiting time can be set in the save task dialog.") + time.sleep(quitWaitTime) + self.browser.quit() + self.print_and_log("正在清理临时用户目录……|Cleaning up temporary user directory...") + try: + shutil.rmtree(self.option["tmp_user_data_folder"]) + except: + pass + self.print_and_log("清理完成!|Clean up completed!") + def recordLog(self, *args, **kwargs): now = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") print(now + ":", *args, file=self.logs, **kwargs) @@ -387,29 +438,32 @@ class BrowserThread(Thread): # 写入日志 # self.recordLog("持久化存储数据/Persistently store data") if self.log: - with open("Data/Task_" + str(self.id) + "/" + self.saveName + '.log', 'a', encoding='utf-8-sig') as file_obj: + with open("Data/Task_" + str(self.id) + "/" + self.saveName + '.log', 'a', + encoding='utf-8-sig') as file_obj: file_obj.write(self.logs.getvalue()) file_obj.close() # 写入已执行步数 - with open("Data/Task_" + str(self.id) + "/" + self.saveName + '_steps.txt', 'w', encoding='utf-8-sig') as file_obj: + with open("Data/Task_" + str(self.id) + "/" + self.saveName + '_steps.txt', 'w', + encoding='utf-8-sig') as file_obj: file_obj.write(str(self.totalSteps + 1)) file_obj.close() # 写入数据 if self.outputFormat == "csv" or self.outputFormat == "txt": file_name = "Data/Task_" + \ - str(self.id) + "/" + self.saveName + \ - '.' + self.outputFormat + str(self.id) + "/" + self.saveName + \ + '.' + self.outputFormat write_to_csv(file_name, self.OUTPUT, self.outputParametersRecord) elif self.outputFormat == "xlsx": file_name = "Data/Task_" + \ - str(self.id) + "/" + self.saveName + '.xlsx' + str(self.id) + "/" + self.saveName + '.xlsx' write_to_excel( file_name, self.OUTPUT, self.outputParametersTypes, self.outputParametersRecord) elif self.outputFormat == "json": file_name = "Data/Task_" + \ - str(self.id) + "/" + self.saveName + '.json' - write_to_json(file_name, self.OUTPUT, self.outputParametersTypes, self.outputParametersRecord, self.outputParameters.keys()) + str(self.id) + "/" + self.saveName + '.json' + write_to_json(file_name, self.OUTPUT, self.outputParametersTypes, + self.outputParametersRecord, self.outputParameters.keys()) elif self.outputFormat == "mysql": self.mysql.write_to_mysql( self.OUTPUT, self.outputParametersRecord, self.outputParametersTypes) @@ -549,6 +603,7 @@ class BrowserThread(Thread): self.recordLog("JavaScript execution failed") elif int(codeMode) == 5: try: + code = readCode(code) output = exec(code) self.recordLog("执行下面的代码:" + code) self.recordLog("Execute the following code:" + code) @@ -558,6 +613,7 @@ class BrowserThread(Thread): code, ", error is:", e) elif int(codeMode) == 6: try: + code = readCode(code) output = eval(code) self.recordLog("获得下面的代码返回值:" + code) self.recordLog( @@ -618,6 +674,13 @@ class BrowserThread(Thread): elif codeMode == 4: self.CONTINUE = True self.recordLog("跳过本次循环|Skip this loop") + elif codeMode == 7: # 暂停程序执行 + self.event.clear() + self.print_and_log( + f"根据设置的自定义操作,任务已暂停,长按{self.service['pauseKey']}键继续执行...|Task paused according to custom operation, long press '{self.service['pauseKey']}' to continue...") + elif codeMode == 8: # 刷新页面 + self.browser.refresh() + self.print_and_log("根据设置的自定义操作,任务已刷新页面|Task refreshed page according to custom operation") else: # 0 1 5 6 output = self.execute_code( codeMode, code, max_wait_time, iframe=paras["iframe"]) @@ -647,7 +710,8 @@ class BrowserThread(Thread): optionValue = loopValue optionMode = 1 try: - xpath = replace_field_values(para["xpath"], self.outputParameters, self) + xpath = replace_field_values( + para["xpath"], self.outputParameters, self) dropdown = Select(self.browser.find_element( By.XPATH, xpath, iframe=para["iframe"])) try: @@ -678,14 +742,15 @@ class BrowserThread(Thread): def moveToElement(self, para, loopElement=None, loopPath="", index=0): time.sleep(0.1) # 移动之前等待0.1秒 loopPath = replace_field_values(loopPath, self.outputParameters, self) - xpath = replace_field_values(para["xpath"], self.outputParameters, self) + xpath = replace_field_values( + para["xpath"], self.outputParameters, self) if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath if xpath == "": path = loopPath else: path = "(" + loopPath + ")" + \ - "[" + str(index + 1) + "]" + \ - xpath + "[" + str(index + 1) + "]" + \ + xpath index = 0 # 如果是相对循环内元素的点击,在定位到元素后,index应该重置为0 # element = loopElement else: @@ -746,8 +811,8 @@ class BrowserThread(Thread): for i in node["sequence"]: # 从根节点开始向下读取 self.executeNode(i, loopValue, loopPath, index) elif node["option"] == 1: # 打开网页操作 - if not (nodeId == 1 and self.service["cloudflare"] == 1): - self.openPage(node["parameters"], loopValue) + # if not (nodeId == 1 and self.service["cloudflare"] == 1): + self.openPage(node["parameters"], loopValue) elif node["option"] == 2: # 点击元素 self.clickElement(node["parameters"], loopValue, loopPath, index) elif node["option"] == 3: # 提取数据 @@ -842,16 +907,20 @@ class BrowserThread(Thread): elif tType <= 8: # JS命令返回值 if tType == 5: # JS命令返回值等于 output = self.execute_code( - 0, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], iframe=cnode["parameters"]["iframe"]) + 0, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], + iframe=cnode["parameters"]["iframe"]) elif tType == 6: # System output = self.execute_code( - 1, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], iframe=cnode["parameters"]["iframe"]) + 1, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], + iframe=cnode["parameters"]["iframe"]) elif tType == 7: # 针对当前循环项的JS命令返回值 output = self.execute_code( - 2, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], loopElement, iframe=cnode["parameters"]["iframe"]) + 2, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], loopElement, + iframe=cnode["parameters"]["iframe"]) elif tType == 8: # 针对当前循环项的System命令返回值 output = self.execute_code( - 6, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], loopElement, iframe=cnode["parameters"]["iframe"]) + 6, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], loopElement, + iframe=cnode["parameters"]["iframe"]) try: if output.find("rue") != -1: # 如果返回值中包含true code = 1 @@ -869,14 +938,52 @@ class BrowserThread(Thread): self.recordLog( "判断条件内所有条件分支的条件都不满足|None of the conditions in the judgment condition are met") + def handleHistory(self, node, xpath, thisHitoryURL, thisHistoryLength, index, element=None, elements=None): + if self.history["index"] != thisHistoryLength and self.history["handle"] == self.browser.current_window_handle: # 如果执行完一次循环之后历史记录发生了变化,注意当前页面的判断 + difference = thisHistoryLength - self.history["index"] # 计算历史记录变化差值 + self.browser.execute_script('history.go(' + str(difference) + ')') # 回退历史记录 + # if node["parameters"]["historyWait"] > 2: # 回退后要等待的时间 + time.sleep(node["parameters"]["historyWait"]) + # else: + # time.sleep(2) + try: + self.browser.execute_script('window.stop()') + except: + pass + ti = 0 + if self.browser.current_url.startswith("data:"): + while self.browser.current_url != thisHitoryURL: # 如果执行完一次循环之后网址发生了变化 + try: + self.browser.execute_script("history.go(1)") # 如果是data:开头的网址,就前进一步 + except: # 超时的情况下 + pass + ti += 1 + if self.browser.current_url == thisHitoryURL or ti > thisHistoryLength: # 如果执行完一次循环之后网址发生了变化 + break + time.sleep(2) + if element == None: # 不固定元素列表 + element = self.browser.find_elements(By.XPATH, xpath, iframe=node["parameters"]["iframe"]) + else: # 固定元素列表 + element = self.browser.find_element(By.XPATH, xpath, iframe=node["parameters"]["iframe"]) + if index > 0: + index -= 1 # 如果是data:开头的网址,就要重试一次 + else: + if element == None: + element = elements + return index, element + # 对循环的处理 def loopExecute(self, node, loopValue, clickPath="", index=0): time.sleep(0.1) # 第一次执行循环的时候强制等待1秒 thisHandle = self.browser.current_window_handle # 记录本次循环内的标签页的ID - thisHistoryLength = self.browser.execute_script( - 'return history.length') # 记录本次循环内的history的length + try: + thisHistoryLength = self.browser.execute_script( + 'return history.length') # 记录本次循环内的history的length + except: + thisHistoryLength = 0 self.history["index"] = thisHistoryLength self.history["handle"] = thisHandle + thisHitoryURL = self.browser.current_url if int(node["parameters"]["loopType"]) == 0: # 单个元素循环 # 无跳转标签页操作 count = 0 # 执行次数 @@ -887,8 +994,7 @@ class BrowserThread(Thread): # newBodyText = self.browser.page_source # newBodyText = self.browser.find_element(By.XPATH, "//body").text if node["parameters"]["exitCount"] == 0: - newBodyText = self.browser.find_element( - By.CSS_SELECTOR, "body", iframe=False).text + newBodyText = self.browser.find_element(By.XPATH, node["parameters"]["exitElement"], iframe=node["parameters"]["iframe"]).text if node["parameters"]["iframe"]: # 如果标记了iframe iframes = self.browser.find_elements( By.CSS_SELECTOR, "iframe", iframe=False) @@ -955,7 +1061,9 @@ class BrowserThread(Thread): break if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件 output = self.execute_code(int( - node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"]) + node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], + node["parameters"]["breakCodeWaitTime"], + iframe=node["parameters"]["iframe"]) code = get_output_code(output) if code <= 0: break @@ -970,7 +1078,8 @@ class BrowserThread(Thread): self.print_and_log("Loop element not found: ", xpath) self.print_and_log("找不到循环元素: ", xpath) - for index in range(len(elements)): + index = 0 + while index < len(elements): for i in node["sequence"]: # 挨个顺序执行循环里所有的操作 self.executeNode(i, elements[index], xpath, index) @@ -998,29 +1107,16 @@ class BrowserThread(Thread): self.print_and_log("关闭标签页发生错误:", e) self.print_and_log( "Error occurred while closing tab: ", e) - if self.history["index"] != thisHistoryLength and self.history[ - "handle"] == self.browser.current_window_handle: # 如果执行完一次循环之后历史记录发生了变化,注意当前页面的判断 - difference = thisHistoryLength - \ - self.history["index"] # 计算历史记录变化差值 - self.browser.execute_script( - 'history.go(' + str(difference) + ')') # 回退历史记录 - # if node["parameters"]["historyWait"] > 2: # 回退后要等待的时间 - time.sleep(node["parameters"]["historyWait"]) - # else: - # time.sleep(2) - # 切换历史记录等待: - self.recordLog("Change history back time or: ", - node["parameters"]["historyWait"]) - try: - self.browser.execute_script('window.stop()') - except: - pass + index, elements = self.handleHistory(node, xpath, thisHitoryURL, thisHistoryLength, index, elements=elements) if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件 output = self.execute_code(int( - node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"]) + node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], + node["parameters"]["breakCodeWaitTime"], + iframe=node["parameters"]["iframe"]) code = get_output_code(output) if code <= 0: break + index = index + 1 except NoSuchElementException: self.print_and_log("Loop element not found: ", xpath) self.print_and_log("找不到循环元素: ", xpath) @@ -1028,9 +1124,14 @@ class BrowserThread(Thread): raise elif int(node["parameters"]["loopType"]) == 2: # 固定元素列表 # 千万不要忘了分割!! - for path in node["parameters"]["pathList"].split("\n"): + paths = node["parameters"]["pathList"].split("\n") + # for path in node["parameters"]["pathList"].split("\n"): + index = 0 + while index < len(paths): + path = paths[index] try: - path = replace_field_values(path, self.outputParameters, self) + path = replace_field_values( + path, self.outputParameters, self) element = self.browser.find_element( By.XPATH, path, iframe=node["parameters"]["iframe"]) # self.recordLog("循环元素|Loop element:", path) @@ -1060,34 +1161,23 @@ class BrowserThread(Thread): self.print_and_log("关闭标签页发生错误:", e) self.print_and_log( "Error occurred while closing tab: ", e) - if self.history["index"] != thisHistoryLength and self.history[ - "handle"] == self.browser.current_window_handle: # 如果执行完一次循环之后历史记录发生了变化,注意当前页面的判断 - difference = thisHistoryLength - \ - self.history["index"] # 计算历史记录变化差值 - self.browser.execute_script( - 'history.go(' + str(difference) + ')') # 回退历史记录 - # if node["parameters"]["historyWait"] > 2: # 回退后要等待的时间 - time.sleep(node["parameters"]["historyWait"]) - # else: - # time.sleep(2) - self.recordLog("Change history back time or: ", - node["parameters"]["historyWait"]) - try: - self.browser.execute_script('window.stop()') - except: - pass + index, element = self.handleHistory(node, path, thisHitoryURL, thisHistoryLength, index, element=element) except NoSuchElementException: self.print_and_log("Loop element not found: ", path) self.print_and_log("找不到循环元素: ", path) + index += 1 continue # 循环中找不到元素就略过操作 except Exception as e: raise if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件 output = self.execute_code(int( - node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"]) + node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], + node["parameters"]["breakCodeWaitTime"], + iframe=node["parameters"]["iframe"]) code = get_output_code(output) if code <= 0: break + index = index + 1 elif int(node["parameters"]["loopType"]) == 3: # 固定文本列表 textList = node["parameters"]["textList"].split("\n") if len(textList) == 1: # 如果固定文本列表只有一行,现在就可以替换变量 @@ -1106,7 +1196,9 @@ class BrowserThread(Thread): break if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件 output = self.execute_code(int( - node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"]) + node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], + node["parameters"]["breakCodeWaitTime"], + iframe=node["parameters"]["iframe"]) code = get_output_code(output) if code <= 0: break @@ -1134,7 +1226,9 @@ class BrowserThread(Thread): break if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件 output = self.execute_code(int( - node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"]) + node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], + node["parameters"]["breakCodeWaitTime"], + iframe=node["parameters"]["iframe"]) code = get_output_code(output) if code <= 0: break @@ -1142,13 +1236,16 @@ class BrowserThread(Thread): while True: # do while循环 if int(node["parameters"]["loopType"]) == 5: # JS output = self.execute_code( - 0, node["parameters"]["code"], node["parameters"]["waitTime"], iframe=node["parameters"]["iframe"]) + 0, node["parameters"]["code"], node["parameters"]["waitTime"], + iframe=node["parameters"]["iframe"]) elif int(node["parameters"]["loopType"]) == 6: # System output = self.execute_code( - 1, node["parameters"]["code"], node["parameters"]["waitTime"], iframe=node["parameters"]["iframe"]) + 1, node["parameters"]["code"], node["parameters"]["waitTime"], + iframe=node["parameters"]["iframe"]) elif int(node["parameters"]["loopType"]) == 7: # Python output = self.execute_code( - 6, node["parameters"]["code"], node["parameters"]["waitTime"], iframe=node["parameters"]["iframe"]) + 6, node["parameters"]["code"], node["parameters"]["waitTime"], + iframe=node["parameters"]["iframe"]) code = get_output_code(output) if code <= 0: break @@ -1224,13 +1321,17 @@ class BrowserThread(Thread): "return history.length") except: self.history["index"] = 0 + except Exception as e: + self.print_and_log("History Length Error") + self.history["index"] = 0 self.scrollDown(para) # 控制屏幕向下滚动 # 键盘输入事件 def inputInfo(self, para, loopValue): time.sleep(0.1) # 输入之前等待0.1秒 try: - xpath = replace_field_values(para["xpath"], self.outputParameters, self) + xpath = replace_field_values( + para["xpath"], self.outputParameters, self) textbox = self.browser.find_element( By.XPATH, xpath, iframe=para["iframe"]) # textbox.send_keys(Keys.CONTROL, 'a') @@ -1289,15 +1390,17 @@ class BrowserThread(Thread): try: # element = self.browser.find_element( # By.XPATH, path, iframe=para["iframe"]) - clickPath = replace_field_values(clickPath, self.outputParameters, self) - xpath = replace_field_values(para["xpath"], self.outputParameters, self) + clickPath = replace_field_values( + clickPath, self.outputParameters, self) + xpath = replace_field_values( + para["xpath"], self.outputParameters, self) if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath if xpath == "": path = clickPath else: path = "(" + clickPath + ")" + \ - "[" + str(index + 1) + "]" + \ - xpath + "[" + str(index + 1) + "]" + \ + xpath index = 0 # 如果是相对循环内元素的点击,在定位到元素后,index应该重置为0 # element = loopElement else: @@ -1326,7 +1429,7 @@ class BrowserThread(Thread): actions.click(element).perform() elif click_way == 1: # 用js的点击方法 script = 'var result = document.evaluate(`' + path + \ - '`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i 0: + try: + time.sleep(1.5) + alert = self.browser.switch_to.alert + alertHandleType = int(para["alertHandleType"]) + if alertHandleType == 1: + alert.accept() + self.print_and_log("已点击确认|Clicked OK") + elif alertHandleType == 2: + alert.dismiss() + self.print_and_log("已点击取消|Clicked Cancel") + except Exception as e: + self.print_and_log("找不到弹窗|Cannot find alert") + # 点击后对该元素执行一段JavaScript代码 try: if para["afterJS"] != "": @@ -1375,6 +1494,9 @@ class BrowserThread(Thread): pass self.history["index"] = self.browser.execute_script( "return history.length") + except Exception as e: + self.print_and_log("History Length Error") + self.history["index"] = 0 else: try: self.history["index"] = self.browser.execute_script( @@ -1387,6 +1509,9 @@ class BrowserThread(Thread): self.history["index"] = self.browser.execute_script( "return history.length") # 如果打开了新窗口,切换到新窗口 + except Exception as e: + self.print_and_log("History Length Error") + self.history["index"] = 0 self.scrollDown(para) # 根据参数配置向下滚动 # rt.end() @@ -1485,7 +1610,7 @@ class BrowserThread(Thread): image = Image.open(screenshot_stream).convert('L') temp_name = "OCR_" + str(time.time()) + ".png" location = "Data/Task_" + \ - str(self.id) + "/" + self.saveName + "/" + temp_name + str(self.id) + "/" + self.saveName + "/" + temp_name image.save(location) ocr = DdddOcr() with open(location, 'rb') as f: @@ -1556,7 +1681,8 @@ class BrowserThread(Thread): # 提取数据事件 def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0): - parentPath = replace_field_values(parentPath, self.outputParameters, self) + parentPath = replace_field_values( + parentPath, self.outputParameters, self) if para["clear"] == 1: self.clearOutputParameters() try: @@ -1591,7 +1717,8 @@ class BrowserThread(Thread): # relativeXPath = lowercase_tags_in_xpath(relativeXPath) # 已经有text()或@href了,不需要再加 content_type = "" - if relativeXPath.find("/@href") >= 0 or relativeXPath.find("/text()") >= 0 or relativeXPath.find("::text()") >= 0: + if relativeXPath.find("/@href") >= 0 or relativeXPath.find("/text()") >= 0 or relativeXPath.find( + "::text()") >= 0: content_type = "" elif p["nodeType"] == 2: content_type = "//@href" @@ -1608,8 +1735,8 @@ class BrowserThread(Thread): if relativeXPath.find("//") >= 0: if xpath.startswith("/"): full_path = "(" + parentPath + ")" + \ - "[" + str(index + 1) + "]" + \ - relativeXPath + content_type + "[" + str(index + 1) + "]" + \ + relativeXPath + content_type else: # 如果是id()这种形式,不需要包parentPath full_path = xpath try: @@ -1642,20 +1769,26 @@ class BrowserThread(Thread): else: content = p["default"] if not self.dataNotFoundKeys[p["name"]]: - self.print_and_log('Element %s not found with parameter name %s when extracting data, use default, this error will only show once' % ( - relativeXPath, p["name"])) - self.print_and_log("提取数据操作时,字段名 %s 对应XPath %s 未找到,使用默认值,本字段将不再重复报错" % ( - p["name"], relativeXPath)) + self.print_and_log( + 'Element %s not found with parameter name %s when extracting data, use default, this error will only show once' % ( + relativeXPath, p["name"])) + self.print_and_log( + "提取数据操作时,字段名 %s 对应XPath %s 未找到,使用默认值,本字段将不再重复报错" % ( + p["name"], relativeXPath)) self.dataNotFoundKeys[p["name"]] = True except Exception as e: if not self.dataNotFoundKeys[p["name"]]: - self.print_and_log('Element %s not found with parameter name %s when extracting data, use default, this error will only show once' % ( - relativeXPath, p["name"])) - self.print_and_log("提取数据操作时,字段名 %s 对应XPath %s 未找到(请查看原因,如是否翻页太快页面元素未加载出来),使用默认值,本字段将不再重复报错" % ( - p["name"], relativeXPath)) + self.print_and_log( + 'Element %s not found with parameter name %s when extracting data, use default, this error will only show once' % ( + relativeXPath, p["name"])) + self.print_and_log( + "提取数据操作时,字段名 %s 对应XPath %s 未找到(请查看原因,如是否翻页太快页面元素未加载出来),使用默认值,本字段将不再重复报错" % ( + p["name"], relativeXPath)) self.dataNotFoundKeys[p["name"]] = True - self.outputParameters[p["name"]] = content - + try: + self.outputParameters[p["name"]] = content + except: + self.outputParameters[p["name"]] = p["default"] # 对于不能优化的操作,使用selenium执行 for p in para["paras"]: if not p["optimizable"]: @@ -1676,8 +1809,8 @@ class BrowserThread(Thread): # relativeXPath + ")" + \ # "[" + str(index + 1) + "]" full_path = "(" + parentPath + ")" + \ - "[" + str(index + 1) + "]" + \ - relativeXPath + "[" + str(index + 1) + "]" + \ + relativeXPath element = self.browser.find_element( By.XPATH, full_path, iframe=p["iframe"]) else: @@ -1686,7 +1819,8 @@ class BrowserThread(Thread): else: element = self.browser.find_element( By.XPATH, relativeXPath, iframe=p["iframe"]) - except (NoSuchElementException, InvalidSelectorException, StaleElementReferenceException): # 找不到元素的时候,使用默认值 + except ( + NoSuchElementException, InvalidSelectorException, StaleElementReferenceException): # 找不到元素的时候,使用默认值 # self.print_and_log(p) try: content = p["default"] @@ -1695,10 +1829,12 @@ class BrowserThread(Thread): self.outputParameters[p["name"]] = content try: if not self.dataNotFoundKeys[p["name"]]: - self.print_and_log('Element %s not found with parameter name %s when extracting data, use default, this error will only show once' % ( - relativeXPath, p["name"])) - self.print_and_log("提取数据操作时,字段名 %s 对应XPath %s 未找到,使用默认值,本字段将不再重复报错" % ( - p["name"], relativeXPath)) + self.print_and_log( + 'Element %s not found with parameter name %s when extracting data, use default, this error will only show once' % ( + relativeXPath, p["name"])) + self.print_and_log( + "提取数据操作时,字段名 %s 对应XPath %s 未找到,使用默认值,本字段将不再重复报错" % ( + p["name"], relativeXPath)) except: pass continue @@ -1759,15 +1895,13 @@ class BrowserThread(Thread): self.maxViewLength, self.outputParametersRecord) self.OUTPUT.append(line) - if __name__ == '__main__': from multiprocessing import freeze_support - freeze_support() # 防止无限死循环多开 - + freeze_support() # 防止无限死循环多开 # 如果需要调试程序,请在命令行参数中加入--keyboard 0 来禁用键盘监听以提升调试速度 # If you need to debug the program, please add --keyboard 0 in the command line parameters to disable keyboard listening to improve debugging speed config = { - "id": [0], + "ids": [0], "saved_file_name": "", "user_data": False, "config_folder": "", @@ -1776,23 +1910,24 @@ if __name__ == '__main__': "headless": False, "server_address": "http://localhost:8074", "keyboard": True, # 是否监听键盘输入 - "version": "0.5.0", + "version": "0.6.0", } c = Config(config) print(c) - options = Options() + options = webdriver.ChromeOptions() driver_path = "chromedriver.exe" import platform + print(sys.platform, platform.architecture()) - option = webdriver.ChromeOptions() - if not os.path.exists(os.getcwd()+"/Data"): - os.mkdir(os.getcwd()+"/Data") + # option = webdriver.ChromeOptions() + if not os.path.exists(os.getcwd() + "/Data"): + os.mkdir(os.getcwd() + "/Data") if sys.platform == "darwin" and platform.architecture()[0] == "64bit": options.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome" # MacOS需要用option而不是options! - option.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome" - option.add_extension( - "EasySpider.app/Contents/Resources/app/XPathHelper.crx") + # option.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome" + # option.add_extension( + # "EasySpider.app/Contents/Resources/app/XPathHelper.crx") options.add_extension( "EasySpider.app/Contents/Resources/app/XPathHelper.crx") driver_path = "EasySpider.app/Contents/Resources/app/chromedriver_mac64" @@ -1805,32 +1940,32 @@ if __name__ == '__main__': c.config_folder = os.path.expanduser( "~/Library/Application Support/EasySpider/") # print("Config folder for MacOS:", c.config_folder) - elif os.path.exists(os.getcwd()+"/EasySpider/resources"): # 打包后的路径 + elif os.path.exists(os.getcwd() + "/EasySpider/resources"): # 打包后的路径 print("Finding chromedriver in EasySpider", - os.getcwd()+"/EasySpider") + os.getcwd() + "/EasySpider") if sys.platform == "win32" and platform.architecture()[0] == "32bit": options.binary_location = os.path.join( os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe") # 指定chrome位置 - option.binary_location = os.path.join( - os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe") # 指定chrome位置 + # option.binary_location = os.path.join( + # os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe") # 指定chrome位置 driver_path = os.path.join( os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe") - option.add_extension("EasySpider/resources/app/XPathHelper.crx") + # option.add_extension("EasySpider/resources/app/XPathHelper.crx") options.add_extension("EasySpider/resources/app/XPathHelper.crx") elif sys.platform == "win32" and platform.architecture()[0] == "64bit": options.binary_location = os.path.join( os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe") - option.binary_location = os.path.join( - os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe") + # option.binary_location = os.path.join( + # os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe") driver_path = os.path.join( os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe") - option.add_extension("EasySpider/resources/app/XPathHelper.crx") + # option.add_extension("EasySpider/resources/app/XPathHelper.crx") options.add_extension("EasySpider/resources/app/XPathHelper.crx") elif sys.platform == "linux" and platform.architecture()[0] == "64bit": options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome" - option.binary_location = "EasySpider/resources/app/chrome_linux64/chrome" + # option.binary_location = "EasySpider/resources/app/chrome_linux64/chrome" driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64" - option.add_extension("EasySpider/resources/app/XPathHelper.crx") + # option.add_extension("EasySpider/resources/app/XPathHelper.crx") options.add_extension("EasySpider/resources/app/XPathHelper.crx") else: print("Unsupported platform") @@ -1843,20 +1978,25 @@ if __name__ == '__main__': # options.binary_location = "./Chrome/chrome.exe" # 指定chrome位置 # # option.binary_location = "C:\\Users\\q9823\\AppData\\Local\\Google\\Chrome\\Application\\chrome.exe" # driver_path = "./Chrome/chromedriver.exe" - elif os.path.exists(os.getcwd()+"/../ElectronJS"): + elif os.path.exists(os.getcwd() + "/../ElectronJS"): # 软件dev用 print("Finding chromedriver in EasySpider", - os.getcwd()+"/ElectronJS") - option.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置 + os.getcwd() + "/ElectronJS") + # option.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置 options.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置 driver_path = "../ElectronJS/chrome_win64/chromedriver_win64.exe" - option.add_extension("../ElectronJS/XPathHelper.crx") + # option.add_extension("../ElectronJS/XPathHelper.crx") + options.add_extension("../ElectronJS/XPathHelper.crx") else: options.binary_location = "./chrome.exe" # 指定chrome位置 + # option.binary_location = "./chrome.exe" # 指定chrome位置 driver_path = "./chromedriver.exe" - option.add_extension("XPathHelper.crx") + # option.add_extension("XPathHelper.crx") + options.add_extension("XPathHelper.crx") - option.add_experimental_option( + # option.add_experimental_option( + # 'excludeSwitches', ['enable-automation']) # 以开发者模式 + options.add_experimental_option( 'excludeSwitches', ['enable-automation']) # 以开发者模式 # user_data_dir = r'' # 注意没有Default! @@ -1875,97 +2015,147 @@ if __name__ == '__main__': print("Config file path: " + c.config_folder + c.config_file_name) absolute_user_data_folder = config["absolute_user_data_folder"] - print("\nAbsolute_user_data_folder:", - absolute_user_data_folder, "\n") except: pass - if c.user_data: - option.add_argument( - f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒 - option.add_argument("--profile-directory=Default") - options.add_argument( - f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒 - options.add_argument("--profile-directory=Default") - - if c.headless: - print("Headless mode") - print("无头模式") - option.add_argument("--headless") - options.add_argument("--headless") # options.add_argument( # '--user-data-dir=C:\\Users\\q9823\\AppData\\Local\\Google\\Chrome\\User Data') # TMALL 反扒 - option.add_argument( - "--disable-blink-features=AutomationControlled") # TMALL 反扒 + # option.add_argument( + # "--disable-blink-features=AutomationControlled") # TMALL 反扒 options.add_argument( "--disable-blink-features=AutomationControlled") # TMALL 反扒 options.add_argument('-ignore-certificate-errors') options.add_argument('-ignore -ssl-errors') - option.add_argument('-ignore-certificate-errors') - option.add_argument('-ignore -ssl-errors') + # option.add_argument('-ignore-certificate-errors') + # option.add_argument('-ignore -ssl-errors') + + if c.headless: + print("Headless mode") + print("无头模式") + # option.add_argument("--headless") + options.add_argument("--headless") + + tmp_options = [] + for id in c.ids: + tmp_options.append({"options": copy.deepcopy(options), "tmp_user_data_folder": ""}) + + if c.user_data: + tmp_user_folder_parent = os.path.join(os.getcwd(), "TempUserDataFolder") + if not os.path.exists(tmp_user_folder_parent): + os.mkdir(tmp_user_folder_parent) + characters = string.ascii_letters + string.digits + for i in range(len(c.ids)): + id = c.ids[i] + # 从字符集中随机选择字符构成字符串 + random_string = ''.join(random.choice(characters) for i in range(10)) + tmp_user_data_folder = os.path.join(tmp_user_folder_parent, "user_data_" + str(id) + "_" + str(time.time()).replace(".","") + "_" + random_string) + tmp_options[i]["tmp_user_data_folder"] = tmp_user_data_folder + if os.path.exists(tmp_user_data_folder): + shutil.rmtree(tmp_user_data_folder) + print(f"Copying user data folder to: {tmp_user_data_folder}, please wait...") + print(f"正在复制用户信息目录到: {tmp_user_data_folder},请稍等...") + shutil.copytree(absolute_user_data_folder, tmp_user_data_folder) + print("User data folder copied successfully, if you exit the program before it finishes, please delete the temporary user data folder manually.") + print("用户信息目录复制成功,如果程序在运行过程中被手动退出,请手动删除临时用户信息目录。") + # option = tmp_options[i]["option"] + options = tmp_options[i]["options"] + # option.add_argument( + # f'--user-data-dir={tmp_user_data_folder}') # TMALL 反扒 + # option.add_argument("--profile-directory=Default") + options.add_argument( + f'--user-data-dir={tmp_user_data_folder}') # TMALL 反扒 + options.add_argument("--profile-directory=Default") + print( + "如果报错Selenium.common.exceptions.WebDriverException: Message: unknown error: Chrome failed to start: exited abnormally,说明有之前运行的Chrome实例没有正常关闭,请关闭之前打开的所有Chrome实例后再运行程序即可。") + print( + "If you get an error Selenium.common.exceptions.WebDriverException: Message: unknown error: Chrome failed to start: exited abnormally, it means that there is a Chrome instance that was not closed properly before, please close all Chrome instances that were opened before running the program.") threads = [] - for i in c.id: - # print(options) - print("id: ", i) + for i in range(len(c.ids)): + id = c.ids[i] + # option = tmp_options[i]["option"] + options = tmp_options[i]["options"] + print("id: ", id) if c.read_type == "remote": print("remote") content = requests.get( - c.server_address + "/queryExecutionInstance?id=" + str(i)) + c.server_address + "/queryExecutionInstance?id=" + str(id)) service = json.loads(content.text) # 加载服务信息 else: print("local") - with open("execution_instances/" + str(i) + ".json", 'r', encoding='utf-8') as f: + with open("execution_instances/" + str(id) + ".json", 'r', encoding='utf-8') as f: content = f.read() service = json.loads(content) # 加载服务信息 - print("Task Name:", service["name"]) - print("任务名称:", service["name"]) + try: + print("Task Name:", service["name"]) + print("任务名称:", service["name"]) + except: + print(f"Cannot find task with id: {str(id)}, please check whether {str(id)}.json exists in the 'execution_instances' folder.") + print(f"未找到id为{str(id)}的任务,请检查'execution_instances'文件夹中是否存在{str(id)}.json文件。") + continue try: cloudflare = service["cloudflare"] except: cloudflare = 0 if cloudflare == 0: options.add_argument('log-level=3') # 隐藏日志 - option.add_argument('log-level=3') # 隐藏日志 + # option.add_argument('log-level=3') # 隐藏日志 + path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(id)) + print("Data path:", path) options.add_experimental_option("prefs", { # 设置文件下载路径 - "download.default_directory": "Data/Task_" + str(i), + "download.default_directory": path, "download.prompt_for_download": False, # 禁止下载提示框 "plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}], "download.directory_upgrade": True, "download.extensions_to_open": "applications/pdf", - "plugins.always_open_pdf_externally": True # 总是在外部程序中打开PDF - }) - option.add_experimental_option("prefs", { - # 设置文件下载路径 - "download.default_directory": "Data/Task_" + str(i), - "download.prompt_for_download": False, # 禁止下载提示框 - "plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}], - "download.directory_upgrade": True, - "download.extensions_to_open": "applications/pdf", - "plugins.always_open_pdf_externally": True # 总是在外部程序中打开PDF + "plugins.always_open_pdf_externally": True, # 总是在外部程序中打开PDF + "safebrowsing_for_trusted_sources_enabled": False, + "safebrowsing.enabled": False, + 'safebrowsing.disable_download_protection': True, + 'profile.default_content_settings.popups': 0, }) + # option.add_experimental_option("prefs", { + # # 设置文件下载路径 + # "download.default_directory": path, + # "download.prompt_for_download": False, # 禁止下载提示框 + # "plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}], + # "download.directory_upgrade": True, + # "download.extensions_to_open": "applications/pdf", + # "plugins.always_open_pdf_externally": True, # 总是在外部程序中打开PDF + # "safebrowsing_for_trusted_sources_enabled": False, + # "safebrowsing.enabled": False, + # 'safebrowsing.enabled': False, + # 'safebrowsing.disable_download_protection': True, + # 'profile.default_content_settings.popups': 0, + # }) try: if service["environment"] == 1: - option.add_experimental_option( - 'mobileEmulation', {'deviceName': 'iPhone X'}) # 模拟iPhone X浏览 + # option.add_experimental_option( + # 'mobileEmulation', {'deviceName': 'iPhone X'}) # 模拟iPhone X浏览 options.add_experimental_option( 'mobileEmulation', {'deviceName': 'iPhone X'}) # 模拟iPhone X浏览 except: pass - browser_t = MyChrome( - options=options, chrome_options=option, executable_path=driver_path) + # browser_t = MyChrome( + # options=options, chrome_options=option, executable_path=driver_path) + selenium_service = Service(executable_path=driver_path) + browser_t = MyChrome(service=selenium_service, options=options) elif cloudflare == 1: if sys.platform == "win32": options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器 - # options.add_argument("--auto-open-devtools-for-tabs") + # options.add_argument("--auto-open-devtools-for-tabs") # options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器 - browser_t = MyUCChrome(options=options, driver_executable_path=driver_path) + browser_t = MyUCChrome( + options=options, driver_executable_path=driver_path) links = list(filter(isnotnull, service["links"].split("\n"))) - browser_t.execute_script('window.open("'+ links[0] +'","_blank");') # open page in new tab - time.sleep(5) # wait until page has loaded - browser_t.switch_to.window(browser_t.window_handles[1]) # switch to new tab + # open page in new tab + browser_t.execute_script( + 'window.open("' + links[0] + '","_blank");') + time.sleep(5) # wait until page has loaded + browser_t.switch_to.window( + browser_t.window_handles[1]) # switch to new tab # browser_t = uc.Chrome() else: print("Cloudflare模式只支持Windows x64平台。") @@ -1974,27 +2164,27 @@ if __name__ == '__main__': sys.exit() event = Event() event.set() - thread = BrowserThread(browser_t, i, service, - c.version, event, c.saved_file_name, config=config) - print("Thread with task id: ", i, " is created") + thread = BrowserThread(browser_t, id, service, + c.version, event, c.saved_file_name, config=config, option=tmp_options[i]) + print("Thread with task id: ", id, " is created") threads.append(thread) thread.start() # Set the pause operation # if sys.platform != "linux": # time.sleep(3) - # print("\n\n----------------------------------") - # print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。") - # print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.") - # print("----------------------------------\n\n") # Thread(target=check_pause, args=("p", event)).start() # else: time.sleep(3) - press_time = {"duration": 0, "is_pressed": False} + try: + pause_key = service["pauseKey"] + except: + pause_key = "p" + press_time = {"duration": 0, "is_pressed": False, "pause_key": pause_key} print("\n\n----------------------------------") print( - "正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。") + "正在运行任务,长按键盘" + pause_key + "键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按" + pause_key + "键。") print( - "Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.") + "Running task, long press '" + pause_key + "' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press '" + pause_key + "' again.") print("----------------------------------\n\n") # if cloudflare: # print("过Cloudflare验证模式有时候会不稳定,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。") @@ -2002,7 +2192,8 @@ if __name__ == '__main__': # 使用监听器监听键盘输入 try: if c.keyboard: - with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener: + with Listener(on_press=on_press_creator(press_time, event), + on_release=on_release_creator(event, press_time)) as listener: listener.join() except: pass @@ -2014,10 +2205,3 @@ if __name__ == '__main__': for thread in threads: print() thread.join() - - for thread in threads: - thread.browser.quit() - # print("Thread with task id: ", thread.id, " is closed") - print("程序已运行完成,请手动关闭此窗口。") - print( - "The program has finished running, please manually close this window.") diff --git a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py index a86f670..7934e52 100644 --- a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py +++ b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/myChrome.py @@ -37,19 +37,21 @@ class MyChrome(webdriver.Chrome): except Exception as e: print(e) find_element = False - # 遍历所有的 iframe 并点击里面的元素 + # 遍历所有的 iframe 并查找里面的元素 for iframe in iframes: # 切换到 iframe super().switch_to.default_content() super().switch_to.frame(iframe) self.iframe_env = True try: - # 在 iframe 中查找并点击元素 + # 在 iframe 中查找元素 # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 element = super().find_element(by=by, value=value) find_element = True - except: - print("No such element found in the iframe") + except NoSuchElementException as e: + print(f"No such element found in the iframe: {str(e)}") + except Exception as e: + print(f"Exception: {str(e)}") # 完成操作后切回主文档 # super().switch_to.default_content() if find_element: @@ -68,14 +70,14 @@ class MyChrome(webdriver.Chrome): # 获取所有的 iframe iframes = super().find_elements(By.CSS_SELECTOR, "iframe") find_element = False - # 遍历所有的 iframe 并点击里面的元素 + # 遍历所有的 iframe 并找到里面的元素 for iframe in iframes: # 切换到 iframe try: super().switch_to.default_content() super().switch_to.frame(iframe) self.iframe_env = True - # 在 iframe 中查找并点击元素 + # 在 iframe 中查找元素 # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 elements = super().find_elements(by=by, value=value) if len(elements) > 0: @@ -84,8 +86,10 @@ class MyChrome(webdriver.Chrome): # super().switch_to.default_content() if find_element: return elements - except: - print("No such element found in the iframe") + except NoSuchElementException as e: + print(f"No such element found in the iframe: {str(e)}") + except Exception as e: + print(f"Exception: {str(e)}") if not find_element: raise NoSuchElementException else: @@ -117,19 +121,21 @@ if sys.platform != "darwin": except Exception as e: print(e) find_element = False - # 遍历所有的 iframe 并点击里面的元素 + # 遍历所有的 iframe 并找到里面的元素 for iframe in iframes: # 切换到 iframe super().switch_to.default_content() super().switch_to.frame(iframe) self.iframe_env = True try: - # 在 iframe 中查找并点击元素 + # 在 iframe 中查找元素 # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 element = super().find_element(by=by, value=value) find_element = True - except: - print("No such element found in the iframe") + except NoSuchElementException as e: + print(f"No such element found in the iframe: {str(e)}") + except Exception as e: + print(f"Exception: {str(e)}") # 完成操作后切回主文档 # super().switch_to.default_content() if find_element: @@ -148,14 +154,14 @@ if sys.platform != "darwin": # 获取所有的 iframe iframes = super().find_elements(By.CSS_SELECTOR, "iframe") find_element = False - # 遍历所有的 iframe 并点击里面的元素 + # 遍历所有的 iframe 并查找里面的元素 for iframe in iframes: # 切换到 iframe try: super().switch_to.default_content() super().switch_to.frame(iframe) self.iframe_env = True - # 在 iframe 中查找并点击元素 + # 在 iframe 中查找元素 # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 elements = super().find_elements(by=by, value=value) if len(elements) > 0: @@ -164,8 +170,10 @@ if sys.platform != "darwin": # super().switch_to.default_content() if find_element: return elements - except: - print("No such element found in the iframe") + except NoSuchElementException as e: + print(f"No such element found in the iframe: {str(e)}") + except Exception as e: + print(f"Exception: {str(e)}") if not find_element: raise NoSuchElementException else: diff --git a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py index 77f9dc9..bdbde15 100644 --- a/.temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py +++ b/.temp_to_pub/EasySpider_MacOS_all_arch/Code/utils.py @@ -31,7 +31,7 @@ def lowercase_tags_in_xpath(xpath): def on_press_creator(press_time, event): def on_press(key): try: - if key.char == 'p': + if key.char == press_time["pause_key"]: if press_time["is_pressed"] == False: # 没按下p键时,记录按下p键的时间 press_time["duration"] = time.time() press_time["is_pressed"] = True @@ -39,14 +39,14 @@ def on_press_creator(press_time, event): duration = time.time() - press_time["duration"] if duration > 2: if event._flag == False: - print("任务执行中,长按p键暂停执行。") - print("Task is running, long press 'p' to pause.") + print("任务执行中,长按" + press_time["pause_key"] + "键暂停执行。") + print("Task is running, long press '" + press_time["pause_key"] + "' to pause.") # 设置Event的值为True,使得线程b可以继续执行 event.set() else: # 设置Event的值为False,使得线程b暂停执行 - print("任务已暂停,长按p键继续执行...") - print("Task paused, long press 'p' to continue...") + print("任务已暂停,长按" + press_time["pause_key"] + "键继续执行...") + print("Task paused, long press '" + press_time["pause_key"] + "' to continue...") event.clear() press_time["duration"] = time.time() press_time["is_pressed"] = False @@ -176,26 +176,36 @@ def write_to_csv(file_name, data, record): f_csv.writerow(to_write) f.close() - -def eval_repl(matchobj): - print(matchobj.group(1)) - return str(eval(matchobj.group(1), globals(), locals())) - - - def replace_field_values(orginal_text, outputParameters, browser=None): pattern = r'Field\["([^"]+)"\]' try: replaced_text = re.sub( pattern, lambda match: outputParameters.get(match.group(1), ''), orginal_text) - if replaced_text.find("EVAL") != -1: # 如果返回值中包含EVAL + if re.search(r'eval\(', replaced_text, re.IGNORECASE): # 如果返回值中包含EVAL replaced_text = replaced_text.replace("self.", "browser.") - replaced_text = re.sub(r'EVAL\("(.*?)"\)', lambda match: str(eval(match.group(1))), replaced_text) - except: + pattern = re.compile(r'(?i)eval\("(.+?)"\)') + # 循环替换所有匹配到的eval语句 + while True: + match = pattern.search(replaced_text) + if not match: + break + # 执行eval并将其结果转换为字符串形式 + eval_replaced_text = str(eval(match.group(1))) + # 替换eval语句 + replaced_text = replaced_text.replace(match.group(0), eval_replaced_text) + except Exception as e: + print("eval替换失败,请检查eval语句是否正确。| Failed to replace eval, please check if the eval statement is correct.") replaced_text = orginal_text return replaced_text +def readCode(code): + if code.startswith("outside:"): + file_name = os.path.join(os.path.abspath("./"), code[8:]) + with open(file_name, 'r', encoding='utf-8-sig') as file_obj: + code = file_obj.read() + return code + def write_to_json(file_name, data, types, record, keys): keys = list(keys) # Prepare empty list for data diff --git a/.temp_to_pub/EasySpider_MacOS_all_arch/myCode.py b/.temp_to_pub/EasySpider_MacOS_all_arch/myCode.py new file mode 100644 index 0000000..80e17f6 --- /dev/null +++ b/.temp_to_pub/EasySpider_MacOS_all_arch/myCode.py @@ -0,0 +1,57 @@ +""" +这是一个示例代码文件,可以直接在这里写Python代码,然后在程序中的exec操作中调用。如果此文件名称为myCode.py,请将此文件放置在EasySpider程序目录下(和Data/文件夹同级),那么在程序中的exec操作中可以直接写outside:myCode.py来调用此文件中的代码,示例: + +1. 用self.browser表示当前操作的浏览器,可直接用selenium的API进行操作,如self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END)即可滚动到页面最下方。 +2. 自定义一个全局变量:self.myVar = 1 +3. 操纵上面定义的全局变量:self.myVar = self.myVar + 1 +4. 打印上面定义的全局变量:print(self.myVar) +5. 将自定义变量的值赋值为某个字段提取的值:self.myVar = self.outputParameters["字段名"] +6. 修改某个字段提取的值:self.outputParameters["字段名"] = "新值" + +对于更加复杂的操作,请直接下载源代码并编译执行。 + +This is a sample code snippet file. You can directly write Python code here, and then call it in the program using an `exec` operation. If this file is named myCode.py, please place this file under the EasySpider program directory (at the same level as the Data/ folder). Then, in the program's `exec` operation, you can directly write outside:myCode.py to invoke the code from this file. Examples: + +1. Use self.browser to refer to the current browser being operated on. You can directly utilize the selenium API to perform actions. For instance, self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END) will scroll to the bottom of the page. +2. Define a global variable: self.myVar = 1 +3. Manipulate the above-defined global variable: self.myVar = self.myVar + 1 +4. Print the above-defined global variable: print(self.myVar) +5. Assign a value to the custom variable from a value extracted for some field: self.myVar = self.outputParameters["field name"] +6. Modify the value extracted for some field: self.outputParameters["field name"] = "new value" + +For more complex operations, please download the source code and compile it for execution. +""" + +# 请在下面编写你的代码,不要有代码缩进!!! | Please write your code below, do not indent the code!!! + +# 导包 | Import packages +from selenium.common.exceptions import ElementClickInterceptedException + +# 定义一个函数 | Define a function +def test(n = 0): + for i in range(0, n): + if i % 2 == 0: + print(i) + return "test" + +# 异常捕获 | Exception capture +try: + # 使用XPath定位元素并点击浏览器中元素 | Use XPath to locate the element and click the element in the browser + element = self.browser.find_element(By.XPATH, "//*[contains(@class, 'LeftSide_menu_list__qXCeM')]/div[1]/a[1]") # 这里请忽略IDE的报错,因为代码是嵌入到程序中的,IDE无法识别self变量和By变量是正常的 | Please ignore the error reported by the IDE, because the code is embedded in the program, and the IDE cannot recognize that the self variable and By variable are normal + element.click() + print("点击成功|Click success") +except ElementClickInterceptedException: + # 如果元素被遮挡,点击失败 + print("元素被遮挡,无法点击|The element is blocked and cannot be clicked") +except Exception as e: + # 打印其他异常 + print("发生了一个异常|An exception occurred", e) +finally: + # 测试函数 | Test function + self.a = 1 + print("a = ", self.a) + self.a = self.a + 1 + print("a = ", self.a) + print("All parameters:", self.outputParameters) + print(test(3)) + print("执行完毕|Execution completed") diff --git a/ElectronJS/EasySpider_en.crx b/ElectronJS/EasySpider_en.crx index 098a53a..4011b9b 100644 Binary files a/ElectronJS/EasySpider_en.crx and b/ElectronJS/EasySpider_en.crx differ diff --git a/ElectronJS/EasySpider_zh.crx b/ElectronJS/EasySpider_zh.crx index 124272e..df5d620 100644 Binary files a/ElectronJS/EasySpider_zh.crx and b/ElectronJS/EasySpider_zh.crx differ diff --git a/ElectronJS/config.json b/ElectronJS/config.json index 252c802..0d7be07 100644 --- a/ElectronJS/config.json +++ b/ElectronJS/config.json @@ -1 +1 @@ -{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"sys_version":"x64","mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"} \ No newline at end of file +{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"sys_version":"x64","mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"/Users/naibo/Documents/EasySpider/ElectronJS/user_data"} \ No newline at end of file diff --git a/ElectronJS/package-lock.json b/ElectronJS/package-lock.json index e3cc5cb..9627f3b 100644 --- a/ElectronJS/package-lock.json +++ b/ElectronJS/package-lock.json @@ -15,6 +15,7 @@ "formidable": "^3.5.0", "http": "^0.0.1-security", "multer": "^1.4.5-lts.1", + "node-abi": "^3.52.0", "node-window-manager": "^2.2.4", "selenium-webdriver": "^4.16.0", "ws": "^8.12.0", @@ -3914,9 +3915,9 @@ "license": "MIT" }, "node_modules/node-abi": { - "version": "3.45.0", - "dev": true, - "license": "MIT", + "version": "3.52.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.52.0.tgz", + "integrity": "sha512-JJ98b02z16ILv7859irtXn4oUaFWADtvkzy2c0IAatNVX2Mc9Yoh8z6hZInn3QwvMEYhHuQloYi+TTQy67SIdQ==", "dependencies": { "semver": "^7.3.5" }, @@ -4814,7 +4815,6 @@ }, "node_modules/semver": { "version": "7.5.3", - "dev": true, "license": "ISC", "dependencies": { "lru-cache": "^6.0.0" @@ -4834,7 +4834,6 @@ }, "node_modules/semver/node_modules/lru-cache": { "version": "6.0.0", - "dev": true, "license": "ISC", "dependencies": { "yallist": "^4.0.0" @@ -5665,7 +5664,6 @@ }, "node_modules/yallist": { "version": "4.0.0", - "dev": true, "license": "ISC" }, "node_modules/yargs": { diff --git a/ElectronJS/package.json b/ElectronJS/package.json index 51748f6..f040047 100644 --- a/ElectronJS/package.json +++ b/ElectronJS/package.json @@ -37,6 +37,7 @@ "formidable": "^3.5.0", "http": "^0.0.1-security", "multer": "^1.4.5-lts.1", + "node-abi": "^3.52.0", "node-window-manager": "^2.2.4", "selenium-webdriver": "^4.16.0", "ws": "^8.12.0", @@ -79,4 +80,4 @@ "publishers": [] } } -} \ No newline at end of file +} diff --git a/ElectronJS/tasks/229.json b/ElectronJS/tasks/229.json new file mode 100644 index 0000000..b945ac4 --- /dev/null +++ b/ElectronJS/tasks/229.json @@ -0,0 +1 @@ +{"id":229,"name":"知乎 - 有问题,就会有答案","url":"https://www.zhihu.com","links":"https://www.zhihu.com","create_time":"07/12/2023, 03:26:24","update_time":"07/12/2023, 03:43:34","version":"0.6.0","saveThreshold":10,"quitWaitTime":6,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"pauseKey":"t","containJudge":false,"desc":"https://www.zhihu.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环采集数据","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","//div[contains(., '死刑执行前可以谎称肚')]","/html/body/div[last()-7]/div/main/div/div/div[last()-1]/div/div/div/div/div/div[last()-12]/div/div/div/div/h2/div"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"unique_index":"onlvi030w9jlpu5tjzb","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]} \ No newline at end of file diff --git a/ElectronJS/update_chrome.py b/ElectronJS/update_chrome.py index 8bc85ac..df5e927 100644 --- a/ElectronJS/update_chrome.py +++ b/ElectronJS/update_chrome.py @@ -48,7 +48,7 @@ def copy_folder(source_folder, destination_folder): def get_chrome_version(): - version = "115" + version = "120" if sys.platform == "win32": version_re = re.compile(r"^[1-9]\d*\.\d*.\d*") try: diff --git a/ExecuteStage/.vscode/launch.json b/ExecuteStage/.vscode/launch.json index e91a2b5..f36c93d 100644 --- a/ExecuteStage/.vscode/launch.json +++ b/ExecuteStage/.vscode/launch.json @@ -12,7 +12,8 @@ "justMyCode": false, // "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"] // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"] - "args": ["--ids", "[52]", "--headless", "0", "--user_data", "1", "--keyboard", "1"] + // "args": ["--ids", "[1]", "--headless", "0", "--user_data", "1", "--keyboard", "1"] + "args": "--ids '[3]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name" } ] } \ No newline at end of file