From acd1d71e0484ceef493b45a37f46bcb2a8177dc2 Mon Sep 17 00:00:00 2001 From: NaiboWang-Alienware Date: Tue, 18 Jul 2023 21:22:43 +0800 Subject: [PATCH] Version 0.5.0 and log format --- .temp_to_pub/compress.py | 2 +- ElectronJS/change_version.py | 5 +- ElectronJS/package.json | 4 +- ElectronJS/src/index.html | 2 +- ElectronJS/src/taskGrid/logic.js | 4 +- ExecuteStage/.vscode/launch.json | 2 +- ExecuteStage/easyspider_executestage.py | 389 ++++++++++++------------ Extension/manifest_v3/package.json | 2 +- Extension/manifest_v3/src/manifest.json | 4 +- 9 files changed, 200 insertions(+), 214 deletions(-) diff --git a/.temp_to_pub/compress.py b/.temp_to_pub/compress.py index 907af61..f0beab0 100644 --- a/.temp_to_pub/compress.py +++ b/.temp_to_pub/compress.py @@ -50,7 +50,7 @@ def compress_folder_to_7z_split(folder_path, output_file): except: subprocess.call(["7zz", "a", "-v95m", output_file, folder_path]) -easyspider_version = "0.3.6" +easyspider_version = "0.5.0" if __name__ == "__main__": diff --git a/ElectronJS/change_version.py b/ElectronJS/change_version.py index 46586fd..7bd6d31 100644 --- a/ElectronJS/change_version.py +++ b/ElectronJS/change_version.py @@ -30,7 +30,7 @@ def update_file_version(file_path, new_version, key="当前版本/Current Versio file.write(line) -version = "0.3.6" +version = "0.5.0" # py html js @@ -42,9 +42,6 @@ if __name__ == "__main__": file_path = "./src/taskGrid/logic.js" update_file_version(file_path, version, key='"version": "') -# file_path = "./src/taskGrid/logic.js" -# update_file_version(file_path, version, key='"version": "') - file_path = "../ExecuteStage/easyspider_executestage.py" update_file_version(file_path, version, key='"version": "') diff --git a/ElectronJS/package.json b/ElectronJS/package.json index a496e14..9e8fe9d 100644 --- a/ElectronJS/package.json +++ b/ElectronJS/package.json @@ -1,7 +1,7 @@ { "name": "easy-spider", "productName": "EasySpider", - "version": "0.3.6", + "version": "0.5.0", "icon": "./favicon", "description": "NoCode Visual Web Crawler", "main": "main.js", @@ -66,7 +66,7 @@ ], "packagerConfig": { "icon": "./favicon", - "appVersion": "0.3.6", + "appVersion": "0.5.0", "name": "EasySpider", "executableName": "EasySpider", "appCopyright": "Naibo Wang (naibowang@foxmail.com)", diff --git a/ElectronJS/src/index.html b/ElectronJS/src/index.html index 7b7cf2e..7233348 100644 --- a/ElectronJS/src/index.html +++ b/ElectronJS/src/index.html @@ -40,7 +40,7 @@

English

-

当前版本/Current Version: v0.3.6

+

当前版本/Current Version: v0.5.0

Github最新版本/Newest Version:{{newest_version}}

diff --git a/ElectronJS/src/taskGrid/logic.js b/ElectronJS/src/taskGrid/logic.js index aceb7ec..b772dc2 100644 --- a/ElectronJS/src/taskGrid/logic.js +++ b/ElectronJS/src/taskGrid/logic.js @@ -316,7 +316,7 @@ if (mobile == "true") { } let serviceInfo = { - "version": "0.3.6" + "version": "0.5.0" }; function saveService(type) { @@ -450,7 +450,7 @@ function saveService(type) { "links": links, "create_time": parseInt(serviceId) == -1 ? new Date().toLocaleString() : $("#create_time").val(), "update_time": new Date().toLocaleString(), - "version": "0.3.6", + "version": "0.5.0", "saveThreshold": saveThreshold, "cloudflare": cloudflare, "environment": environment, diff --git a/ExecuteStage/.vscode/launch.json b/ExecuteStage/.vscode/launch.json index 32c7152..d7c1d6f 100644 --- a/ExecuteStage/.vscode/launch.json +++ b/ExecuteStage/.vscode/launch.json @@ -12,7 +12,7 @@ "justMyCode": false, // "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"] // "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"] - "args": ["--id", "[81]", "--headless", "0", "--user_data", "1", "--keyboard", "0"] + "args": ["--id", "[82]", "--headless", "0", "--user_data", "1", "--keyboard", "0"] } ] } \ No newline at end of file diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index 3d058c9..baf9187 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -55,6 +55,7 @@ desired_capabilities["pageLoadStrategy"] = "none" class BrowserThread(Thread): def __init__(self, browser_t, id, service, version, event, saveName, config): Thread.__init__(self) + self.logs = io.StringIO() self.browser = browser_t self.config = config self.version = version @@ -78,8 +79,8 @@ class BrowserThread(Thread): now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S") self.saveName = self.saveName.replace("current_time", now) - print("Save Name for task ID", i, "is:", self.saveName) - print("任务ID", i, "的保存文件名为:", self.saveName) + self.print_and_log("Save Name for task ID", i, "is:", self.saveName) + self.print_and_log("任务ID", i, "的保存文件名为:", self.saveName) if not os.path.exists("Data/Task_" + str(i)): os.mkdir("Data/Task_" + str(i)) if not os.path.exists("Data/Task_" + str(i) + "/" + self.saveName): @@ -95,20 +96,20 @@ class BrowserThread(Thread): except: pass if self.startSteps != 0: - print("此模式下,任务ID", self.id, "将从上次退出的步骤开始执行,之前已采集条数为", - self.startSteps, "条。") - print("In this mode, task ID", self.id, - "will start from the last step, before we already collected", self.startSteps, " items.") + self.print_and_log("此模式下,任务ID", self.id, "将从上次退出的步骤开始执行,之前已采集条数为", + self.startSteps, "条。") + self.print_and_log("In this mode, task ID", self.id, + "will start from the last step, before we already collected", self.startSteps, " items.") else: - print("此模式下,任务ID", self.id, - "将从头开始执行,如果需要从上次退出的步骤开始执行,请在保存任务时设置是否从上次保存位置开始执行为“是”。") - print("In this mode, task ID", self.id, - "will start from the beginning, if you want to start from the last step, please set the option 'start from the last step' to 'yes' when saving the task.") + self.print_and_log("此模式下,任务ID", self.id, + "将从头开始执行,如果需要从上次退出的步骤开始执行,请在保存任务时设置是否从上次保存位置开始执行为“是”。") + self.print_and_log("In this mode, task ID", self.id, + "will start from the beginning, if you want to start from the last step, please set the option 'start from the last step' to 'yes' when saving the task.") stealth_path = driver_path[:driver_path.find( "chromedriver")] + "stealth.min.js" with open(stealth_path, 'r') as f: js = f.read() - print("Loading stealth.min.js") + self.print_and_log("Loading stealth.min.js") self.browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', { 'source': js}) # TMALL 反扒 WebDriverWait(self.browser, 10) @@ -128,15 +129,16 @@ class BrowserThread(Thread): pass else: # 0.3.1以下版本的EasySpider不兼容0.3.1及以上版本的EasySpider if service["version"] != version: - print("版本不一致,请使用" + - service["version"] + "版本的EasySpider运行该任务!") - print("Version not match, please use EasySpider " + - service["version"] + " to run this task!") + self.print_and_log("版本不一致,请使用" + + service["version"] + "版本的EasySpider运行该任务!") + self.print_and_log("Version not match, please use EasySpider " + + service["version"] + " to run this task!") self.browser.quit() sys.exit() except: # 0.2.0版本没有version字段,所以直接退出 - print("版本不一致,请使用v0.2.0版本的EasySpider运行该任务!") - print("Version not match, please use EasySpider v0.2.0 to run this task!") + self.print_and_log("版本不一致,请使用v0.2.0版本的EasySpider运行该任务!") + self.print_and_log( + "Version not match, please use EasySpider v0.2.0 to run this task!") self.browser.quit() sys.exit() try: @@ -160,14 +162,14 @@ class BrowserThread(Thread): self.mysql.create_table(self.saveName, service["outputParameters"]) self.writeMode = 2 if self.writeMode == 1: - print("追加模式") - print("Append Mode") + self.print_and_log("追加模式") + self.print_and_log("Append Mode") elif self.writeMode == 0: - print("新建模式") - print("New Mode") + self.print_and_log("新建模式") + self.print_and_log("New Mode") elif self.writeMode == 2: - print("MySQL模式") - print("MySQL Mode") + self.print_and_log("MySQL模式") + self.print_and_log("MySQL Mode") self.containJudge = service["containJudge"] # 是否含有判断语句 self.outputParameters = {} self.service = service @@ -232,8 +234,8 @@ class BrowserThread(Thread): if self.task_version <= "0.3.5": # 0.3.5及以下版本的EasySpider下的循环点击不支持相对XPath node["parameters"]["xpath"] = "" - print("您的任务版本号为" + self.task_version + - ",循环点击不支持相对XPath写法,已自动切换为纯循环的XPath") + self.print_and_log("您的任务版本号为" + self.task_version + + ",循环点击不支持相对XPath写法,已自动切换为纯循环的XPath") elif node["option"] == 3: # 提取数据操作 node["parameters"]["recordASField"] = 0 paras = node["parameters"]["paras"] @@ -256,8 +258,9 @@ class BrowserThread(Thread): except: node["parameters"]["recordASField"] += 1 if para["contentType"] == 8: - print("默认的ddddocr识别功能如果觉得不好用,可以自行修改源码get_content函数->contentType == 8的位置换成自己想要的OCR模型然后自己编译运行;或者可以先设置采集内容类型为“元素截图”把图片保存下来,然后用自定义操作调用自己写的程序,程序的功能是读取这个最新生成的图片,然后用好用的模型,如PaddleOCR把图片识别出来,然后把返回值返回给程序作为参数输出。") - print("If you think the default ddddocr function is not good enough, you can modify the source code get_content function -> contentType == 8 position to your own OCR model and then compile and run it; or you can first set the content type of the crawler to \"Element Screenshot\" to save the picture, and then call your own program with custom operations. The function of the program is to read the latest generated picture, then use a good model, such as PaddleOCR to recognize the picture, and then return the return value as a parameter output to the program.") + self.print_and_log( + "默认的ddddocr识别功能如果觉得不好用,可以自行修改源码get_content函数->contentType == 8的位置换成自己想要的OCR模型然后自己编译运行;或者可以先设置采集内容类型为“元素截图”把图片保存下来,然后用自定义操作调用自己写的程序,程序的功能是读取这个最新生成的图片,然后用好用的模型,如PaddleOCR把图片识别出来,然后把返回值返回给程序作为参数输出。") + self.print_and_log("If you think the default ddddocr function is not good enough, you can modify the source code get_content function -> contentType == 8 position to your own OCR model and then compile and run it; or you can first set the content type of the crawler to \"Element Screenshot\" to save the picture, and then call your own program with custom operations. The function of the program is to read the latest generated picture, then use a good model, such as PaddleOCR to recognize the picture, and then return the return value as a parameter output to the program.") if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2: para["optimizable"] = True else: @@ -277,8 +280,8 @@ class BrowserThread(Thread): if self.task_version <= "0.3.5": # 0.3.5及以下版本的EasySpider下的循环点击不支持相对XPath node["parameters"]["xpath"] = "" - print("您的任务版本号为" + self.task_version + - ",循环点击不支持相对XPath写法,已自动切换为纯循环的XPath") + self.print_and_log("您的任务版本号为" + self.task_version + + ",循环点击不支持相对XPath写法,已自动切换为纯循环的XPath") def readFromExcel(self): if self.inputExcel == "": @@ -286,10 +289,10 @@ class BrowserThread(Thread): try: workbook = load_workbook(self.inputExcel) except: - print("读取Excel失败,将会使用默认参数执行任务,请检查文件路径是否正确:", - os.path.abspath(self.inputExcel)) - print("Failed to read Excel, will execute the task with default parameters, please check if the file path is correct: ", - os.path.abspath(self.inputExcel)) + self.print_and_log("读取Excel失败,将会使用默认参数执行任务,请检查文件路径是否正确:", + os.path.abspath(self.inputExcel)) + self.print_and_log("Failed to read Excel, will execute the task with default parameters, please check if the file path is correct: ", + os.path.abspath(self.inputExcel)) time.sleep(5) return 0 @@ -335,36 +338,40 @@ class BrowserThread(Thread): elif node["option"] == 8: node["parameters"]["textList"] = value break - print("已从Excel读取输入参数,覆盖了原有输入参数。") - print("Alread read input parameters from Excel and overwrite the original input parameters.") + self.print_and_log("已从Excel读取输入参数,覆盖了原有输入参数。") + self.print_and_log( + "Alread read input parameters from Excel and overwrite the original input parameters.") def run(self): # 挨个执行程序 for i in range(len(self.links)): - print("正在执行第", i + 1, "/ ", len(self.links), "个链接") - print("Executing link", i + 1, "/ ", len(self.links)) + self.print_and_log("正在执行第", i + 1, "/ ", len(self.links), "个链接") + self.print_and_log("Executing link", i + 1, + "/ ", len(self.links)) self.executeNode(0) self.urlId = self.urlId + 1 files = os.listdir("Data/Task_" + str(self.id) + "/" + self.saveName) # 如果目录为空,则删除该目录 if not files: os.rmdir("Data/Task_" + str(self.id) + "/" + self.saveName) - print("Done!") - print("执行完成!") - self.recordLog("Done!") + self.print_and_log("Done!") + self.print_and_log("执行完成!") self.saveData(exit=True) if self.outputFormat == "mysql": self.mysql.close() - def recordLog(self, str=""): - self.log = self.log + str + "\n" + def recordLog(self, *args, **kwargs): + now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + print(now + ":", *args, file=self.logs, **kwargs) - # 控制台打印log函数 + # 定义一个自定义的 print 函数,它将内容同时打印到屏幕和文件中 + def print_and_log(self, *args, **kwargs): + now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + # 将内容打印到屏幕 + print(*args, **kwargs) - def Log(self, text, text2=""): - switch = False - if switch: - print(text, text2) + # 将内容写入文件 + print(now + ":", *args, file=self.logs, **kwargs) # @atexit.register # def clean(self): @@ -378,6 +385,7 @@ class BrowserThread(Thread): # 写入日志 with open("Data/Task_" + str(self.id) + "/" + self.saveName + '_log.txt', 'a', encoding='utf-8-sig') as file_obj: file_obj.write(self.log) + file_obj.write(self.logs.getvalue()) file_obj.close() # 写入已执行步数 with open("Data/Task_" + str(self.id) + "/" + self.saveName + '_steps.txt', 'w', encoding='utf-8-sig') as file_obj: @@ -412,7 +420,8 @@ class BrowserThread(Thread): if scrollType != 0 and para["scrollCount"] > 0: # 控制屏幕向下滚动 if scrollType == 1 or scrollType == 2: for i in range(para["scrollCount"]): - self.Log("Wait for set second after screen scrolling") + self.recordLog( + "Wait for set second after screen scrolling.") body = self.browser.find_element( By.CSS_SELECTOR, "body", iframe=para["iframe"]) if scrollType == 1: @@ -440,8 +449,8 @@ class BrowserThread(Thread): newBodyText += iframe_text self.browser.switch_to.default_content() if newBodyText == bodyText: - print("页面已检测不到新内容,停止滚动。") - print( + self.print_and_log("页面已检测不到新内容,停止滚动。") + self.print_and_log( "No new content detected on the page, stop scrolling.") break else: @@ -449,23 +458,24 @@ class BrowserThread(Thread): body = self.browser.find_element( By.CSS_SELECTOR, "body", iframe=para["iframe"]) body.send_keys(Keys.END) - print("滚动到底部,第", i + 1, "次。") - print("Scroll to the bottom, the", i + 1, "time.") + self.print_and_log("滚动到底部,第", i + 1, "次。") + self.print_and_log( + "Scroll to the bottom, the", i + 1, "time.") i = i + 1 try: time.sleep(para["scrollWaitTime"]) # 下拉完等待 except: pass except: - self.Log('Time out after set seconds when scrolling. ') - self.recordLog('Time out after set seconds when scrolling') + self.print_and_log('Time out after set seconds when scrolling. ') try: self.browser.execute_script('window.stop()') except: pass if scrollType != 0 and para["scrollCount"] > 0: # 控制屏幕向下滚动 for i in range(para["scrollCount"]): - self.Log("Wait for set second after screen scrolling") + self.print_and_log( + "Wait for set second after screen scrolling") body = self.browser.find_element( By.CSS_SELECTOR, "body", iframe=para["iframe"]) if scrollType == 1: @@ -485,7 +495,7 @@ class BrowserThread(Thread): return "" if max_wait_time == 0: max_wait_time = 999999 - # print(codeMode, code) + # self.print_and_log(codeMode, code) # 将value中的Field[""]替换为outputParameters中的键值 code = replace_field_values(code, self.outputParameters) if iframe and self.browser.iframe_env == False: @@ -502,7 +512,7 @@ class BrowserThread(Thread): self.browser.iframe_env = True break except: - print("Iframe switch failed") + self.print_and_log("Iframe switch failed") elif not iframe and self.browser.iframe_env == True: self.browser.switch_to.default_content() self.browser.iframe_env = False @@ -528,15 +538,15 @@ class BrowserThread(Thread): try: output = exec(code) except Exception as e: - print("执行下面的代码时出错:" + code, ",错误为:", e) - print("Error executing the following code:" + - code, ", error is:", e) + self.print_and_log("执行下面的代码时出错:" + code, ",错误为:", e) + self.print_and_log("Error executing the following code:" + + code, ", error is:", e) elif int(codeMode) == 6: try: output = eval(code) except Exception as e: - print("获得下面的代码返回值时出错:" + code, ",错误为:", e) - print( + self.print_and_log("获得下面的代码返回值时出错:" + code, ",错误为:", e) + self.print_and_log( "Error executing and getting return value the following code:" + code, ", error is:", e) elif int(codeMode) == 1: self.recordLog("Execute System Call:" + code) @@ -548,13 +558,13 @@ class BrowserThread(Thread): code, capture_output=True, text=True, timeout=max_wait_time, shell=True) # 输出命令返回值 output = output.stdout - print(output) + self.print_and_log(output) except subprocess.TimeoutExpired: # 命令执行时间超过指定值,抛出异常 self.recordLog("Command timed out") self.recordLog("命令执行超时") except Exception as e: - print(e) # 打印异常信息 + self.print_and_log(e) # 打印异常信息 self.recordLog("Command execution failed") self.recordLog("命令执行失败") return str(output) @@ -578,7 +588,7 @@ class BrowserThread(Thread): codeMode, code, max_wait_time, element, iframe=paras["iframe"]) except: output = "" - print("JavaScript execution failed") + self.print_and_log("JavaScript execution failed") elif codeMode == 3: self.BREAK = True elif codeMode == 4: @@ -588,8 +598,8 @@ class BrowserThread(Thread): codeMode, code, max_wait_time, iframe=paras["iframe"]) recordASField = bool(paras["recordASField"]) # if recordASField: - # print("操作<" + node["title"] + ">的返回值为:" + output) - # print("The return value of operation <" + node["title"] + "> is: " + output) + # self.print_and_log("操作<" + node["title"] + ">的返回值为:" + output) + # self.print_and_log("The return value of operation <" + node["title"] + "> is: " + output) self.outputParameters[node["title"]] = output if recordASField: line = new_line(self.outputParameters, @@ -605,8 +615,8 @@ class BrowserThread(Thread): try: optionValue = loopValue.split("~")[index - 1] except: - print("取值失败,可能是因为取值索引超出范围,将使用整个文本值") - print( + self.print_and_log("取值失败,可能是因为取值索引超出范围,将使用整个文本值") + self.print_and_log( "Failed to get value, maybe because the index is out of range, will use the entire text value") else: optionValue = loopValue @@ -631,13 +641,13 @@ class BrowserThread(Thread): elif optionMode == 3: dropdown.select_by_visible_text(optionValue) except: - print("切换下拉框选项失败:", xpath, - para["optionMode"], para["optionValue"]) - print("Failed to change drop-down box option:", - xpath, para["optionMode"], para["optionValue"]) + self.print_and_log("切换下拉框选项失败:", xpath, + para["optionMode"], para["optionValue"]) + self.print_and_log("Failed to change drop-down box option:", + xpath, para["optionMode"], para["optionValue"]) except: - print("找不到下拉框元素:", xpath) - print("Cannot find drop-down box element:", xpath) + self.print_and_log("找不到下拉框元素:", xpath) + self.print_and_log("Cannot find drop-down box element:", xpath) def moveToElement(self, para, loopElement=None, loopPath="", index=0): time.sleep(0.1) # 移动之前等待0.1秒 @@ -663,11 +673,11 @@ class BrowserThread(Thread): try: ActionChains(self.browser).move_to_element(element).perform() except: - print("移动鼠标到元素失败:", xpath) - print("Failed to move mouse to element:", xpath) + self.print_and_log("移动鼠标到元素失败:", xpath) + self.print_and_log("Failed to move mouse to element:", xpath) except: - print("找不到元素:", xpath) - print("Cannot find element:", xpath) + self.print_and_log("找不到元素:", xpath) + self.print_and_log("Cannot find element:", xpath) # 执行节点关键函数部分 @@ -683,8 +693,9 @@ class BrowserThread(Thread): node["parameters"]["waitElement"], self.outputParameters) waitElementTime = float(node["parameters"]["waitElementTime"]) waitElementIframeIndex = node["parameters"]["waitElementInIframe"] - print("等待元素出现:", waitElement) - print("Waiting for element to appear:", waitElement) + self.print_and_log("等待元素出现:", waitElement) + self.print_and_log( + "Waiting for element to appear:", waitElement) if waitElementIframeIndex > 0: iframes = self.browser.find_elements( By.CSS_SELECTOR, "iframe", iframe=False) @@ -697,10 +708,10 @@ class BrowserThread(Thread): self.browser.switch_to.default_content() except Exception as e: if waitElement != "": - print("等待元素出现超时:", waitElement, ",将继续执行。") - print("Timeout waiting for element to appear:", - waitElement, ", will continue to execute.") - print(e) + self.print_and_log("等待元素出现超时:", waitElement, ",将继续执行。") + self.print_and_log("Timeout waiting for element to appear:", + waitElement, ", will continue to execute.") + self.print_and_log(e) self.recordLog("Wait element not found") # 根据不同选项执行不同操作 @@ -708,7 +719,7 @@ class BrowserThread(Thread): for i in node["sequence"]: # 从根节点开始向下读取 self.executeNode(i, loopValue, loopPath, index) elif node["option"] == 1: # 打开网页操作 - self.recordLog("openPage") + self.recordLog("OpenPage") self.openPage(node["parameters"], loopValue) elif node["option"] == 2: # 点击元素 self.recordLog("Click") @@ -716,14 +727,15 @@ class BrowserThread(Thread): elif node["option"] == 3: # 提取数据 # 针对提取数据操作,设置操作开始的步骤,用于不小心关闭后的恢复的增量采集 if self.totalSteps >= self.startSteps: - self.recordLog("getData") + self.recordLog("GetData") self.getData(node["parameters"], loopValue, node["isInLoop"], parentPath=loopPath, index=index) self.saveData() else: # self.getDataStep += 1 - print("跳过第" + str(self.totalSteps) + "次提取数据。") - print("Skip the " + str(self.totalSteps) + "th data extraction.") + self.print_and_log("跳过第" + str(self.totalSteps) + "次提取数据。") + self.print_and_log( + "Skip the " + str(self.totalSteps) + "th data extraction.") self.totalSteps += 1 # 总步数加一 elif node["option"] == 4: # 输入文字 self.inputInfo(node["parameters"], loopValue) @@ -735,10 +747,10 @@ class BrowserThread(Thread): elif node["option"] == 7: # 鼠标移动到元素上 self.moveToElement(node["parameters"], loopValue, loopPath, index) elif node["option"] == 8: # 循环 - self.recordLog("loop") + self.recordLog("Loop") self.loopExecute(node, loopValue, loopPath, index) # 执行循环 elif node["option"] == 9: # 条件分支 - self.recordLog("judge") + self.recordLog("Judge") self.judgeExecute(node, loopValue, loopPath, index) # 执行完之后进行等待 @@ -754,7 +766,6 @@ class BrowserThread(Thread): time.sleep(waitTime) elif waitType == 1: # 随机等待时间 time.sleep(random.uniform(waitTime * 0.5, waitTime * 1.5)) - self.Log("Wait seconds after node executing: ", waitTime) self.event.wait() # 等待事件结束 # 对判断条件的处理 @@ -835,8 +846,6 @@ class BrowserThread(Thread): # 对循环的处理 def loopExecute(self, node, loopValue, clickPath="", index=0): time.sleep(0.1) # 第一次执行循环的时候强制等待1秒 - # self.Log("循环执行前等待0.1秒") - self.Log("Wait 0.1 second before loop") thisHandle = self.browser.current_window_handle # 记录本次循环内的标签页的ID thisHistoryLength = self.browser.execute_script( 'return history.length') # 记录本次循环内的history的length @@ -866,13 +875,15 @@ class BrowserThread(Thread): self.browser.switch_to.default_content() if newBodyText == bodyText: # 如果页面内容无变化 - print("页面已检测不到新内容,停止循环。") - print("No new content detected on the page, stop loop.") + self.print_and_log("页面已检测不到新内容,停止循环。") + self.print_and_log( + "No new content detected on the page, stop loop.") finished = True break else: - print("检测到页面变化,继续循环。") - print("Page changed detected, continue loop.") + self.print_and_log("检测到页面变化,继续循环。") + self.print_and_log( + "Page changed detected, continue loop.") bodyText = newBodyText xpath = replace_field_values( node["parameters"]["xpath"], self.outputParameters) @@ -889,15 +900,13 @@ class BrowserThread(Thread): finished = True break finished = True - self.Log("Click: ", node["parameters"]["xpath"]) - self.recordLog("Click:" + node["parameters"]["xpath"]) + self.recordLog( + "Click: " + node["parameters"]["xpath"]) except NoSuchElementException: # except: - print("Single loop element not found: ", - xpath) - print("找不到要循环的单个元素: ", xpath) - self.recordLog( - "Single loop element not found: " + node["parameters"]["xpath"]) + self.print_and_log("Single loop element not found: ", + xpath) + self.print_and_log("找不到要循环的单个元素: ", xpath) for i in node["sequence"]: # 不带点击元素的把剩余的如提取数据的操作执行一遍 if node["option"] != 2: self.executeNode( @@ -906,20 +915,17 @@ class BrowserThread(Thread): break # 如果找不到元素,退出循环 finally: if not finished: - print("\n\n-------Retrying-------\n\n") - self.Log("-------Retrying-------: ", - node["parameters"]["xpath"]) - self.recordLog("ClickNotFound:" + - node["parameters"]["xpath"]) + self.print_and_log("\n\n-------Retrying-------\n\n") + self.print_and_log("-------Retrying-------: ", + node["parameters"]["xpath"]) for i in node["sequence"]: # 不带点击元素的把剩余的如提取数据的操作执行一遍 if node["option"] != 2: self.executeNode( i, None, xpath, 0) break # 如果找不到元素,退出循环 count = count + 1 - self.Log("Page: ", count) - self.recordLog("Page:" + str(count)) - # print(node["parameters"]["exitCount"], "-------") + self.print_and_log("Page: ", count) + # self.print_and_log(node["parameters"]["exitCount"], "-------") if node["parameters"]["exitCount"] == count: # 如果达到设置的退出循环条件的话 break if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件 @@ -935,11 +941,9 @@ class BrowserThread(Thread): elements = self.browser.find_elements(By.XPATH, xpath, iframe=node["parameters"]["iframe"]) if len(elements) == 0: - print("Loop element not found: ", - xpath) - print("找不到循环元素: ", xpath) - self.recordLog("pathNotFound: " + - node["parameters"]["xpath"]) + self.print_and_log("Loop element not found: ", + xpath) + self.print_and_log("找不到循环元素: ", xpath) for index in range(len(elements)): for i in node["sequence"]: # 挨个顺序执行循环里所有的操作 self.executeNode(i, elements[index], @@ -965,8 +969,9 @@ class BrowserThread(Thread): if self.browser.current_window_handle == thisHandle: break except Exception as e: - print("关闭标签页发生错误:", e) - print("Error occurred while closing tab: ", e) + self.print_and_log("关闭标签页发生错误:", e) + self.print_and_log( + "Error occurred while closing tab: ", e) if self.history["index"] != thisHistoryLength and self.history[ "handle"] == self.browser.current_window_handle: # 如果执行完一次循环之后历史记录发生了变化,注意当前页面的判断 difference = thisHistoryLength - \ @@ -978,8 +983,8 @@ class BrowserThread(Thread): # else: # time.sleep(2) # 切换历史记录等待: - self.Log("Change history back time or:", - node["parameters"]["historyWait"]) + self.recordLog("Change history back time or: " + + node["parameters"]["historyWait"]) try: self.browser.execute_script('window.stop()') except: @@ -991,9 +996,8 @@ class BrowserThread(Thread): if code <= 0: break except NoSuchElementException: - print("Loop element not found: ", xpath) - print("找不到循环元素: ", xpath) - self.recordLog("pathNotFound: " + node["parameters"]["xpath"]) + self.print_and_log("Loop element not found: ", xpath) + self.print_and_log("找不到循环元素: ", xpath) except Exception as e: raise elif int(node["parameters"]["loopType"]) == 2: # 固定元素列表 @@ -1026,8 +1030,9 @@ class BrowserThread(Thread): if self.browser.current_window_handle == thisHandle: break except Exception as e: - print("关闭标签页发生错误:", e) - print("Error occurred while closing tab: ", e) + self.print_and_log("关闭标签页发生错误:", e) + self.print_and_log( + "Error occurred while closing tab: ", e) if self.history["index"] != thisHistoryLength and self.history[ "handle"] == self.browser.current_window_handle: # 如果执行完一次循环之后历史记录发生了变化,注意当前页面的判断 difference = thisHistoryLength - \ @@ -1038,16 +1043,15 @@ class BrowserThread(Thread): time.sleep(node["parameters"]["historyWait"]) # else: # time.sleep(2) - self.Log("Change history back time or:", - node["parameters"]["historyWait"]) + self.recordLog("Change history back time or: " + + node["parameters"]["historyWait"]) try: self.browser.execute_script('window.stop()') except: pass except NoSuchElementException: - print("Loop element not found: ", path) - print("找不到循环元素: ", path) - self.recordLog("pathNotFound: " + path) + self.print_and_log("Loop element not found: ", path) + self.print_and_log("找不到循环元素: ", path) continue # 循环中找不到元素就略过操作 except Exception as e: raise @@ -1061,7 +1065,6 @@ class BrowserThread(Thread): textList = node["parameters"]["textList"].split("\n") for text in textList: text = replace_field_values(text, self.outputParameters) - self.recordLog("input: " + text) for i in node["sequence"]: # 挨个执行操作 self.executeNode(i, text, "", 0) if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行 @@ -1086,7 +1089,7 @@ class BrowserThread(Thread): # urlList.append(url) for url in urlList: url = replace_field_values(url, self.outputParameters) - self.recordLog("input: " + url) + self.recordLog("Input: " + url) for i in node["sequence"]: self.executeNode(i, url, "", 0) if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行 @@ -1167,19 +1170,16 @@ class BrowserThread(Thread): cookie_dict = {'name': name, 'value': value} # 加载 cookie self.browser.add_cookie(cookie_dict) - self.Log('Loading page: ' + url) - self.recordLog('Loading page: ' + url) + self.print_and_log('Loading page: ' + url) except TimeoutException: - self.Log('Time out after set seconds when loading page: ' + url) - self.recordLog( + self.print_and_log( 'Time out after set seconds when loading page: ' + url) try: self.browser.execute_script('window.stop()') except: pass except Exception as e: - print("Failed to load page: " + url) - self.recordLog('Failed to load page: ' + url) + self.print_and_log("Failed to load page: " + url) try: self.history["index"] = self.browser.execute_script( "return history.length") @@ -1195,7 +1195,7 @@ class BrowserThread(Thread): # 键盘输入事件 def inputInfo(self, para, loopValue): time.sleep(0.1) # 输入之前等待0.1秒 - self.Log("Wait 0.1 second before input") + self.recordLog("Wait 0.1 second before input") try: xpath = replace_field_values(para["xpath"], self.outputParameters) textbox = self.browser.find_element( @@ -1229,8 +1229,8 @@ class BrowserThread(Thread): try: replaced_text = replaced_text.split("~")[index - 1] except: - print("取值失败,可能是因为取值索引超出范围,将使用整个文本值") - print( + self.print_and_log("取值失败,可能是因为取值索引超出范围,将使用整个文本值") + self.print_and_log( "Failed to get value, maybe because the index is out of range, will use the entire text value") textbox.send_keys(replaced_text) if value.lower().find("") >= 0: @@ -1238,11 +1238,9 @@ class BrowserThread(Thread): self.execute_code( 2, para["afterJS"], para["afterJSWaitTime"], textbox, iframe=para["iframe"]) # 执行后置js except: - print("Cannot find input box element:" + - xpath + ", please try to set the wait time before executing this operation") - print("找不到输入框元素:" + xpath + ",请尝试在执行此操作前设置等待时间") - self.recordLog("Cannot find input box element:" + - para["xpath"] + "Please try to set the wait time before executing this operation") + self.print_and_log("Cannot find input box element:" + + xpath + ", please try to set the wait time before executing this operation") + self.print_and_log("找不到输入框元素:" + xpath + ",请尝试在执行此操作前设置等待时间") # 点击元素事件 def clickElement(self, para, loopElement=None, clickPath="", index=0): @@ -1279,11 +1277,9 @@ class BrowserThread(Thread): self.execute_code(2, para["beforeJS"], para["beforeJSWaitTime"], element, iframe=para["iframe"]) except: - print("Cannot find element:" + - path + ", please try to set the wait time before executing this operation") - print("找不到要点击的元素:" + path + ",请尝试在执行此操作前设置等待时间") - self.recordLog("Cannot find element:" + - path + ", please try to set the wait time before executing this operation") + self.print_and_log("Cannot find element:" + + path + ", please try to set the wait time before executing this operation") + self.print_and_log("找不到要点击的元素:" + path + ",请尝试在执行此操作前设置等待时间") tempHandleNum = len(self.browser.window_handles) # 记录之前的窗口位置 try: click_way = int(para["clickWay"]) @@ -1298,20 +1294,18 @@ class BrowserThread(Thread): '`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i