From 56f0847500f6665cd718feb71b996b49b7e1bc45 Mon Sep 17 00:00:00 2001 From: naibo Date: Tue, 7 Jan 2025 23:12:26 +0800 Subject: [PATCH 1/3] Parameter name change for loopExecute --- ExecuteStage/easyspider_executestage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index 47f94a8..f02992b 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -1136,7 +1136,7 @@ class BrowserThread(Thread): return index, element # 对循环的处理 - def loopExecute(self, node, loopValue, clickPath="", index=0): + def loopExecute(self, node, loopValue, loopPath="", index=0): time.sleep(0.1) # 第一次执行循环的时候强制等待1秒 thisHandle = self.browser.current_window_handle # 记录本次循环内的标签页的ID try: From f43bdd236d999c28fedf7bf8f319729d393b07cc Mon Sep 17 00:00:00 2001 From: naibo Date: Wed, 8 Jan 2025 11:44:02 +0800 Subject: [PATCH 2/3] Screenshot folder --- ExecuteStage/.vscode/launch.json | 2 +- ExecuteStage/easyspider_executestage.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ExecuteStage/.vscode/launch.json b/ExecuteStage/.vscode/launch.json index b61737a..afcdd62 100644 --- a/ExecuteStage/.vscode/launch.json +++ b/ExecuteStage/.vscode/launch.json @@ -12,7 +12,7 @@ "justMyCode": false, // "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"] // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"] - "args": ["--ids", "[89]", "--headless", "0", "--user_data", "0", "--keyboard", "0", + "args": ["--ids", "[8]", "--headless", "0", "--user_data", "0", "--keyboard", "0", "--read_type", "remote", ] // "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name" diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index f02992b..fd20686 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -108,6 +108,8 @@ class BrowserThread(Thread): os.mkdir(self.downloadFolder + "/files") if not os.path.exists(self.downloadFolder + "/images"): os.mkdir(self.downloadFolder + "/images") + if not os.path.exists(self.downloadFolder + "/screenshots"): + os.mkdir(self.downloadFolder + "/screenshots") self.getDataStep = 0 self.startSteps = 0 try: @@ -1870,7 +1872,7 @@ class BrowserThread(Thread): # 调整浏览器窗口的大小 self.browser.set_window_size(width, height) element.screenshot("Data/Task_" + str(self.id) + "/" + self.saveName + - "/" + str(time.time()) + ".png") + "/screenshots/" + str(time.time()) + ".png") # 截图完成后,将浏览器的窗口大小设置为原来的大小 self.browser.set_window_size(width, height) elif p["contentType"] == 8: From 119cb997112b456e492b62b993f4cac9206a60e1 Mon Sep 17 00:00:00 2001 From: naibo Date: Wed, 8 Jan 2025 12:02:36 +0800 Subject: [PATCH 3/3] Screenshots zoom to the maximum size under headless mode --- ExecuteStage/.vscode/launch.json | 2 +- ExecuteStage/easyspider_executestage.py | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/ExecuteStage/.vscode/launch.json b/ExecuteStage/.vscode/launch.json index afcdd62..d57e97b 100644 --- a/ExecuteStage/.vscode/launch.json +++ b/ExecuteStage/.vscode/launch.json @@ -12,7 +12,7 @@ "justMyCode": false, // "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"] // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"] - "args": ["--ids", "[8]", "--headless", "0", "--user_data", "0", "--keyboard", "0", + "args": ["--ids", "[0]", "--headless", "0", "--user_data", "0", "--keyboard", "0", "--read_type", "remote", ] // "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name" diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index fd20686..a5b596b 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -73,13 +73,13 @@ desired_capabilities["pageLoadStrategy"] = "none" class BrowserThread(Thread): - def __init__(self, browser_t, id, service, version, event, saveName, config, option): + def __init__(self, browser_t, id, service, version, event, saveName, config, option, commandline_config=""): Thread.__init__(self) self.logs = io.StringIO() self.log = bool(service.get("recordLog", True)) self.browser = browser_t self.option = option - self.config = config + self.commandline_config = commandline_config self.version = version self.totalSteps = 0 self.id = id @@ -1870,7 +1870,15 @@ class BrowserThread(Thread): width = size["width"] height = size["height"] # 调整浏览器窗口的大小 - self.browser.set_window_size(width, height) + if self.commandline_config["headless"] == 1: # 无头模式下,截取整个网页的高度 + page_width = self.browser.execute_script( + "return document.body.scrollWidth") + page_height = self.browser.execute_script( + "return document.body.scrollHeight") + self.browser.set_window_size(page_width, page_height) + time.sleep(1) + else: + self.browser.set_window_size(width, height) element.screenshot("Data/Task_" + str(self.id) + "/" + self.saveName + "/screenshots/" + str(time.time()) + ".png") # 截图完成后,将浏览器的窗口大小设置为原来的大小 @@ -2183,7 +2191,7 @@ class BrowserThread(Thread): if __name__ == '__main__': # 如果需要调试程序,请在命令行参数中加入--keyboard 0 来禁用键盘监听以提升调试速度 # If you need to debug the program, please add --keyboard 0 in the command line parameters to disable keyboard listening to improve debugging speed - config = { + commandline_config = { "ids": [0], "saved_file_name": "", "user_data": False, @@ -2198,7 +2206,7 @@ if __name__ == '__main__': "docker_driver": "", "user_folder": "", } - c = Config(config) + c = Config(commandline_config) print(c) options = webdriver.ChromeOptions() driver_path = "chromedriver.exe" @@ -2440,7 +2448,7 @@ if __name__ == '__main__': event = Event() event.set() thread = BrowserThread(browser_t, id, service, - c.version, event, c.saved_file_name, config=config, option=tmp_options[i]) + c.version, event, c.saved_file_name, config=config, option=tmp_options[i], commandline_config=c) print("Thread with task id: ", id, " is created") threads.append(thread) thread.start()