From 119cb997112b456e492b62b993f4cac9206a60e1 Mon Sep 17 00:00:00 2001 From: naibo Date: Wed, 8 Jan 2025 12:02:36 +0800 Subject: [PATCH] Screenshots zoom to the maximum size under headless mode --- ExecuteStage/.vscode/launch.json | 2 +- ExecuteStage/easyspider_executestage.py | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/ExecuteStage/.vscode/launch.json b/ExecuteStage/.vscode/launch.json index afcdd62..d57e97b 100644 --- a/ExecuteStage/.vscode/launch.json +++ b/ExecuteStage/.vscode/launch.json @@ -12,7 +12,7 @@ "justMyCode": false, // "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"] // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"] - "args": ["--ids", "[8]", "--headless", "0", "--user_data", "0", "--keyboard", "0", + "args": ["--ids", "[0]", "--headless", "0", "--user_data", "0", "--keyboard", "0", "--read_type", "remote", ] // "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name" diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index fd20686..a5b596b 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -73,13 +73,13 @@ desired_capabilities["pageLoadStrategy"] = "none" class BrowserThread(Thread): - def __init__(self, browser_t, id, service, version, event, saveName, config, option): + def __init__(self, browser_t, id, service, version, event, saveName, config, option, commandline_config=""): Thread.__init__(self) self.logs = io.StringIO() self.log = bool(service.get("recordLog", True)) self.browser = browser_t self.option = option - self.config = config + self.commandline_config = commandline_config self.version = version self.totalSteps = 0 self.id = id @@ -1870,7 +1870,15 @@ class BrowserThread(Thread): width = size["width"] height = size["height"] # 调整浏览器窗口的大小 - self.browser.set_window_size(width, height) + if self.commandline_config["headless"] == 1: # 无头模式下,截取整个网页的高度 + page_width = self.browser.execute_script( + "return document.body.scrollWidth") + page_height = self.browser.execute_script( + "return document.body.scrollHeight") + self.browser.set_window_size(page_width, page_height) + time.sleep(1) + else: + self.browser.set_window_size(width, height) element.screenshot("Data/Task_" + str(self.id) + "/" + self.saveName + "/screenshots/" + str(time.time()) + ".png") # 截图完成后,将浏览器的窗口大小设置为原来的大小 @@ -2183,7 +2191,7 @@ class BrowserThread(Thread): if __name__ == '__main__': # 如果需要调试程序,请在命令行参数中加入--keyboard 0 来禁用键盘监听以提升调试速度 # If you need to debug the program, please add --keyboard 0 in the command line parameters to disable keyboard listening to improve debugging speed - config = { + commandline_config = { "ids": [0], "saved_file_name": "", "user_data": False, @@ -2198,7 +2206,7 @@ if __name__ == '__main__': "docker_driver": "", "user_folder": "", } - c = Config(config) + c = Config(commandline_config) print(c) options = webdriver.ChromeOptions() driver_path = "chromedriver.exe" @@ -2440,7 +2448,7 @@ if __name__ == '__main__': event = Event() event.set() thread = BrowserThread(browser_t, id, service, - c.version, event, c.saved_file_name, config=config, option=tmp_options[i]) + c.version, event, c.saved_file_name, config=config, option=tmp_options[i], commandline_config=c) print("Thread with task id: ", id, " is created") threads.append(thread) thread.start()