Fix "document is empty" bug caused by the html.fromstring function

This commit is contained in:
Naibo_Mac_M2 2024-12-11 23:17:21 +08:00
parent 2031b09297
commit b4d7ddf5cb
2 changed files with 9 additions and 1 deletion

View File

@@ -12,7 +12,7 @@
"justMyCode": false,
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--ids", "[5]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
"args": ["--ids", "[35]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
"--read_type", "remote",
]
// "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"

View File

@@ -1148,6 +1148,14 @@ class BrowserThread(Thread):
self.history["handle"] = thisHandle
thisHistoryURL = self.browser.current_url
# 快速提取处理
# start = time.time()
try:
tree = html.fromstring(self.browser.page_source)
except Exception as e:
self.print_and_log("解析页面时出错,将切换普通提取模式|Error parsing page, will switch to normal extraction mode")
node["parameters"]["quickExtractable"] = False
# end = time.time()
# print("解析页面秒数:", end - start)
if node["parameters"]["quickExtractable"]:
self.browser.switch_to.default_content() # 切换到主页面
tree = html.fromstring(self.browser.page_source)