mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-12 11:37:11 +08:00
Fix empty-document bug caused by the html.fromstring function
This commit is contained in:
parent
2031b09297
commit
b4d7ddf5cb
2
ExecuteStage/.vscode/launch.json
vendored
2
ExecuteStage/.vscode/launch.json
vendored
@@ -12,7 +12,7 @@
 "justMyCode": false,
 // "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
 // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
-"args": ["--ids", "[5]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
+"args": ["--ids", "[35]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
     "--read_type", "remote",
 ]
 // "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
|
@@ -1148,6 +1148,14 @@ class BrowserThread(Thread):
 self.history["handle"] = thisHandle
 thisHistoryURL = self.browser.current_url
 # 快速提取处理
+# start = time.time()
+try:
+    tree = html.fromstring(self.browser.page_source)
+except Exception as e:
+    self.print_and_log("解析页面时出错,将切换普通提取模式|Error parsing page, will switch to normal extraction mode")
+    node["parameters"]["quickExtractable"] = False
+# end = time.time()
+# print("解析页面秒数:", end - start)
 if node["parameters"]["quickExtractable"]:
     self.browser.switch_to.default_content() # 切换到主页面
     tree = html.fromstring(self.browser.page_source)
|
Loading…
x
Reference in New Issue
Block a user