mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-20 02:24:56 +08:00
页面无变化计算所有iframe内容
This commit is contained in:
parent
528500f795
commit
da6f078a5c
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/184.json
Normal file
1
ElectronJS/tasks/184.json
Normal file
File diff suppressed because one or more lines are too long
2
ExecuteStage/.vscode/launch.json
vendored
2
ExecuteStage/.vscode/launch.json
vendored
@ -12,7 +12,7 @@
|
||||
"justMyCode": false,
|
||||
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
|
||||
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
||||
"args": ["--id", "[54]", "--headless", "0", "--user_data", "1", "--keyboard", "0"]
|
||||
"args": ["--id", "[63]", "--headless", "0", "--user_data", "1", "--keyboard", "0"]
|
||||
}
|
||||
]
|
||||
}
|
@ -427,9 +427,18 @@ class BrowserThread(Thread):
|
||||
bodyText = ""
|
||||
i = 0
|
||||
while True:
|
||||
# newBodyText = self.browser.page_source
|
||||
newBodyText = self.browser.find_element(
|
||||
By.CSS_SELECTOR, "body", iframe=para["iframe"]).text
|
||||
By.CSS_SELECTOR, "body", iframe=False).text
|
||||
if para["iframe"]: # 如果标记了iframe
|
||||
iframes = self.browser.find_elements(
|
||||
By.CSS_SELECTOR, "iframe", iframe=False)
|
||||
for iframe in iframes:
|
||||
self.browser.switch_to.default_content()
|
||||
self.browser.switch_to.frame(iframe)
|
||||
iframe_text = super(self.browser.__class__, self.browser).find_element(
|
||||
By.CSS_SELECTOR, "body").text # 用super调用父类的方法
|
||||
newBodyText += iframe_text
|
||||
self.browser.switch_to.default_content()
|
||||
if newBodyText == bodyText:
|
||||
print("页面已检测不到新内容,停止滚动。")
|
||||
print(
|
||||
@ -842,9 +851,20 @@ class BrowserThread(Thread):
|
||||
finished = False
|
||||
# newBodyText = self.browser.page_source
|
||||
# newBodyText = self.browser.find_element(By.XPATH, "//body").text
|
||||
newBodyText = self.browser.find_element(
|
||||
By.CSS_SELECTOR, "body", iframe=node["parameters"]["iframe"]).text
|
||||
if node["parameters"]["exitCount"] == 0:
|
||||
newBodyText = self.browser.find_element(
|
||||
By.CSS_SELECTOR, "body", iframe=False).text
|
||||
if node["parameters"]["iframe"]: # 如果标记了iframe
|
||||
iframes = self.browser.find_elements(
|
||||
By.CSS_SELECTOR, "iframe", iframe=False)
|
||||
for iframe in iframes:
|
||||
self.browser.switch_to.default_content()
|
||||
self.browser.switch_to.frame(iframe)
|
||||
iframe_text = super(self.browser.__class__, self.browser).find_element(
|
||||
By.CSS_SELECTOR, "body").text # 用super调用父类的方法
|
||||
newBodyText += iframe_text
|
||||
self.browser.switch_to.default_content()
|
||||
|
||||
if newBodyText == bodyText: # 如果页面内容无变化
|
||||
print("页面已检测不到新内容,停止循环。")
|
||||
print("No new content detected on the page, stop loop.")
|
||||
|
Loading…
x
Reference in New Issue
Block a user