mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-21 02:25:10 +08:00
相对URL转绝对
This commit is contained in:
parent
bcdf6fb413
commit
0b29cfc371
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
2
ExecuteStage/.vscode/launch.json
vendored
2
ExecuteStage/.vscode/launch.json
vendored
@ -12,7 +12,7 @@
|
||||
"justMyCode": true,
|
||||
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
|
||||
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
||||
"args": ["--id", "[3]", "--headless", "0", "--user_data", "1"]
|
||||
"args": ["--id", "[6]", "--headless", "0", "--user_data", "1"]
|
||||
}
|
||||
]
|
||||
}
|
@ -14,6 +14,7 @@ import sys
|
||||
import time
|
||||
# import keyboard
|
||||
import requests
|
||||
from urllib.parse import urljoin
|
||||
from lxml import etree
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
@ -1203,6 +1204,9 @@ class BrowserThread(Thread):
|
||||
# 拼接所有文本内容并去掉两边的空白
|
||||
content = ' '.join(result.strip()
|
||||
for result in content if result.strip())
|
||||
if p["nodeType"] == 2:
|
||||
base_url = self.browser.current_url
|
||||
content = urljoin(base_url, content) # 合并链接相对路径为绝对路径
|
||||
else:
|
||||
content = p["default"]
|
||||
if not self.dataNotFoundKeys[p["name"]]:
|
||||
|
Loading…
x
Reference in New Issue
Block a user