mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-20 22:15:08 +08:00
Update cloudflare
This commit is contained in:
parent
145c766221
commit
f567e4edc3
2
ExecuteStage/.vscode/launch.json
vendored
2
ExecuteStage/.vscode/launch.json
vendored
@ -12,7 +12,7 @@
|
|||||||
"justMyCode": false,
|
"justMyCode": false,
|
||||||
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
|
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
|
||||||
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
||||||
"args": ["--id", "[85]", "--headless", "0", "--user_data", "1", "--keyboard", "0"]
|
"args": ["--id", "[25]", "--headless", "0", "--user_data", "0", "--keyboard", "0"]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
@ -41,7 +41,7 @@ from urllib.parse import urljoin
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
import onnxruntime
|
import onnxruntime
|
||||||
onnxruntime.set_default_logger_severity(3) # 隐藏onnxruntime的日志
|
onnxruntime.set_default_logger_severity(3) # 隐藏onnxruntime的日志
|
||||||
# import undetected_chromedriver as uc
|
import undetected_chromedriver as uc
|
||||||
# import pandas as pd
|
# import pandas as pd
|
||||||
# import numpy
|
# import numpy
|
||||||
# import pytesseract
|
# import pytesseract
|
||||||
@ -116,7 +116,7 @@ class BrowserThread(Thread):
|
|||||||
self.browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
|
self.browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
|
||||||
'source': js}) # TMALL 反扒
|
'source': js}) # TMALL 反扒
|
||||||
WebDriverWait(self.browser, 10)
|
WebDriverWait(self.browser, 10)
|
||||||
self.browser.get('about:blank')
|
# self.browser.get('about:blank')
|
||||||
self.procedure = service["graph"] # 程序执行流程
|
self.procedure = service["graph"] # 程序执行流程
|
||||||
try:
|
try:
|
||||||
self.maxViewLength = service["maxViewLength"] # 最大显示长度
|
self.maxViewLength = service["maxViewLength"] # 最大显示长度
|
||||||
@ -729,7 +729,8 @@ class BrowserThread(Thread):
|
|||||||
for i in node["sequence"]: # 从根节点开始向下读取
|
for i in node["sequence"]: # 从根节点开始向下读取
|
||||||
self.executeNode(i, loopValue, loopPath, index)
|
self.executeNode(i, loopValue, loopPath, index)
|
||||||
elif node["option"] == 1: # 打开网页操作
|
elif node["option"] == 1: # 打开网页操作
|
||||||
self.openPage(node["parameters"], loopValue)
|
if not (nodeId == 1 and self.service["cloudflare"] == 1):
|
||||||
|
self.openPage(node["parameters"], loopValue)
|
||||||
elif node["option"] == 2: # 点击元素
|
elif node["option"] == 2: # 点击元素
|
||||||
self.clickElement(node["parameters"], loopValue, loopPath, index)
|
self.clickElement(node["parameters"], loopValue, loopPath, index)
|
||||||
elif node["option"] == 3: # 提取数据
|
elif node["option"] == 3: # 提取数据
|
||||||
@ -1938,9 +1939,14 @@ if __name__ == '__main__':
|
|||||||
elif cloudflare == 1:
|
elif cloudflare == 1:
|
||||||
if sys.platform == "win32":
|
if sys.platform == "win32":
|
||||||
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
|
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
|
||||||
|
# options.add_argument("--auto-open-devtools-for-tabs")
|
||||||
# options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器
|
# options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器
|
||||||
browser_t = MyUCChrome(
|
browser_t = MyUCChrome(options=options, driver_executable_path=driver_path)
|
||||||
options=options, driver_executable_path=driver_path)
|
links = list(filter(isnotnull, service["links"].split("\n")))
|
||||||
|
browser_t.execute_script('window.open("'+ links[0] +'","_blank");') # open page in new tab
|
||||||
|
time.sleep(5) # wait until page has loaded
|
||||||
|
browser_t.switch_to.window(browser_t.window_handles[1]) # switch to new tab
|
||||||
|
# browser_t = uc.Chrome()
|
||||||
else:
|
else:
|
||||||
print("Cloudflare模式只支持Windows x64平台。")
|
print("Cloudflare模式只支持Windows x64平台。")
|
||||||
print(
|
print(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user