mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-19 03:39:42 +08:00
V0.3.2
This commit is contained in:
parent
527a004e73
commit
0a5d159b8a
@ -296,6 +296,7 @@ async function runBrowser(lang = "en", user_data_folder = '') {
|
||||
}
|
||||
options.addExtensions(path.join(__dirname, "XPathHelper.crx"));
|
||||
options.setChromeBinaryPath(chromeBinaryPath);
|
||||
options.add
|
||||
if (user_data_folder != "") {
|
||||
let dir = path.join(task_server.getDir(), user_data_folder);
|
||||
console.log(dir);
|
||||
|
1
ElectronJS/tasks/96.json
Normal file
1
ElectronJS/tasks/96.json
Normal file
@ -0,0 +1 @@
|
||||
{"id":96,"name":"[2305.07067] SigRec: Automatic Recovery of Function Signatures in Smart Contracts","url":"https://arxiv.org/abs/2305.07067","links":"https://arxiv.org/pdf/2008.03554.pdf","create_time":"6/2/2023, 1:00:27 AM","version":"0.3.2","containJudge":false,"desc":"https://arxiv.org/abs/2305.07067","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://arxiv.org/pdf/2008.03554.pdf","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://arxiv.org/pdf/2008.03554.pdf"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://arxiv.org/abs/2305.07067","links":"https://arxiv.org/pdf/2008.03554.pdf","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"download-pdf\")]","wait":2,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[2]/main[1]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[1]/a[1]","//a[contains(., 'PDF')]","//A[@class='abs-button download-pdf']"]}}]}
|
1
ElectronJS/tasks/97.json
Normal file
1
ElectronJS/tasks/97.json
Normal file
File diff suppressed because one or more lines are too long
2
ExecuteStage/.vscode/launch.json
vendored
2
ExecuteStage/.vscode/launch.json
vendored
@ -12,7 +12,7 @@
|
||||
"console": "integratedTerminal",
|
||||
"justMyCode": true,
|
||||
// "args": ["--id", "38", "--read_type", "local", "--headless", "1"]
|
||||
"args": ["--id", "[5]", "--headless", "0", "--user_data", "0"]
|
||||
"args": ["--id", "[80]", "--headless", "0", "--user_data", "0"]
|
||||
}
|
||||
]
|
||||
}
|
@ -702,10 +702,18 @@ class BrowserThread(Thread):
|
||||
textbox.send_keys(Keys.SHIFT, Keys.END)
|
||||
# Send the DELETE key
|
||||
textbox.send_keys(Keys.DELETE)
|
||||
value = ""
|
||||
if para["useLoop"]:
|
||||
textbox.send_keys(loopValue)
|
||||
value = loopValue
|
||||
else:
|
||||
textbox.send_keys(para["value"])
|
||||
value = para["value"]
|
||||
pattern = r'Field\["([^"]+)"\]' # 将value中的Field[""]替换为outputParameters中的键值
|
||||
try:
|
||||
replaced_text = re.sub(pattern, lambda match: self.outputParameters.get(match.group(1), ''), value)
|
||||
except:
|
||||
replaced_text = value
|
||||
value = replaced_text
|
||||
textbox.send_keys(value)
|
||||
self.execute_code(2, para["afterJS"], para["afterJSWaitTime"], textbox) # 执行后置js
|
||||
# global bodyText # 每次执行点击,输入元素和打开网页操作后,需要更新bodyText
|
||||
self.bodyText = self.browser.find_element(By.CSS_SELECTOR, "body").text
|
||||
@ -1089,6 +1097,22 @@ if __name__ == '__main__':
|
||||
option.add_argument(
|
||||
"--disable-blink-features=AutomationControlled") # TMALL 反扒
|
||||
options.add_argument("--disable-blink-features=AutomationControlled") # TMALL 反扒
|
||||
options.add_experimental_option("prefs", {
|
||||
"download.default_directory": "Data/", # 设置文件下载路径
|
||||
"download.prompt_for_download": False, # 禁止下载提示框
|
||||
"plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}],
|
||||
"download.directory_upgrade": True,
|
||||
"download.extensions_to_open": "applications/pdf",
|
||||
"plugins.always_open_pdf_externally": True # 总是在外部程序中打开PDF
|
||||
})
|
||||
option.add_experimental_option("prefs", {
|
||||
"download.default_directory": "Data/", # 设置文件下载路径
|
||||
"download.prompt_for_download": False, # 禁止下载提示框
|
||||
"plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}],
|
||||
"download.directory_upgrade": True,
|
||||
"download.extensions_to_open": "applications/pdf",
|
||||
"plugins.always_open_pdf_externally": True # 总是在外部程序中打开PDF
|
||||
})
|
||||
print(options)
|
||||
threads = []
|
||||
for i in c.id:
|
||||
|
Loading…
x
Reference in New Issue
Block a user