This commit is contained in:
naibo 2023-06-02 01:36:06 +08:00
parent 527a004e73
commit 0a5d159b8a
5 changed files with 30 additions and 3 deletions

View File

@ -296,6 +296,7 @@ async function runBrowser(lang = "en", user_data_folder = '') {
}
options.addExtensions(path.join(__dirname, "XPathHelper.crx"));
options.setChromeBinaryPath(chromeBinaryPath);
options.add
if (user_data_folder != "") {
let dir = path.join(task_server.getDir(), user_data_folder);
console.log(dir);

1
ElectronJS/tasks/96.json Normal file
View File

@ -0,0 +1 @@
{"id":96,"name":"[2305.07067] SigRec: Automatic Recovery of Function Signatures in Smart Contracts","url":"https://arxiv.org/abs/2305.07067","links":"https://arxiv.org/pdf/2008.03554.pdf","create_time":"6/2/2023, 1:00:27 AM","version":"0.3.2","containJudge":false,"desc":"https://arxiv.org/abs/2305.07067","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://arxiv.org/pdf/2008.03554.pdf","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://arxiv.org/pdf/2008.03554.pdf"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://arxiv.org/abs/2305.07067","links":"https://arxiv.org/pdf/2008.03554.pdf","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"download-pdf\")]","wait":2,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[2]/main[1]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[1]/a[1]","//a[contains(., 'PDF')]","//A[@class='abs-button download-pdf']"]}}]}

1
ElectronJS/tasks/97.json Normal file

File diff suppressed because one or more lines are too long

View File

@ -12,7 +12,7 @@
"console": "integratedTerminal",
"justMyCode": true,
// "args": ["--id", "38", "--read_type", "local", "--headless", "1"]
"args": ["--id", "[5]", "--headless", "0", "--user_data", "0"]
"args": ["--id", "[80]", "--headless", "0", "--user_data", "0"]
}
]
}

View File

@ -702,10 +702,18 @@ class BrowserThread(Thread):
textbox.send_keys(Keys.SHIFT, Keys.END)
# Send the DELETE key
textbox.send_keys(Keys.DELETE)
value = ""
if para["useLoop"]:
textbox.send_keys(loopValue)
value = loopValue
else:
textbox.send_keys(para["value"])
value = para["value"]
pattern = r'Field\["([^"]+)"\]' # 将value中的Field[""]替换为outputParameters中的键值
try:
replaced_text = re.sub(pattern, lambda match: self.outputParameters.get(match.group(1), ''), value)
except:
replaced_text = value
value = replaced_text
textbox.send_keys(value)
self.execute_code(2, para["afterJS"], para["afterJSWaitTime"], textbox) # 执行后置js
# global bodyText # 每次执行点击输入元素和打开网页操作后需要更新bodyText
self.bodyText = self.browser.find_element(By.CSS_SELECTOR, "body").text
@ -1089,6 +1097,22 @@ if __name__ == '__main__':
option.add_argument(
"--disable-blink-features=AutomationControlled") # TMALL 反扒
options.add_argument("--disable-blink-features=AutomationControlled") # TMALL 反扒
options.add_experimental_option("prefs", {
"download.default_directory": "Data/", # 设置文件下载路径
"download.prompt_for_download": False, # 禁止下载提示框
"plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}],
"download.directory_upgrade": True,
"download.extensions_to_open": "applications/pdf",
"plugins.always_open_pdf_externally": True # 总是在外部程序中打开PDF
})
option.add_experimental_option("prefs", {
"download.default_directory": "Data/", # 设置文件下载路径
"download.prompt_for_download": False, # 禁止下载提示框
"plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}],
"download.directory_upgrade": True,
"download.extensions_to_open": "applications/pdf",
"plugins.always_open_pdf_externally": True # 总是在外部程序中打开PDF
})
print(options)
threads = []
for i in c.id: