This commit is contained in:
Naibo Wang 2024-04-22 07:13:54 +08:00
parent 1b6661afb8
commit 3ffd34d0fd
11 changed files with 131 additions and 70 deletions

View File

@ -5,9 +5,10 @@ import copy
import platform
import shutil
import string
import threading
# import undetected_chromedriver as uc
from utils import detect_optimizable, download_image, extract_text_from_html, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, \
on_press_creator, on_release_creator, readCode, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json
on_press_creator, on_release_creator, readCode, rename_downloaded_file, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json
from myChrome import MyChrome
from threading import Thread, Event
from PIL import Image
@ -112,9 +113,13 @@ class BrowserThread(Thread):
self.print_and_log("Save Name for task ID", id, "is:", self.saveName)
if not os.path.exists("Data/Task_" + str(id)):
os.mkdir("Data/Task_" + str(id))
if not os.path.exists("Data/Task_" + str(id) + "/" + self.saveName):
os.mkdir("Data/Task_" + str(id) + "/" +
self.saveName) # 创建保存文件夹用来保存截图
self.downloadFolder = "Data/Task_" + str(id) + "/" + self.saveName
if not os.path.exists(self.downloadFolder):
os.mkdir(self.downloadFolder) # 创建保存文件夹用来保存截图和文件
if not os.path.exists(self.downloadFolder + "/files"):
os.mkdir(self.downloadFolder + "/files")
if not os.path.exists(self.downloadFolder + "/images"):
os.mkdir(self.downloadFolder + "/images")
self.getDataStep = 0
self.startSteps = 0
try:
@ -142,12 +147,21 @@ class BrowserThread(Thread):
self.print_and_log("Loading stealth.min.js")
self.browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
'source': js}) # TMALL 反扒
self.browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
})
WebDriverWait(self.browser, 10)
self.browser.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(self.id))
path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(self.id), self.saveName, "files")
self.paramss = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': path}}
self.browser.execute("send_command", self.paramss) # 下载地址改变
self.browser.execute("send_command", self.paramss) # 下载目录改变
self.monitor_event = threading.Event()
self.monitor_thread = threading.Thread(target=rename_downloaded_file, args=(path, self.monitor_event)) #path后面的逗号不能省略是元组固定写法
self.monitor_thread.start()
# self.browser.get('about:blank')
self.procedure = service["graph"] # 程序执行流程
try:
@ -187,12 +201,19 @@ class BrowserThread(Thread):
self.links = list(filter(isnotnull, service["url"])) # 要执行的link
self.OUTPUT = [] # 采集的数据
try:
self.dataWriteMode = service["dataWriteMode"] # 数据写入模式1为追加2为覆盖
self.dataWriteMode = service["dataWriteMode"] # 数据写入模式1为追加2为覆盖3为重命名文件
except:
self.dataWriteMode = 1
if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx" or self.outputFormat == "json":
if self.dataWriteMode == 2 and os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
os.remove("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat)
if os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
if self.dataWriteMode == 2:
os.remove("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat)
elif self.dataWriteMode == 3:
i = 2
while os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '_' + str(i) + '.' + self.outputFormat):
i = i + 1
self.saveName = self.saveName + '_' + str(i)
self.print_and_log("文件已存在,已重命名为", self.saveName)
self.writeMode = 1 # 写入模式0为新建1为追加
if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx":
if not os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
@ -521,7 +542,7 @@ class BrowserThread(Thread):
"/", len(self.links))
self.executeNode(0)
self.urlId = self.urlId + 1
files = os.listdir("Data/Task_" + str(self.id) + "/" + self.saveName)
# files = os.listdir("Data/Task_" + str(self.id) + "/" + self.saveName)
# 如果目录为空,则删除该目录
# if not files:
# os.rmdir("Data/Task_" + str(self.id) + "/" + self.saveName)
@ -544,6 +565,7 @@ class BrowserThread(Thread):
shutil.rmtree(self.option["tmp_user_data_folder"])
except:
pass
self.monitor_event.set()
self.print_and_log("清理完成!|Clean up completed!")
self.print_and_log("您现在可以安全的关闭此窗口了。|You can safely close this window now.")
@ -768,6 +790,8 @@ class BrowserThread(Thread):
elif int(codeMode) == 5:
try:
code = readCode(code)
# global_namespace = globals().copy()
# global_namespace["self"] = self
output = exec(code)
self.recordLog("执行下面的代码:" + code)
self.recordLog("Execute the following code:" + code)
@ -847,6 +871,23 @@ class BrowserThread(Thread):
self.print_and_log("根据设置的自定义操作,任务已刷新页面|Task refreshed page according to custom operation")
elif codeMode == 9: # 发送邮件
send_email(node["parameters"]["emailConfig"])
elif codeMode == 10: # 清空所有字段值
self.clearOutputParameters()
elif codeMode == 11: # 生成新的数据行
line = new_line(self.outputParameters,
self.maxViewLength, self.outputParametersRecord)
self.OUTPUT.append(line)
elif codeMode == 12: # 退出程序
self.print_and_log("根据设置的自定义操作,任务已退出|Task exited according to custom operation")
self.saveData(exit=True)
self.browser.quit()
self.print_and_log("正在清理临时用户目录……|Cleaning up temporary user directory...")
try:
shutil.rmtree(self.option["tmp_user_data_folder"])
except:
pass
self.print_and_log("清理完成!|Clean up completed!")
os._exit(0)
else: # 0 1 5 6
output = self.execute_code(
codeMode, code, max_wait_time, iframe=params["iframe"])
@ -1106,7 +1147,25 @@ class BrowserThread(Thread):
self.recordLog(
"判断条件内所有条件分支的条件都不满足|None of the conditions in the judgment condition are met")
def handleHistory(self, node, xpath, thisHistoryURL, thisHistoryLength, index, element=None, elements=None):
def handleHistory(self, node, xpath, thisHandle, thisHistoryURL, thisHistoryLength, index, element=None, elements=None):
try:
changed_handle = self.browser.current_window_handle != thisHandle
except: # 如果网页被意外关闭了的情况下
self.browser.switch_to.window(
self.browser.window_handles[-1])
changed_handle = self.browser.window_handles[-1] != thisHandle
if changed_handle: # 如果执行完一次循环之后标签页的位置发生了变化
try:
while True: # 一直关闭窗口直到当前标签页
self.browser.close() # 关闭使用完的标签页
self.browser.switch_to.window(
self.browser.window_handles[-1])
if self.browser.current_window_handle == thisHandle:
break
except Exception as e:
self.print_and_log("关闭标签页发生错误:", e)
self.print_and_log(
"Error occurred while closing tab: ", e)
if self.history["index"] != thisHistoryLength and self.history["handle"] == self.browser.current_window_handle: # 如果执行完一次循环之后历史记录发生了变化,注意当前页面的判断
difference = thisHistoryLength - self.history["index"] # 计算历史记录变化差值
self.browser.execute_script('history.go(' + str(difference) + ')') # 回退历史记录
@ -1132,12 +1191,13 @@ class BrowserThread(Thread):
if self.browser.current_url == thisHistoryURL or ti > thisHistoryLength: # 如果执行完一次循环之后网址发生了变化
break
time.sleep(2)
if element == None: # 不固定元素列表
element = self.browser.find_elements(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
else: # 固定元素列表
element = self.browser.find_element(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
# if index > 0:
# index -= 1 # 如果是data:开头的网址,就要重试一次
if xpath != "":
if element == None: # 不固定元素列表
element = self.browser.find_elements(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
else: # 固定元素列表
element = self.browser.find_element(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
# if index > 0:
# index -= 1 # 如果是data:开头的网址,就要重试一次
else:
if element == None:
element = elements
@ -1321,25 +1381,7 @@ class BrowserThread(Thread):
if self.BREAK:
self.BREAK = False
break
try:
changed_handle = self.browser.current_window_handle != thisHandle
except: # 如果网页被意外关闭了的情况下
self.browser.switch_to.window(
self.browser.window_handles[-1])
changed_handle = self.browser.window_handles[-1] != thisHandle
if changed_handle: # 如果执行完一次循环之后标签页的位置发生了变化
try:
while True: # 一直关闭窗口直到当前标签页
self.browser.close() # 关闭使用完的标签页
self.browser.switch_to.window(
self.browser.window_handles[-1])
if self.browser.current_window_handle == thisHandle:
break
except Exception as e:
self.print_and_log("关闭标签页发生错误:", e)
self.print_and_log(
"Error occurred while closing tab: ", e)
index, elements = self.handleHistory(node, xpath, thisHistoryURL, thisHistoryLength, index, elements=elements)
index, elements = self.handleHistory(node, xpath, thisHandle, thisHistoryURL, thisHistoryLength, index, elements=elements)
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
output = self.execute_code(int(
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"],
@ -1381,25 +1423,7 @@ class BrowserThread(Thread):
if self.BREAK:
self.BREAK = False
break
try:
changed_handle = self.browser.current_window_handle != thisHandle
except: # 如果网页被意外关闭了的情况下
self.browser.switch_to.window(
self.browser.window_handles[-1])
changed_handle = self.browser.window_handles[-1] != thisHandle
if changed_handle: # 如果执行完一次循环之后标签页的位置发生了变化
try:
while True: # 一直关闭窗口直到当前标签页
self.browser.close() # 关闭使用完的标签页
self.browser.switch_to.window(
self.browser.window_handles[-1])
if self.browser.current_window_handle == thisHandle:
break
except Exception as e:
self.print_and_log("关闭标签页发生错误:", e)
self.print_and_log(
"Error occurred while closing tab: ", e)
index, element = self.handleHistory(node, path, thisHistoryURL, thisHistoryLength, index, element=element)
index, element = self.handleHistory(node, path, thisHandle, thisHistoryURL, thisHistoryLength, index, element=element)
except NoSuchElementException:
self.print_and_log("Loop element not found: ", path)
self.print_and_log("找不到循环元素:", path)
@ -1447,6 +1471,7 @@ class BrowserThread(Thread):
code = get_output_code(output)
if code <= 0:
break
index, _ = self.handleHistory(node, "", thisHandle, thisHistoryURL, thisHistoryLength, index)
elif int(node["parameters"]["loopType"]) == 4: # 固定网址列表
# tempList = node["parameters"]["textList"].split("\r\n")
urlList = list(
@ -1715,6 +1740,21 @@ class BrowserThread(Thread):
script = 'var result = document.evaluate(`' + path + \
'`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i<arguments[0];i++){result.iterateNext();} result.iterateNext().click();'
self.browser.execute_script(script, str(index)) # 用js的点击方法
elif click_way == 2: # 双击
try:
actions = ActionChains(self.browser) # 实例化一个action对象
actions.double_click(element).perform()
except Exception as e:
self.browser.execute_script("arguments[0].scrollIntoView();", element)
try:
actions = ActionChains(self.browser) # 实例化一个action对象
actions.double_click(element).perform()
except Exception as e:
self.print_and_log(f"Selenium双击元素{path}失败将尝试使用JavaScript双击")
self.print_and_log(f"Failed to double click element {path} with Selenium, will try to double click with JavaScript")
script = 'var result = document.evaluate(`' + path + \
'`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i<arguments[0];i++){result.iterateNext();} result.iterateNext().click();'
self.browser.execute_script(script, str(index)) # 用js的点击方法
self.recordLog("点击元素|Click element: " + path)
except TimeoutException:
self.print_and_log(
@ -1797,7 +1837,6 @@ class BrowserThread(Thread):
self.print_and_log("History Length Error")
self.history["index"] = 0
self.scrollDown(param) # 根据参数配置向下滚动
# rt.end()
def get_content(self, p, element):
content = ""
@ -1824,7 +1863,7 @@ class BrowserThread(Thread):
downloadPic = 0
if downloadPic == 1:
download_image(self, content, "Data/Task_" +
str(self.id) + "/" + self.saveName + "/", element)
str(self.id) + "/" + self.saveName + "/images", element)
else: # 普通节点
if p["splitLine"] == 1:
text = extract_text_from_html(element.get_attribute('outerHTML'))
@ -1853,7 +1892,7 @@ class BrowserThread(Thread):
downloadPic = 0
if downloadPic == 1:
download_image(self, content, "Data/Task_" +
str(self.id) + "/" + self.saveName + "/", element)
str(self.id) + "/" + self.saveName + "/images", element)
else:
command = 'var arr = [];\
var content = arguments[0];\
@ -1965,6 +2004,8 @@ class BrowserThread(Thread):
content = element.get_attribute(attribute_name)
except:
content = ""
elif p["contentType"] == 15: # 常量值
content = p["JS"]
if content == None:
content = ""
return content
@ -2208,7 +2249,7 @@ if __name__ == '__main__':
"server_address": "http://localhost:8074",
"keyboard": True, # 是否监听键盘输入
"pause_key": "p", # 暂停键
"version": "0.6.0",
"version": "0.6.2",
}
c = Config(config)
print(c)
@ -2283,7 +2324,9 @@ if __name__ == '__main__':
options.add_argument(
"--disable-blink-features=AutomationControlled") # TMALL 反扒
# 阻止http -> https的重定向
options.add_argument("--disable-features=CrossSiteDocumentBlockingIfIsolating,CrossSiteDocumentBlockingAlways,IsolateOrigins,site-per-process")
options.add_argument("--disable-web-security") # 禁用同源策略
options.add_argument('-ignore-certificate-errors')
options.add_argument('-ignore -ssl-errors')
@ -2370,8 +2413,8 @@ if __name__ == '__main__':
cloudflare = 0
if cloudflare == 0:
options.add_argument('log-level=3') # 隐藏日志
path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(id))
print("Data path:", path)
path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(id), "files")
print("文件下载路径|File Download path:", path)
options.add_experimental_option("prefs", {
# 设置文件下载路径
"download.default_directory": path,

View File

@ -23,7 +23,7 @@ For more complex operations, please download the source code and compile it for
"""
# 请在下面编写你的代码,不要有代码缩进!!! | Please write your code below, do not indent the code!!!
print(globals())
# 导包 | Import packages
from selenium.common.exceptions import ElementClickInterceptedException
@ -56,3 +56,20 @@ finally:
print("All parameters:", self.outputParameters)
print(test(3))
print("执行完毕|Execution completed")
import time
time.sleep(3)
def new_line(outputParameters, maxViewLength, record):
line = []
print("Use this function to print a new line in the console")
i = 0
for value in outputParameters.values():
line.append(value)
if record[i]:
print(value[:maxViewLength], " ", end="")
i += 1
print("")
return line
new_line(self.outputParameters, 10, [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True])

File diff suppressed because one or more lines are too long

View File

@ -1 +1 @@
{"id":228,"name":"[2312.02977] Exploring the nonclassical dynamics of the \"classical'' Schrödinger equation","url":"https://arxiv.org/abs/2312.02977","links":"https://arxiv.org/abs/2312.02977","create_time":"12/7/2023, 2:44:58 AM","update_time":"12/7/2023, 2:56:47 AM","version":"0.6.0","saveThreshold":10,"quitWaitTime":60,"environment":1,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"desc":"https://arxiv.org/abs/2312.02977","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://arxiv.org/abs/2312.02977","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://arxiv.org/abs/2312.02977"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://arxiv.org/abs/2312.02977","links":"https://arxiv.org/abs/2312.02977","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":2,"title":"点击Download PDF","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"download-pdf\")]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/div[2]/main[1]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[1]/a[1]","//a[contains(., 'Download P')]","//A[@class='abs-button download-pdf']","/html/body/div[last()-3]/main/div/div/div[last()-2]/div[last()-5]/ul/li[last()-2]/a"]}}]}
{"id":228,"name":"[2312.02977] Exploring the nonclassical dynamics of the \"classical'' Schrödinger equation","url":"https://arxiv.org/abs/2312.02977","links":"https://arxiv.org/abs/2312.02977","create_time":"12/7/2023, 2:44:58 AM","update_time":"2024-01-05 22:08:46","version":"0.6.0","saveThreshold":10,"quitWaitTime":3,"environment":1,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"TTT","dataWriteMode":3,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"https://arxiv.org/abs/2312.02977","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://arxiv.org/abs/2312.02977","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://arxiv.org/abs/2312.02977"},{"id":1,"name":"loopTimes_1","nodeId":5,"nodeName":"循环 - 单个元素","desc":"循环循环 - 单个元素执行的次数0代表无限循环","type":"int","exampleValue":10,"value":10}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,5],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://arxiv.org/abs/2312.02977","links":"https://arxiv.org/abs/2312.02977","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":3,"index":2,"parentId":2,"type":0,"option":2,"title":"点击Download PDF","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"download-pdf\")]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/div[2]/main[1]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[1]/a[1]","//a[contains(., 'Download P')]","//A[@class='abs-button download-pdf']","/html/body/div[last()-3]/main/div/div/div[last()-2]/div[last()-5]/ul/li[last()-2]/a"]}},{"id":-1,"index":3,"parentId":0,"type":0,"option":2,"title":"点击Download PDF","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"download-pdf\")]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/div[2]/main[1]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[1]/a[1]","//a[contains(., 'Download P')]","//A[@class='abs-button download-pdf']","/html/body/div[last()-3]/main/div/div/div[last()-2]/div[last()-5]/ul/li[last()-2]/a"]}},{"id":-1,"index":4,"parentId":0,"type":0,"option":2,"title":"点击Download PDF","sequence":[],"isInLoop":false,"position":3,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"download-pdf\")]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/div[2]/main[1]/div[1]/div[1]/div[2]/div[1]/ul[1]/li[1]/a[1]","//a[contains(., 'Download P')]","//A[@class='abs-button download-pdf']","/html/body/div[last()-3]/main/div/div/div[last()-2]/div[last()-5]/ul/li[last()-2]/a"]}},{"id":2,"index":5,"parentId":0,"type":1,"option":8,"title":"循环 - 单个元素","sequence":[2],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"//body","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":0,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":10,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"skipCount":0}}]}

View File

@ -1 +1 @@
{"id":229,"name":"知乎 - 有问题,就会有答案","url":"https://www.zhihu.com","links":"https://www.zhihu.com","create_time":"07/12/2023, 03:26:24","update_time":"07/12/2023, 03:43:34","version":"0.6.0","saveThreshold":10,"quitWaitTime":6,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"pauseKey":"t","containJudge":false,"desc":"https://www.zhihu.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环采集数据","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","//div[contains(., '死刑执行前可以谎称肚')]","/html/body/div[last()-7]/div/main/div/div/div[last()-1]/div/div/div/div/div/div[last()-12]/div/div/div/div/h2/div"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"params":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"unique_index":"onlvi030w9jlpu5tjzb","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
{"id":229,"name":"知乎 - 有问题,就会有答案","url":"https://www.zhihu.com","links":"https://www.zhihu.com","create_time":"07/12/2023, 03:26:24","update_time":"2023-12-27 20:05:50","version":"0.6.0","saveThreshold":10,"quitWaitTime":6,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"t","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"知了个乎","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.zhihu.com"},{"id":1,"name":"loopTimes_1","nodeId":4,"nodeName":"循环 - 单个元素","desc":"循环循环 - 单个元素执行的次数0代表无限循环","type":"int","exampleValue":0,"value":0}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,4,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":3,"index":2,"parentId":0,"type":1,"option":8,"title":"循环采集数据","sequence":[3],"isInLoop":false,"position":2,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","//div[contains(., '死刑执行前可以谎称肚')]","/html/body/div[last()-7]/div/main/div/div/div[last()-1]/div/div/div/div/div/div[last()-12]/div/div/div/div/h2/div"]}},{"id":4,"index":3,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"params":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"unique_index":"onlvi030w9jlpu5tjzb","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}},{"id":2,"index":4,"parentId":0,"type":1,"option":8,"title":"循环 - 单个元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":0,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"skipCount":0}}]}

View File

@ -1 +1 @@
{"id":70,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/24/2023, 8:21:45 PM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":7,"title":"移动到元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":2,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":"","loopType":1}}]}
{"id":-2,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/24/2023, 8:21:45 PM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":7,"title":"移动到元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":2,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":"","loopType":1}}]}

File diff suppressed because one or more lines are too long

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
{"id":321,"name":"百度一下,你就知道","url":"https://www.baidu.com","links":"https://www.baidu.com","create_time":"2024-04-22 07:02:02","update_time":"2024-04-22 07:02:16","version":"0.6.2","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"csv","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"https://www.baidu.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.baidu.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.baidu.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.baidu.com","links":"https://www.baidu.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环点击每个元素","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[2]/div[1]/div[5]/div[1]/div[1]/div[3]/ul[1]/li/a[1]/span[2]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"skipCount":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":1,"maxWaitTime":10,"params":[],"alertHandleType":0,"downloadWaitTime":3600,"allXPaths":""}}]}

View File

@ -48,7 +48,7 @@ def copy_folder(source_folder, destination_folder):
def get_chrome_version():
version = "120"
version = "124"
if sys.platform == "win32":
version_re = re.compile(r"^[1-9]\d*\.\d*.\d*")
try: