mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-16 16:26:56 +08:00
Logic of history
This commit is contained in:
parent
50949d4f8c
commit
f21804f0e9
@ -346,6 +346,10 @@ class BrowserThread(Thread):
|
||||
node["parameters"]["quickExtractable"] = True # 先假设可以快速提取
|
||||
for param in params:
|
||||
optimizable = detect_optimizable(param, ignoreWaitElement=False, waitElement=waitElement)
|
||||
try:
|
||||
iframe = param["iframe"]
|
||||
except:
|
||||
param["iframe"] = False
|
||||
if param["iframe"] and not param["relative"]: # 如果是iframe,那么不可以快速提取
|
||||
optimizable = False
|
||||
if not optimizable: # 如果有一个不满足优化条件,那么就不能快速提取
|
||||
@ -354,8 +358,14 @@ class BrowserThread(Thread):
|
||||
if node["parameters"]["quickExtractable"]:
|
||||
self.print_and_log("循环操作<" + node["title"] + ">可以快速提取数据")
|
||||
self.print_and_log("Loop operation <" + node["title"] + "> can extract data quickly")
|
||||
node["parameters"]["clear"] = self.procedure[node["sequence"][0]]["parameters"]["clear"]
|
||||
node["parameters"]["newLine"] = self.procedure[node["sequence"][0]]["parameters"]["newLine"]
|
||||
try:
|
||||
node["parameters"]["clear"] = self.procedure[node["sequence"][0]]["parameters"]["clear"]
|
||||
except:
|
||||
node["parameters"]["clear"] = 0
|
||||
try:
|
||||
node["parameters"]["newLine"] = self.procedure[node["sequence"][0]]["parameters"]["newLine"]
|
||||
except:
|
||||
node["parameters"]["newLine"] = 1
|
||||
if int(node["parameters"]["loopType"]) == 1: # 不固定元素列表
|
||||
node["parameters"]["baseXPath"] = node["parameters"]["xpath"]
|
||||
elif int(node["parameters"]["loopType"]) == 2: # 固定元素列表
|
||||
@ -838,7 +848,6 @@ class BrowserThread(Thread):
|
||||
self.print_and_log("Cannot find element:", xpath)
|
||||
|
||||
# 执行节点关键函数部分
|
||||
|
||||
def executeNode(self, nodeId, loopValue="", loopPath="", index=0):
|
||||
node = self.procedure[nodeId]
|
||||
# WebDriverWait(self.browser, 10).until
|
||||
@ -1100,7 +1109,12 @@ class BrowserThread(Thread):
|
||||
try:
|
||||
finished = False
|
||||
if node["parameters"]["exitCount"] == 0:
|
||||
newBodyText = self.browser.find_element(By.XPATH, node["parameters"]["exitElement"], iframe=node["parameters"]["iframe"]).text
|
||||
# newBodyText = self.browser.find_element(By.XPATH, node["parameters"]["exitElement"], iframe=node["parameters"]["iframe"]).text
|
||||
# 用find_elements获取所有匹配到的文本
|
||||
exitElements = self.browser.find_elements(By.XPATH, node["parameters"]["exitElement"], iframe=node["parameters"]["iframe"])
|
||||
newBodyText = ""
|
||||
for exitElement in exitElements:
|
||||
newBodyText += exitElement.text
|
||||
if node["parameters"]["iframe"]: # 如果标记了iframe
|
||||
iframes = self.browser.find_elements(
|
||||
By.CSS_SELECTOR, "iframe", iframe=False)
|
||||
@ -1111,7 +1125,6 @@ class BrowserThread(Thread):
|
||||
By.CSS_SELECTOR, "body").text # 用super调用父类的方法
|
||||
newBodyText += iframe_text
|
||||
self.browser.switch_to.default_content()
|
||||
|
||||
if newBodyText == bodyText: # 如果页面内容无变化
|
||||
self.print_and_log("页面已检测不到新内容,停止循环。")
|
||||
self.print_and_log(
|
||||
|
@ -37,7 +37,7 @@ def test(n = 0):
|
||||
# 异常捕获 | Exception capture
|
||||
try:
|
||||
# 使用XPath定位元素并点击浏览器中元素 | Use XPath to locate the element and click the element in the browser
|
||||
element = self.browser.find_element(By.XPATH, "//*[contains(@class, 'LeftSide_menu_list__qXCeM')]/div[1]/a[1]") # 这里请忽略IDE的报错,因为代码是嵌入到程序中的,IDE无法识别self变量和By变量是正常的 | Please ignore the error reported by the IDE, because the code is embedded in the program, and the IDE cannot recognize that the self variable and By variable are normal
|
||||
element = self.browser.find_element(By.XPATH, "//*[contains(@class, 'LeftSide_menu_list__qXCeM')]/div[1]/a[1]") # 这里请忽略IDE语法报错如找不到self的报错,因为代码是嵌入到程序中的,IDE无法识别self变量和By变量是正常的 | Please ignore the warning reported by the IDE such as "'self' is not defined", because the code is embedded in the program, and the IDE cannot recognize that the self variable and By variable are normal
|
||||
actions = ActionChains(self.browser)
|
||||
actions.click(element).perform()
|
||||
print("点击成功|Click success")
|
||||
|
File diff suppressed because one or more lines are too long
1
.temp_to_pub/EasySpider_windows_x64/tasks/278.json
Normal file
1
.temp_to_pub/EasySpider_windows_x64/tasks/278.json
Normal file
File diff suppressed because one or more lines are too long
1
.temp_to_pub/EasySpider_windows_x64/tasks/292.json
Normal file
1
.temp_to_pub/EasySpider_windows_x64/tasks/292.json
Normal file
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
@ -10,7 +10,7 @@
|
||||
<script src="vue.js"></script>
|
||||
<script src="bootstrap/js/bootstrap.js"></script>
|
||||
<link href="bootstrap/css/bootstrap.css" rel="stylesheet"></link>
|
||||
<title>任务调用/执行 | Task Invoke</title>
|
||||
<title>任务执行 | Task Execute</title>
|
||||
<style>
|
||||
table {
|
||||
table-layout: auto;
|
||||
@ -58,7 +58,7 @@
|
||||
<div class="modal-dialog modal-lg">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h4 class="modal-title" id="myModalLabel">{{"Task Invocation Instruction~执行任务说明" | lang}}</h4>
|
||||
<h4 class="modal-title" id="myModalLabel">{{"Task Execution Instruction~执行任务说明" | lang}}</h4>
|
||||
<button type="button" class="close" data-dismiss="modal" aria-hidden="true">×</button>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
@ -189,12 +189,12 @@
|
||||
<ol class="breadcrumb" style="padding-left:0;background-color: white">
|
||||
<li @click="gotoHome" class="breadcrumb-item"><a href="#">{{"Home~首页" | lang}}</a></li>
|
||||
<li @click="gotoInfo" aria-current="page" class="breadcrumb-item" style="color: black"><a href="#">{{"Task Information~任务信息" | lang}}</a></li>
|
||||
<li aria-current="page" class="breadcrumb-item active" style="color: black">{{"Task Invocation~任务调用/执行"
|
||||
<li aria-current="page" class="breadcrumb-item active" style="color: black">{{"Task Execution~任务执行"
|
||||
| lang}}
|
||||
</li>
|
||||
</ol>
|
||||
</nav>
|
||||
<h4 style="text-align: center;">{{"Task Invocation~任务调用/执行" | lang}}</h4>
|
||||
<h4 style="text-align: center;">{{"Task Execution~任务执行" | lang}}</h4>
|
||||
<p>{{"Task Name:~任务名称:" | lang}} {{task["name"]}}</p>
|
||||
<p style="word-wrap: break-word;word-break: break-all;overflow: hidden;max-height: 100px;">{{"Task Description:~任务描述:" | lang}} {{task["desc"]}}</p>
|
||||
<p style="word-wrap: break-word;word-break: break-all;overflow: hidden;max-height: 100px;">{{"API URL (POST):~API 调用网址(POST):" |
|
||||
|
@ -51,7 +51,7 @@
|
||||
<p style="word-wrap: break-word;word-break: break-all;overflow: hidden;max-height: 100px;">{{"Update Time:~更新时间:" | lang}} {{dateFormat(task["update_time"])}}</p>
|
||||
<p>{{"Operations (Please close this window and select 'Design Task' button if you want to modify task with a browser)~操作(如要带浏览器修改任务流程请关闭此窗口并选择设计任务)" | lang}}</p>
|
||||
<p><a style="margin-top: 5px" href="javascript:void(0)" v-on:click="modifyTask(task['id'],task['url'])" class="btn btn-primary">{{"Modify Task Workflow~修改任务流程" | lang}}</a>
|
||||
<a style="margin-top: 5px" href="javascript:void(0)" v-on:click="invokeTask(task['id'],task['url'])" class="btn btn-primary">{{"Invoke/Execute Task~调用/执行任务" | lang}}</a></p>
|
||||
<a style="margin-top: 5px" href="javascript:void(0)" v-on:click="invokeTask(task['id'],task['url'])" class="btn btn-primary">{{"Invoke & execute Task~执行任务" | lang}}</a></p>
|
||||
<p>{{"Input Parameters~输入参数" | lang}}</p>
|
||||
<table class="table table-bordered">
|
||||
<tbody>
|
||||
|
1
ElectronJS/tasks/293.json
Normal file
1
ElectronJS/tasks/293.json
Normal file
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/294.json
Normal file
1
ElectronJS/tasks/294.json
Normal file
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/295.json
Normal file
1
ElectronJS/tasks/295.json
Normal file
File diff suppressed because one or more lines are too long
2
ExecuteStage/.vscode/launch.json
vendored
2
ExecuteStage/.vscode/launch.json
vendored
@ -12,7 +12,7 @@
|
||||
"justMyCode": false,
|
||||
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
|
||||
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
||||
"args": ["--ids", "[53]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
|
||||
"args": ["--ids", "[58]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
|
||||
"--read_type", "remote"]
|
||||
// "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
|
||||
}
|
||||
|
@ -1031,7 +1031,8 @@ class BrowserThread(Thread):
|
||||
ti = 0
|
||||
# print("CURRENT URL:", self.browser.current_url)
|
||||
# time.sleep(2)
|
||||
if self.browser.current_url.startswith("data:") or self.browser.current_url.startswith("chrome:"):
|
||||
# if self.browser.current_url.startswith("data:") or self.browser.current_url.startswith("chrome:"):
|
||||
if self.browser.current_url != thisHistoryURL and self.history["index"] != thisHistoryLength and self.history["handle"] == self.browser.current_window_handle:
|
||||
while self.browser.current_url != thisHistoryURL: # 如果执行完一次循环之后网址发生了变化
|
||||
try:
|
||||
self.browser.execute_script("history.go(1)") # 如果是data:开头的网址,就前进一步
|
||||
@ -1045,8 +1046,8 @@ class BrowserThread(Thread):
|
||||
element = self.browser.find_elements(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
|
||||
else: # 固定元素列表
|
||||
element = self.browser.find_element(By.XPATH, xpath, iframe=node["parameters"]["iframe"])
|
||||
if index > 0:
|
||||
index -= 1 # 如果是data:开头的网址,就要重试一次
|
||||
# if index > 0:
|
||||
# index -= 1 # 如果是data:开头的网址,就要重试一次
|
||||
else:
|
||||
if element == None:
|
||||
element = elements
|
||||
@ -1199,8 +1200,16 @@ class BrowserThread(Thread):
|
||||
self.print_and_log("找不到循环元素: ", xpath)
|
||||
index = 0
|
||||
while index < len(elements):
|
||||
try:
|
||||
element = elements[index]
|
||||
element_text = element.text
|
||||
except StaleElementReferenceException: # 如果元素已经失效,重试
|
||||
self.print_and_log("元素已失效,重新获取元素|Element has expired, reacquiring element")
|
||||
elements = self.browser.find_elements(By.XPATH,
|
||||
xpath, iframe=node["parameters"]["iframe"])
|
||||
element = elements[index]
|
||||
for i in node["sequence"]: # 挨个顺序执行循环里所有的操作
|
||||
self.executeNode(i, elements[index],
|
||||
self.executeNode(i, element,
|
||||
xpath, index)
|
||||
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
||||
self.CONTINUE = False
|
||||
|
Loading…
x
Reference in New Issue
Block a user