Extension背景页弃用

This commit is contained in:
naibo 2023-05-26 00:14:06 +08:00
parent 6212724d55
commit 8dfad5b062
35 changed files with 284 additions and 199 deletions

View File

@ -185,12 +185,14 @@ async function beginInvoke(msg, ws) {
socket_flowchart.send(msg.message.pipe); //直接把消息转接
let message = JSON.parse(msg.message.pipe);
let type = message.type;
console.log("FROM Browser: ", message);
// if(type.indexOf("Click")>=0){
// await new Promise(resolve => setTimeout(resolve, 2000)); //等两秒
//
// }
} else {
socket_window.send(msg.message.pipe);
console.log("FROM Flowchart: ", JSON.parse(msg.message.pipe));
}
} catch {
dialog.showErrorBox("Error", "Please open the flowchart window first");
@ -240,6 +242,7 @@ let wss = new WebSocket.Server({port: websocket_port});
wss.on('connection', function (ws) {
ws.on('message', async function (message, isBinary) {
let msg = JSON.parse(message.toString());
console.log("\n\nGET A MESSAGE: ", msg);
// console.log(msg, msg.type, msg.message);
if (msg.type == 0) {
if (msg.message.id == 0) {

View File

@ -225,14 +225,14 @@
<option :value = 6>Webpage Title</option>
<option :value = 7>Element Screenshot</option>
<option :value = 8>OCR Results</option>
<option :value = 9>The return value after executing JavaScript script on this element</option>
<option :value = 9>The return value after executing JavaScript script on this element (start with 'return ')</option>
<option :value = 10>Selected value of the current select box</option>
<option :value = 11>Selected text of the current select box</option>
</select>
<div v-if='paras.parameters[paraIndex]["contentType"] == 9'>
<label>JavaScript Code: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2"
placeholder='The element can be represented by arguments[0]. Here is an example: return arguments[0].innerText + "US Dollar". This code extracts the innerText of the element and appends "US Dollar" to it.' v-model='paras.parameters[paraIndex]["JS"]'></textarea>
placeholder='The element should be represented by arguments[0]. Here is an example: return arguments[0].innerText + "US Dollar". This code extracts the innerText of the element and appends "US Dollar" to it.' v-model='paras.parameters[paraIndex]["JS"]'></textarea>
<label>Maximum wait time for script execution (0 represents unlimited wait time): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='paras.parameters[paraIndex]["JSWaitTime"]'></input>
</div>
@ -304,7 +304,7 @@
<select v-model='nowNode["parameters"]["codeMode"]' class="form-control">
<option value = 0>Execute JavaScript script</option>
<option value = 1>Execute operating system-level command</option>
<option v-if="nowNode['isInLoop']" value = 2>Execute JavaScript script for the current element inside the loop.</option>
<option v-if="nowNode['isInLoop']" value = 2>Execute JavaScript script for the current element inside the loop</option>
</select>
<div>
@ -365,7 +365,7 @@
<option value = 2>Fixed Element List</option>
<option value = 3>Text List</option>
<option value = 4>Weblink List</option>
<option value = 5>Return value of JavaScript command</option>
<option value = 5>Return value of JavaScript command (start with 'return ')</option>
<option value = 6>Return value of system command</option>
</select>
<div v-if='parseInt(loopType) < 2'>
@ -409,7 +409,7 @@
<p><label>(Advanced Operation) Define loop exit condition using code/script:</label></p>
<select v-model='nowNode["parameters"]["breakMode"]' class="form-control" style="font-weight: bold">
<option value=0>Do not set script (even if a script is written below, it will not be executed)</option>
<option value=1>JavaScript script</option>
<option value=1>JavaScript script (start with 'return ')</option>
<option value=2>Operating system-level command</option>
</select>
<div>
@ -423,7 +423,7 @@
</div>
<div class="elements" v-if="nodeType==9">
<label>The conditions are evaluated from left to right, which means if the condition in the leftmost branch is satisfied, the operations within that branch are executed. Otherwise, the condition in the next branch from left to right is evaluated, and so on.</label>
</div>
<div class="elements" v-if="nodeType==10">
@ -434,7 +434,7 @@
<option value = 2>Element inside current page</option>
<option v-if="nowNode['isInLoop']" value = 3>Text inside current loop</option>
<option v-if="nowNode['isInLoop']" value = 4>Element inside current loop</option>
<option value = 5>Return value of JavaScript command</option>
<option value = 5>Return value of JavaScript command (start with 'return ')</option>
<option value = 6>Return value of system command</option>
<option v-if="nowNode['isInLoop']" value = 7>Return value of JavaScript command for the current loop item</option>
</select>

View File

@ -225,7 +225,7 @@
<option :value = 6>页面标题</option>
<option :value = 7>元素截图</option>
<option :value = 8>OCR识别文字</option>
<option :value = 9>针对该元素的JavaScript代码返回值</option>
<option :value = 9>针对该元素的JavaScript代码返回值(需以return 开头)</option>
<option :value = 10>当前选择框选中的选项值</option>
<option :value = 11>当前选择框选中的选项文本</option>
</select>
@ -313,7 +313,7 @@
<p style="margin-top: 15px">是否将执行后的输出/返回值作为字段记录:</p>
<p><select v-model='nowNode["parameters"]["recordASField"]' class="form-control">
<option value = 0></option>
<option value = 1></option>
<option value = 1>JavaScript脚本需要以return 开头)</option>
</select></p>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
@ -365,7 +365,7 @@
<option value = 2>固定元素列表</option>
<option value = 3>文本列表(多用于循环在文本框输入文本)</option>
<option value = 4>网址列表(多用于循环打开网页)</option>
<option value = 5>JavaScript命令返回值</option>
<option value = 5>JavaScript命令返回值(需以return 开头)</option>
<option value = 6>系统命令返回值</option>
</select>
<div v-if='parseInt(loopType) < 2'>
@ -409,7 +409,7 @@
<p><label>(高级操作)使用代码/脚本定义循环退出条件: </label></p>
<select v-model='nowNode["parameters"]["breakMode"]' class="form-control" style="font-weight: bold">
<option value = 0>不设置脚本(选择这个下面写了脚本也不会执行)</option>
<option value = 1>JavaScript脚本</option>
<option value = 1>JavaScript脚本(返回值需以return 开头)</option>
<option value = 2>操作系统级别命令</option>
</select>
<div>
@ -423,7 +423,7 @@
</div>
<div class="elements" v-if="nodeType==9">
<label>判断条件是从左往右判断的,即如果最左边的判断分支的条件满足,则执行最左边分支内的操作,否则判断从左向右第二个分支的条件是否满足,以此类推。</label>
</div>
<div class="elements" v-if="nodeType==10">
@ -434,9 +434,9 @@
<option value = 2>当前页面包括元素</option>
<option v-if="nowNode['isInLoop']" value = 3>当前循环项包括文本</option>
<option v-if="nowNode['isInLoop']" value = 4>当前循环项包括元素</option>
<option value = 5>JavaScript命令返回值</option>
<option value = 5>JavaScript命令返回值(需以return 开头)</option>
<option value = 6>系统命令返回值</option>
<option v-if="nowNode['isInLoop']" value = 7>针对当前循环项的JavaScript命令返回值</option>
<option v-if="nowNode['isInLoop']" value = 7>针对当前循环项的JavaScript命令返回值(需以return 开头)</option>
</select>
<div v-if='TClass>0 && TClass <5'>
<label>包含的文字/元素XPATH <span style="font-size: 30px!important;" title="相对XPATH写法以/开头如循环项XPATH为/html/body/div[1],您的输入为/*[@id='tab-customer'],则最终寻址的xpath为/html/body/div[1]/*[@id='tab-customer']"></span></label>

View File

@ -250,16 +250,16 @@
},
localExecuteInstant: function (with_user_data=false) {
let text = "";
if (getUrlParam("lang") == "en" || getUrlParam("lang") == "") {
text = "Are you sure to run this task locally now?";
} else {
text = "确定要立即在本地运行此任务吗?";
}
// if (getUrlParam("lang") == "en" || getUrlParam("lang") == "") {
// text = "Are you sure to run this task locally now?";
// } else {
// text = "确定要立即在本地运行此任务吗?";
// }
this.with_user_data = with_user_data;
if (confirm(text)) {
var para = {};
var t = $('#form').serializeArray();
// if (confirm(text)) {
let para = {};
let t = $('#form').serializeArray();
t.forEach(function (item, index) {
para[item.name] = item.value;
});
@ -282,7 +282,7 @@
$('#myModal').modal('show');
}
});
}
// }
},
remoteExecuteInstant: function () {
},

View File

@ -22,13 +22,17 @@
<div style="margin:0 auto;min-width: 70%;" id="taskList">
<h4 style="text-align: center;">{{"Task List~任务列表" | lang}}</h4>
<p><a v-if="type==3" href="javascript:void(0)" v-on:click="newTask" class="btn btn-primary">{{"New Task~创建新任务" | lang}}</a></p>
<div v-if="type != 3" style="margin-bottom: 20px">
<a class="btn btn-primary" href="https://github.com/NaiboWang/EasySpider/issues/22" target="_blank">{{"How to run task by schedule~定时执行任务教程" | lang}}</a>
<a class="btn btn-primary" href="" target="_blank">{{"How to run task by schedule~定时执行任务教程" | lang}}</a>
</div>
<div style="margin-bottom: 10px">
<table style="table-layout: auto;" class="table table-hover">
<thead>
<tr>
<th style="text-align: left">No.</th>
<th style="text-align: center">{{"Task Name~任务名称" | lang}}</th>
<th>网址</th>
<th>{{"URL~网址" | lang}}</th>
<th v-bind:colspan="type" style="min-width: 300px">{{"Operations~操作" | lang}}</th>
</tr>
</thead>

1
ElectronJS/tasks/0.json Normal file

File diff suppressed because one or more lines are too long

1
ElectronJS/tasks/1.json Normal file

File diff suppressed because one or more lines are too long

1
ElectronJS/tasks/10.json Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1
ElectronJS/tasks/12.json Normal file

File diff suppressed because one or more lines are too long

1
ElectronJS/tasks/13.json Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1
ElectronJS/tasks/3.json Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1
ElectronJS/tasks/74.json Normal file
View File

@ -0,0 +1 @@
{"id":74,"name":"Electronics, Cars, Fashion, Collectibles & More | eBay","url":"https://www.ebay.com","links":"https://www.ebay.com","create_time":"5/26/2023, 12:07:26 AM","version":"0.3.1","containJudge":false,"desc":"https://www.ebay.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"Open Page","value":"https://www.ebay.com","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"string","exampleValue":"https://www.ebay.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"Open Page","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.ebay.com","links":"https://www.ebay.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":0,"option":7,"title":"Mouse Move","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"hl-ad-row__text-wrapper\")]/div[3]/div[2]","wait":7,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":["/html/body/div[6]/div[7]/a[1]/div[2]/div[2]/div[3]/div[2]","//div[contains(., 'Get your t')]","//DIV[@class='hl-cta__default hl-cta__default-js hl-ad-row__cta-button hl-ad-row__cta']"]}}]}

1
ElectronJS/tasks/75.json Normal file
View File

@ -0,0 +1 @@
{"id":75,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/26/2023, 12:09:11 AM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"Open Page","value":"https://www.jd.com","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"Open Page","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"Loop","sequence":[3,4],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":7,"title":"Mouse Move","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":"","loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":2,"title":"Click 
Element","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"//*[contains(@class, \"LeftSide_menu_list__qXCeM\")]/div[1]","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']"]}}]}

1
ElectronJS/tasks/8.json Normal file

File diff suppressed because one or more lines are too long

1
ElectronJS/tasks/9.json Normal file

File diff suppressed because one or more lines are too long

View File

@ -12,7 +12,7 @@
"console": "integratedTerminal",
"justMyCode": true,
// "args": ["--id", "38", "--read_type", "local", "--headless", "1"]
"args": ["--id", "15", "--headless", "0"]
"args": ["--id", "54", "--headless", "0"]
}
]
}

View File

@ -20,7 +20,7 @@ from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import StaleElementReferenceException, InvalidSelectorException
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains
@ -292,7 +292,7 @@ def executeNode(nodeId, loopValue="", loopPath="", index=0):
# 对判断条件的处理
def judgeExcute(node, loopElement, clickPath="", index=0):
rt = Time("IF Condition")
# rt = Time("IF Condition")
global bodyText # 引入bodyText
executeBranchId = 0 # 要执行的BranchId
for i in node["sequence"]:
@ -346,7 +346,7 @@ def judgeExcute(node, loopElement, clickPath="", index=0):
if code > 0:
executeBranchId = i
break
rt.end()
# rt.end()
if executeBranchId != 0:
executeNode(executeBranchId, loopElement, clickPath, index)
@ -387,9 +387,9 @@ def loopExcute(node, loopValue, clickPath="", index=0):
recordLog("click:" + node["parameters"]["xpath"])
except NoSuchElementException:
# except:
print("Single element not found: ", node["parameters"]["xpath"])
print("找不到单个元素: ", node["parameters"]["xpath"])
recordLog("clickNotFound:" + node["parameters"]["xpath"])
print("Single loop element not found: ", node["parameters"]["xpath"])
print("找不到要循环的单个元素: ", node["parameters"]["xpath"])
recordLog("Single loop element not found: " + node["parameters"]["xpath"])
for i in node["sequence"]: # 不带点击元素的把剩余的如提取数据的操作执行一遍
if node["option"] != 2:
executeNode(i, None, node["parameters"]["xpath"], 0)
@ -420,6 +420,10 @@ def loopExcute(node, loopValue, clickPath="", index=0):
try:
elements = browser.find_elements(By.XPATH,
node["parameters"]["xpath"])
if len(elements) == 0:
print("Loop element not found: ", node["parameters"]["xpath"])
print("找不到循环元素: ", node["parameters"]["xpath"])
recordLog("pathNotFound: " + node["parameters"]["xpath"])
for index in range(len(elements)):
for i in node["sequence"]: # 挨个顺序执行循环里所有的操作
executeNode(i, elements[index],
@ -453,7 +457,6 @@ def loopExcute(node, loopValue, clickPath="", index=0):
print("Loop element not found: ", node["parameters"]["xpath"])
print("找不到循环元素: ", node["parameters"]["xpath"])
recordLog("pathNotFound: " + node["parameters"]["xpath"])
pass # 循环中找不到元素就略过操作
except Exception as e:
raise
elif int(node["parameters"]["loopType"]) == 2: # 固定元素列表
@ -539,7 +542,7 @@ def loopExcute(node, loopValue, clickPath="", index=0):
# 打开网页事件
def openPage(para, loopValue):
rt = Time("打开网页")
# rt = Time("打开网页")
time.sleep(2) # 打开网页后强行等待至少2秒
global links
global urlId
@ -577,14 +580,14 @@ def openPage(para, loopValue):
Log('time out after set seconds when loading page: ' + url)
recordLog('time out after set seconds when loading page: ' + url)
browser.execute_script('window.stop()')
rt.end()
# rt.end()
try:
history["index"] = browser.execute_script("return history.length")
except TimeoutException:
browser.execute_script('window.stop()')
history["index"] = browser.execute_script("return history.length")
rt.end()
scrollDown(para, rt) # 控制屏幕向下滚动
# rt.end()
scrollDown(para) # 控制屏幕向下滚动
if containJudge:
global bodyText # 每次执行点击输入元素和打开网页操作后需要更新bodyText
try:
@ -599,7 +602,7 @@ def openPage(para, loopValue):
Log("Need to wait 1 second to get body text")
# 再执行一遍
bodyText = browser.find_element(By.CSS_SELECTOR, "body").text
rt.end()
# rt.end()
except Exception as e:
Log(e)
recordLog(str(e))
@ -608,24 +611,17 @@ def openPage(para, loopValue):
for key in outputParameters:
outputParameters[key] = ""
rt.end()
# rt.end()
# 键盘输入事件
def inputInfo(para, loopValue):
time.sleep(1) # 输入之前等待1秒
time.sleep(0.1) # 输入之前等待0.1秒
Log("Wait 1 second before input")
rt = Time("Input Text")
try:
textbox = browser.find_element(By.XPATH, para["xpath"])
except:
print("Cannot find input box element:" +
para["xpath"] + "Please try to set the wait time before executing this operation")
print("找不到输入框元素:" + para["xpath"] + "请尝试在执行此操作前设置等待时间")
recordLog("Cannot find input box element:" +
para["xpath"] + "Please try to set the wait time before executing this operation")
# textbox.send_keys(Keys.CONTROL, 'a')
# textbox.send_keys(Keys.BACKSPACE)
# textbox.send_keys(Keys.CONTROL, 'a')
# textbox.send_keys(Keys.BACKSPACE)
execute_code(2, para["beforeJS"], para["beforeJSWaitTime"], textbox) # 执行前置JS
# Send the HOME key
textbox.send_keys(Keys.HOME)
@ -640,14 +636,19 @@ def inputInfo(para, loopValue):
execute_code(2, para["afterJS"], para["afterJSWaitTime"], textbox) # 执行后置js
global bodyText # 每次执行点击输入元素和打开网页操作后需要更新bodyText
bodyText = browser.find_element(By.CSS_SELECTOR, "body").text
rt.end()
except:
print("Cannot find input box element:" +
para["xpath"] + ", please try to set the wait time before executing this operation")
print("找不到输入框元素:" + para["xpath"] + ",请尝试在执行此操作前设置等待时间")
recordLog("Cannot find input box element:" +
para["xpath"] + "Please try to set the wait time before executing this operation")
# 点击元素事件
def clickElement(para, loopElement=None, clickPath="", index=0):
global history
time.sleep(0.1) # 点击之前等待0.1秒
rt = Time("Click Element")
# rt = Time("Click Element")
Log("Wait 0.1 second before clicking element")
if para["useLoop"]: # 使用循环的情况下传入的clickPath就是实际的xpath
path = clickPath
@ -661,15 +662,15 @@ def clickElement(para, loopElement=None, clickPath="", index=0):
browser.set_script_timeout(maxWaitTime)
# 点击前对该元素执行一段JavaScript代码
try:
if para["beforeJS"] != "":
element = browser.find_element(By.XPATH, path)
if para["beforeJS"] != "":
execute_code(2, para["beforeJS"], para["beforeJSWaitTime"], element)
except:
print("Cannot find element:" +
path + "Please try to set the wait time before executing this operation")
print("找不到要点击的元素:" + path + "请尝试在执行此操作前设置等待时间")
path + ", please try to set the wait time before executing this operation")
print("找不到要点击的元素:" + path + "请尝试在执行此操作前设置等待时间")
recordLog("Cannot find element:" +
path + "Please try to set the wait time before executing this operation")
path + ", please try to set the wait time before executing this operation")
tempHandleNum = len(browser.window_handles) # 记录之前的窗口位置
try:
script = 'var result = document.evaluate(`' + path + \
@ -679,7 +680,7 @@ def clickElement(para, loopElement=None, clickPath="", index=0):
Log('time out after set seconds when loading clicked page')
recordLog('time out after set seconds when loading clicked page')
browser.execute_script('window.stop()')
rt.end()
# rt.end()
except Exception as e:
Log(e)
recordLog(str(e))
@ -694,7 +695,8 @@ def clickElement(para, loopElement=None, clickPath="", index=0):
except:
print("Cannot find element:" + path)
recordLog("Cannot find element:" +
path + "Please try to set the wait time before executing this operation")
path + ", please try to set the wait time before executing this operation")
print("找不到要点击的元素:" + path + ",请尝试在执行此操作前设置等待时间")
if tempHandleNum != len(browser.window_handles): # 如果有新标签页的行为发生
browser.switch_to.window(browser.window_handles[-1]) # 跳转到新的标签页
history["handle"] = browser.current_window_handle
@ -703,16 +705,16 @@ def clickElement(para, loopElement=None, clickPath="", index=0):
except TimeoutException:
browser.execute_script('window.stop()')
history["index"] = browser.execute_script("return history.length")
rt.end()
# rt.end()
else:
try:
history["index"] = browser.execute_script("return history.length")
except TimeoutException:
browser.execute_script('window.stop()')
history["index"] = browser.execute_script("return history.length")
rt.end()
# rt.end()
# 如果打开了新窗口,切换到新窗口
scrollDown(para, rt) # 根据参数配置向下滚动
scrollDown(para) # 根据参数配置向下滚动
if containJudge: # 有判断语句才执行以下操作
global bodyText # 每次执行点击输入元素和打开网页操作后需要更新bodyText
try:
@ -725,11 +727,11 @@ def clickElement(para, loopElement=None, clickPath="", index=0):
Log("wait one second after get body text")
# 再执行一遍
bodyText = browser.find_element(By.CSS_SELECTOR, "body").text
rt.end()
# rt.end()
except Exception as e:
Log(e)
recordLog(str(e))
rt.end()
# rt.end()
def get_content(p, element):
global saveName
@ -827,7 +829,7 @@ def getData(para, loopElement, isInLoop=True, parentPath="", index=0):
if not isInLoop and para["wait"] == 0:
time.sleep(1) # 如果提取数据字段不在循环内而且设置的等待时间为0默认等待1秒
Log("Wait 1 second before extracting data")
rt = Time("Extract Data")
# rt = Time("Extract Data")
for p in para["paras"]:
content = ""
if not (p["contentType"] == 5 or p["contentType"] == 6): # 如果不是页面标题或URL去找元素
@ -846,16 +848,21 @@ def getData(para, loopElement, isInLoop=True, parentPath="", index=0):
p["relativeXPath"][1:])
else:
element = browser.find_element(By.XPATH, p["relativeXPath"])
except NoSuchElementException: # 找不到元素的时候,使用默认值
except (NoSuchElementException, InvalidSelectorException): # 找不到元素的时候,使用默认值
# print(p)
try:
content = p["default"]
except Exception as e:
content = ""
outputParameters[p["name"]] = content
print('Element %s not found when extracting data, use default' % p["relativeXPath"])
print("提取数据操作时,元素 %s 未找到,使用默认值" % p["relativeXPath"])
try:
if not dataNotFoundKeys[p["name"]]:
print('Element %s not found with parameter name %s when extracting data, use default, this error will only show once' % (p["relativeXPath"], p["name"]))
print("提取数据操作时,字段名 %s 对应XPath %s 未找到,使用默认值,本字段将不再重复报错" % (p["name"], p["relativeXPath"]))
dataNotFoundKeys[p["name"]] = True
recordLog('Element %s not found, use default' % p["relativeXPath"])
except:
pass
continue
except TimeoutException: # 超时的时候设置超时值
Log('time out after set seconds when getting data')
@ -869,7 +876,7 @@ def getData(para, loopElement, isInLoop=True, parentPath="", index=0):
p["relativeXPath"][1:])
else:
element = browser.find_element(By.XPATH, p["relativeXPath"])
rt.end()
# rt.end()
else:
element = browser.find_element(By.XPATH, "//body")
try:
@ -905,7 +912,7 @@ def getData(para, loopElement, isInLoop=True, parentPath="", index=0):
print(value[:15], " ", end="")
print("")
OUTPUT.append(line)
rt.end()
# rt.end()
# 判断字段是否为空
@ -933,7 +940,7 @@ def clean():
global saveName, log, OUTPUT, browser, SAVED
saveData(exit=True)
browser.quit()
sys.exit(saveName + '.csv')
sys.exit(0)
if __name__ == '__main__':
@ -1090,11 +1097,13 @@ if __name__ == '__main__':
bodyText = "" # 记录bodyText
tOut = service["outputParameters"] # 生成输出参数对象
outputParameters = {}
dataNotFoundKeys = {} # 记录没有找到数据的key
log = "" # 记下现在总共开了多少个标签页
history = {"index": 0, "handle": None} # 记录页面现在所以在的历史记录的位置
SAVED = False # 记录是否已经存储了
for para in tOut:
outputParameters[para["name"]] = ""
dataNotFoundKeys[para["name"]] = False
OUTPUT[0].append(para["name"])
# 挨个执行程序
urlId = 0 # 全局记录变量

View File

@ -2,4 +2,4 @@ rmdir /s /q build
rmdir /s /q dist
pyinstaller -F --icon=favicon.ico easyspider_executestage.py
del ..\ElectronJS\chrome_win32\easyspider_executestage.exe
move dist\easyspider_executestage.exe ..\ElectronJS\chrome_win32\easyspider_executestage.exe
copy dist\easyspider_executestage.exe ..\ElectronJS\chrome_win32\easyspider_executestage.exe

View File

@ -2,4 +2,4 @@ rmdir /s /q build
rmdir /s /q dist
pyinstaller -F --icon=favicon.ico easyspider_executestage.py
del ..\ElectronJS\chrome_win64\easyspider_executestage.exe
move dist\easyspider_executestage.exe ..\ElectronJS\chrome_win64\easyspider_executestage.exe
copy dist\easyspider_executestage.exe ..\ElectronJS\chrome_win64\easyspider_executestage.exe

View File

@ -1,74 +1,84 @@
//此变量用于监听是否加载了新的页面(包括新窗口打开),如果是,增加变量值,用于传回后台。
var tabList = []; //用来记录打开的新的tab的id
var nowTabId = null;
var nowTabIndex = 0; //重要变量!!
var parameterNum = 1; //默认参数索引值
chrome.storage.local.set({ "parameterNum": 1 }); //修改默认的参数索引值
// chrome.tabs.update(6,{"active":true}) //一行就可以切换标签页
chrome.tabs.onActivated.addListener(function(activeInfo) {
nowTabId = activeInfo.tabId; //记录现在活动的tabid
if (tabList.indexOf(nowTabId) != -1) {
nowTabIndex = tabList.indexOf(nowTabId);
}
});
// 监听来自content-script的消息
chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
if (request.type == 0) {
if (tabList.indexOf(sender["tab"]["id"]) < 0) { //元素不存在加入数组
tabList.push(sender["tab"]["id"]);
}
nowTabIndex = tabList.indexOf(nowTabId);
sendResponse({ type: 0, "msg": "Get!" }); //回传一个消息
} else if (request.type == 1) { //前台询问参数索引值
sendResponse({ type: 1, "value": parameterNum }); //回传一个消息
} else if (request.type == 2) {
let message = {
type: 2, //消息类型2代表键盘输入
message: { "keyboardStr": request.value, "xpath": request.xpath, "id": request.id } // {}全选{BS}退格
};
ws.send(JSON.stringify(message));
chrome.tabs.query({active: true, currentWindow: true}, function(tabs) {
// 获取当前选项卡的 ID
const tabId = tabs[0].id;
const url = tabs[0].url;
// 停止当前页面的加载
chrome.tabs.executeScript(tabId, {code: 'window.stop();'});
});
} else if (request.type == 3) {
let tmsg = request.msg;
tmsg.tabIndex = nowTabIndex; //赋值当前tab的id
let message = {
type: 3, //消息类型3代表元素增加事件
from: 0, //0代表从浏览器到流程图1代表从流程图到浏览器
message: {"pipe": JSON.stringify(request.msg)}
};
console.log(message);
ws.send(JSON.stringify(message));
}
});
// 打开一个 web socket
var ws = new WebSocket("ws://localhost:8084");
ws.onopen = function() {
// Web Socket 已连接上,使用 send() 方法发送数据
console.log("已连接");
let message = {
type: 0, //消息类型0代表链接操作
message: {
id: 0, //socket id
}
};
this.send(JSON.stringify(message));
};
ws.onmessage = function(evt) {
evt = JSON.parse(evt.data);
if (evt["type"] == "0") { //0代表更新参数添加索引值
chrome.storage.local.set({ "parameterNum": parseInt(evt["value"]) }); //修改值
}
};
ws.onclose = function() {
// 关闭 websocket
console.log("连接已关闭...");
};
// //此变量用于监听是否加载了新的页面(包括新窗口打开),如果是,增加变量值,用于传回后台。
//
// var tabList = []; //用来记录打开的新的tab的id
// var nowTabId = null;
// var nowTabIndex = 0; //重要变量!!
// var parameterNum = 1; //默认参数索引值
//
// chrome.storage.local.set({ "parameterNum": 1 }); //修改默认的参数索引值
// // chrome.tabs.update(6,{"active":true}) //一行就可以切换标签页
// chrome.tabs.onActivated.addListener(function(activeInfo) {
// nowTabId = activeInfo.tabId; //记录现在活动的tabid
// if (tabList.indexOf(nowTabId) != -1) {
// nowTabIndex = tabList.indexOf(nowTabId);
// }
// });
// // 监听来自content-script的消息
// chrome.runtime.onMessage.addListener(function(request, sender, sendResponse) {
// if (request.type == 0) {
// if (tabList.indexOf(sender["tab"]["id"]) < 0) { //元素不存在加入数组
// tabList.push(sender["tab"]["id"]);
// }
// nowTabIndex = tabList.indexOf(nowTabId);
// sendResponse({ type: 0, "msg": "Get!" }); //回传一个消息
// } else if (request.type == 1) { //前台询问参数索引值
// sendResponse({ type: 1, "value": parameterNum }); //回传一个消息
// } else if (request.type == 2) {
// let message = {
// type: 2, //消息类型2代表键盘输入
// message: { "keyboardStr": request.value, "xpath": request.xpath, "id": request.id } // {}全选{BS}退格
// };
// ws.send(JSON.stringify(message));
// chrome.tabs.query({active: true, currentWindow: true}, function(tabs) {
// // 获取当前选项卡的 ID
// const tabId = tabs[0].id;
// const url = tabs[0].url;
//
// // 停止当前页面的加载
// // chrome.tabs.executeScript(tabId, {code: 'window.stop();'});
// });
// } else if (request.type == 3) {
// let tmsg = request.msg;
// tmsg.tabIndex = nowTabIndex; //赋值当前tab的id
// let message = {
// type: 3, //消息类型3代表元素增加事件
// from: 0, //0代表从浏览器到流程图1代表从流程图到浏览器
// message: {"pipe": JSON.stringify(request.msg)}
// };
// console.log(message);
// ws.send(JSON.stringify(message));
// }
// });
// let testValue = Math.floor(Math.random() * (99999999)).toString();
// // 打开一个 web socket
// let ws = new WebSocket("ws://localhost:8084");
// ws.onopen = function() {
// // Web Socket 已连接上,使用 send() 方法发送数据
// console.log("已连接");
// let message = {
// type: 0, //消息类型0代表链接操作
// message: {
// id: 0, //socket id
// testValue: testValue,
// }
// };
// this.send(JSON.stringify(message));
// };
// ws.onmessage = function(evt) {
// evt = JSON.parse(evt.data);
// if (evt["type"] == "0") { //0代表更新参数添加索引值
// chrome.storage.local.set({ "parameterNum": parseInt(evt["value"]) }); //修改值
// }
// };
// ws.onclose = function() {
// // 关闭 websocket
// console.log("连接已关闭...");
// let message = {
// type: 500, //消息类型0代表链接操作
// message: {
// id: 0, //socket id
// testValue: testValue,
// }
// };
// this.send(JSON.stringify(message));
// };

View File

@ -2,18 +2,18 @@
import {getElementXPaths, global, readXPath} from "./global.js";
var startMsg = { "type": 0, msg: ""};
chrome.runtime.sendMessage(startMsg, function(response) {
console.log(response.msg);
}); //每次打开新页面的时候需要告诉后台
chrome.runtime.onMessage.addListener(
function(request, sender, sendResponse) {
if (request["type"] == 1){
sendResponse("回答处理结果");
}
}
);
// var startMsg = { "type": 0, msg: ""};
//
// chrome.runtime.sendMessage(startMsg, function(response) {
// console.log(response.msg);
// }); //每次打开新页面的时候需要告诉后台
// chrome.runtime.onMessage.addListener(
// function(request, sender, sendResponse) {
// if (request["type"] == 1){
// sendResponse("回答处理结果");
// }
// }
// );
global.ws = new WebSocket("ws://localhost:8084");
global.ws.onopen = function() {
@ -38,11 +38,19 @@ export function input(value) {
"allXPaths": getElementXPaths(global.nodeList[0]["node"]),
"value": value,
};
let msg = { type: 3, msg: message };
window.stop();
chrome.runtime.sendMessage(msg);
msg = { type: 2, value: value, xpath: message.xpath, id: global.id};
chrome.runtime.sendMessage(msg);
let message_action = {
type: 3, //消息类型3代表元素增加事件
from: 0, //0代表从浏览器到流程图1代表从流程图到浏览器
message: {"pipe": JSON.stringify(message)}
};
global.ws.send(JSON.stringify(message_action));
// msg = { type: 2, value: value, xpath: message.xpath, id: global.id};
let message_keyboard = {
type: 2, //消息类型2代表键盘输入
message: { "keyboardStr": value, "xpath": message.xpath, "id": global.id } // {}全选{BS}退格
};
global.ws.send(JSON.stringify(message_keyboard));
}
//点击元素操作
@ -55,8 +63,12 @@ export function sendSingleClick() {
"xpath": readXPath(global.nodeList[0]["node"], 0),
"allXPaths": getElementXPaths(global.nodeList[0]["node"]),
};
let msg = { "type": 3, msg: message };
chrome.runtime.sendMessage(msg);
let message_action = {
type: 3, //消息类型3代表元素增加事件
from: 0, //0代表从浏览器到流程图1代表从流程图到浏览器
message: {"pipe": JSON.stringify(message)}
};
global.ws.send(JSON.stringify(message_action));
}
export function sendChangeOption(optionMode, optionValue){
@ -70,8 +82,12 @@ export function sendChangeOption(optionMode, optionValue){
"xpath": readXPath(global.nodeList[0]["node"], 0),
"allXPaths": getElementXPaths(global.nodeList[0]["node"]),
};
let msg = { "type": 3, msg: message };
chrome.runtime.sendMessage(msg);
let message_action = {
type: 3, //消息类型3代表元素增加事件
from: 0, //0代表从浏览器到流程图1代表从流程图到浏览器
message: {"pipe": JSON.stringify(message)}
};
global.ws.send(JSON.stringify(message_action));
}
export function sendMouseMove(){
@ -83,8 +99,12 @@ export function sendMouseMove(){
"xpath": readXPath(global.nodeList[0]["node"], 0),
"allXPaths": getElementXPaths(global.nodeList[0]["node"]),
};
let msg = { "type": 3, msg: message };
chrome.runtime.sendMessage(msg);
let message_action = {
type: 3, //消息类型3代表元素增加事件
from: 0, //0代表从浏览器到流程图1代表从流程图到浏览器
message: {"pipe": JSON.stringify(message)}
};
global.ws.send(JSON.stringify(message_action));
}
export function sendLoopMouseMove(){
@ -110,8 +130,12 @@ export function sendLoopMouseMove(){
message["pathList"].push(readXPath(global.nodeList[i]["node"], 0));
}
}
let msg = { "type": 3, msg: message };
chrome.runtime.sendMessage(msg);
let message_action = {
type: 3, //消息类型3代表元素增加事件
from: 0, //0代表从浏览器到流程图1代表从流程图到浏览器
message: {"pipe": JSON.stringify(message)}
};
global.ws.send(JSON.stringify(message_action));
}
//采集单个元素
@ -122,8 +146,12 @@ export function collectSingle() {
"tabIndex": -1,
"parameters": global.outputParameters,
};
let msg = { "type": 3, msg: message };
chrome.runtime.sendMessage(msg);
let message_action = {
type: 3, //消息类型3代表元素增加事件
from: 0, //0代表从浏览器到流程图1代表从流程图到浏览器
message: {"pipe": JSON.stringify(message)}
};
global.ws.send(JSON.stringify(message_action));
}
//采集无规律多元素
@ -134,8 +162,12 @@ export function collectMultiNoPattern() {
"tabIndex": -1,
"parameters": global.outputParameters,
};
let msg = { "type": 3, msg: message };
chrome.runtime.sendMessage(msg);
let message_action = {
type: 3, //消息类型3代表元素增加事件
from: 0, //0代表从浏览器到流程图1代表从流程图到浏览器
message: {"pipe": JSON.stringify(message)}
};
global.ws.send(JSON.stringify(message_action));
}
//采集有规律多元素
@ -164,8 +196,12 @@ export function collectMultiWithPattern() {
message["pathList"].push(readXPath(global.nodeList[i]["node"], 0));
}
}
let msg = { "type": 3, msg: message };
chrome.runtime.sendMessage(msg);
let message_action = {
type: 3, //消息类型3代表元素增加事件
from: 0, //0代表从浏览器到流程图1代表从流程图到浏览器
message: {"pipe": JSON.stringify(message)}
};
global.ws.send(JSON.stringify(message_action));
}
//循环点击单个元素
@ -183,8 +219,12 @@ export function sendLoopClickSingle(name) {
if (name == "下一页元素") {
message.nextPage = true;
}
let msg = { "type": 3, msg: message };
chrome.runtime.sendMessage(msg);
let message_action = {
type: 3, //消息类型3代表元素增加事件
from: 0, //0代表从浏览器到流程图1代表从流程图到浏览器
message: {"pipe": JSON.stringify(message)}
};
global.ws.send(JSON.stringify(message_action));
}
//循环点击每个元素
@ -211,8 +251,12 @@ export function sendLoopClickEvery() {
message["pathList"].push(readXPath(global.nodeList[i]["node"], 0));
}
}
let msg = { "type": 3, msg: message };
chrome.runtime.sendMessage(msg);
let message_action = {
type: 3, //消息类型3代表元素增加事件
from: 0, //0代表从浏览器到流程图1代表从流程图到浏览器
message: {"pipe": JSON.stringify(message)}
};
global.ws.send(JSON.stringify(message_action));
}
//检测是否xpath对应的元素被全选了个数判断即可

View File

@ -69,9 +69,9 @@ This software is for learning and communication only. **It is strictly forbidden
To avoid violating relevant national laws, regulations and policies, **the author will not answer any questions** about crawling government or military websites.
同时,软件受到专利权保护,如要用于商业用途,请联系[浙江大学天道专利事务所](media/patent.png)进行付费操作。
同时,软件受到专利权保护,如要用于商业用途,请联系[浙江大学天道专利事务所](media/patent.png)进行专利授权等付费操作。
At the same time, the software is protected by patent rights. If you want to use it for commercial purposes, please contact [Zhejiang University Tiandao Patent Office](media/patent.png) for payment and other operations.
The software is also protected by patents. If you want to use it for commercial purposes, please contact [Zhejiang University Tiandao Patent Agency](media/patent.png) to arrange patent licensing and other paid services.
## 出版物/Publications

1
Releases/.gitignore vendored
View File

@ -1,4 +1,5 @@
EasySpider
EasySpider.app/
EasySpider_windows_amd64/user_data
*.tmp
*.7z

View File

@ -1 +1 @@
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data12","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\Releases\\EasySpider_windows_amd64\\user_data1"}
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\Releases\\EasySpider_windows_amd64\\user_data"}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"id":2,"name":"鼠标移动和页面加载测试","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/25/2023, 5:48:38 PM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"Open Page","value":"https://www.jd.com","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3,5],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"Open Page","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":20,"scrollType":"2","scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":0,"option":7,"title":"Mouse Move","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"LeftSide_menu_list__qXCeM\")]/div[1]","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG 
']"]}},{"id":3,"index":3,"parentId":0,"type":1,"option":8,"title":"Loop","sequence":[4],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":5,"index":4,"parentId":3,"type":0,"option":7,"title":"Mouse Move","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":"","loopType":1}},{"id":4,"index":5,"parentId":0,"type":0,"option":2,"title":"Click Element","sequence":[],"isInLoop":false,"position":3,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"search-link\"]/i[1]","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"maxWaitTime":20,"paras":[],"allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/a[1]/i[1]","//i[contains(., '')]"]}}]}

File diff suppressed because one or more lines are too long