mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-22 23:24:22 +08:00
执行操作前可设置等待指定元素出现
This commit is contained in:
parent
92d6315a22
commit
528500f795
@ -85,6 +85,10 @@ let flowchart_window = null;
|
|||||||
let current_handle = null;
|
let current_handle = null;
|
||||||
let old_handles = [];
|
let old_handles = [];
|
||||||
let handle_pairs = {};
|
let handle_pairs = {};
|
||||||
|
let socket_window = null;
|
||||||
|
let socket_start = null;
|
||||||
|
let socket_flowchart = null;
|
||||||
|
let invoke_window = null;
|
||||||
|
|
||||||
// var ffi = require('ffi-napi');
|
// var ffi = require('ffi-napi');
|
||||||
// var libm = ffi.Library('libm', {
|
// var libm = ffi.Library('libm', {
|
||||||
@ -233,27 +237,7 @@ async function beginInvoke(msg, ws) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// .then(function (element) {
|
|
||||||
// console.log("element", element, handles);
|
|
||||||
// element.sendKeys(Key.HOME, Key.chord(Key.SHIFT, Key.END), keyInfo);
|
|
||||||
// exit = true;
|
|
||||||
// }, function (error) {
|
|
||||||
// console.log("error", error);
|
|
||||||
// len = len - 1;
|
|
||||||
// if (len == 0) {
|
|
||||||
// exit = true;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// );
|
|
||||||
}
|
}
|
||||||
// let handles = driver.getAllWindowHandles();
|
|
||||||
// driver.switchTo().window(handles[handles.length - 1]);
|
|
||||||
// driver.findElement(By.xpath(msg.message.xpath)).sendKeys(Key.HOME, Key.chord(Key.SHIFT, Key.END), keyInfo);
|
|
||||||
// robot.keyTap("a", "control");
|
|
||||||
// robot.keyTap("backspace");
|
|
||||||
// robot.typeString(keyInfo);
|
|
||||||
// robot.keyTap("shift");
|
|
||||||
// robot.keyTap("shift");
|
|
||||||
} else if (msg.type == 3) {
|
} else if (msg.type == 3) {
|
||||||
try {
|
try {
|
||||||
if (msg.from == 0) {
|
if (msg.from == 0) {
|
||||||
@ -368,6 +352,11 @@ async function beginInvoke(msg, ws) {
|
|||||||
} catch {
|
} catch {
|
||||||
console.log("open devtools error");
|
console.log("open devtools error");
|
||||||
}
|
}
|
||||||
|
try{
|
||||||
|
invoke_window.openDevTools();
|
||||||
|
} catch {
|
||||||
|
console.log("open devtools error");
|
||||||
|
}
|
||||||
} else if (msg.type == 7) {
|
} else if (msg.type == 7) {
|
||||||
// 获得当前页面Cookies
|
// 获得当前页面Cookies
|
||||||
try{
|
try{
|
||||||
@ -383,9 +372,6 @@ async function beginInvoke(msg, ws) {
|
|||||||
|
|
||||||
const WebSocket = require('ws');
|
const WebSocket = require('ws');
|
||||||
const {all} = require("express/lib/application");
|
const {all} = require("express/lib/application");
|
||||||
let socket_window = null;
|
|
||||||
let socket_start = null;
|
|
||||||
let socket_flowchart = null;
|
|
||||||
let wss = new WebSocket.Server({port: websocket_port});
|
let wss = new WebSocket.Server({port: websocket_port});
|
||||||
wss.on('connection', function (ws) {
|
wss.on('connection', function (ws) {
|
||||||
ws.on('message', async function (message, isBinary) {
|
ws.on('message', async function (message, isBinary) {
|
||||||
@ -521,7 +507,7 @@ function handleOpenBrowser(event, lang = "en", user_data_folder = "", mobile = f
|
|||||||
}
|
}
|
||||||
|
|
||||||
function handleOpenInvoke(event, lang = "en") {
|
function handleOpenInvoke(event, lang = "en") {
|
||||||
const window = new BrowserWindow({icon: iconPath});
|
invoke_window = new BrowserWindow({icon: iconPath});
|
||||||
let url = "";
|
let url = "";
|
||||||
language = lang;
|
language = lang;
|
||||||
if (lang == "en") {
|
if (lang == "en") {
|
||||||
@ -530,10 +516,10 @@ function handleOpenInvoke(event, lang = "en") {
|
|||||||
url = server_address + `/taskGrid/taskList.html?type=1&wsport=${websocket_port}&backEndAddressServiceWrapper=` + server_address + "&lang=zh";
|
url = server_address + `/taskGrid/taskList.html?type=1&wsport=${websocket_port}&backEndAddressServiceWrapper=` + server_address + "&lang=zh";
|
||||||
}
|
}
|
||||||
// and load the index.html of the app.
|
// and load the index.html of the app.
|
||||||
window.loadURL(url, { extraHeaders: 'pragma: no-cache\n' });
|
invoke_window.loadURL(url, { extraHeaders: 'pragma: no-cache\n' });
|
||||||
window.maximize();
|
invoke_window.maximize();
|
||||||
mainWindow.hide();
|
mainWindow.hide();
|
||||||
window.on('close', function (event) {
|
invoke_window.on('close', function (event) {
|
||||||
mainWindow.show();
|
mainWindow.show();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -8,6 +8,10 @@
|
|||||||
margin-top: 10px;
|
margin-top: 10px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
label{
|
||||||
|
margin-left: 2px;
|
||||||
|
}
|
||||||
|
|
||||||
div.node {
|
div.node {
|
||||||
height: 45px;
|
height: 45px;
|
||||||
width: 150px;
|
width: 150px;
|
||||||
@ -19,38 +23,38 @@
|
|||||||
padding: 5px;
|
padding: 5px;
|
||||||
margin: 10px auto;
|
margin: 10px auto;
|
||||||
}
|
}
|
||||||
|
|
||||||
.arrow {
|
.arrow {
|
||||||
margin: 10px auto;
|
margin: 10px auto;
|
||||||
text-align: center;
|
text-align: center;
|
||||||
font-size: 23px!important;
|
font-size: 23px!important;
|
||||||
color: black;
|
color: black;
|
||||||
}
|
}
|
||||||
|
|
||||||
.arrow:hover {
|
.arrow:hover {
|
||||||
background-color: deepskyblue!important;
|
background-color: deepskyblue!important;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
color: white;
|
color: white;
|
||||||
}
|
}
|
||||||
|
|
||||||
.branchAdd {
|
.branchAdd {
|
||||||
margin: 10px auto;
|
margin: 10px auto;
|
||||||
text-align: center;
|
text-align: center;
|
||||||
font-size: 18px!important;
|
font-size: 18px!important;
|
||||||
color: black;
|
color: black;
|
||||||
}
|
}
|
||||||
|
|
||||||
.branchAdd:hover {
|
.branchAdd:hover {
|
||||||
background-color: deepskyblue;
|
background-color: deepskyblue;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
color: white;
|
color: white;
|
||||||
}
|
}
|
||||||
|
|
||||||
div.node:hover {
|
div.node:hover {
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
background: navy;
|
background: navy;
|
||||||
}
|
}
|
||||||
|
|
||||||
.loop {
|
.loop {
|
||||||
border: skyblue solid;
|
border: skyblue solid;
|
||||||
text-align: center;
|
text-align: center;
|
||||||
@ -61,11 +65,11 @@
|
|||||||
margin: 10px auto;
|
margin: 10px auto;
|
||||||
border-radius: 7px;
|
border-radius: 7px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.options {
|
.options {
|
||||||
height: 35px;
|
height: 35px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.judge {
|
.judge {
|
||||||
/* display: flex; */
|
/* display: flex; */
|
||||||
text-align: center;
|
text-align: center;
|
||||||
@ -76,7 +80,7 @@
|
|||||||
justify-content: center;
|
justify-content: center;
|
||||||
border-radius: 7px;
|
border-radius: 7px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.branch {
|
.branch {
|
||||||
display: inline-block;
|
display: inline-block;
|
||||||
margin: 5px;
|
margin: 5px;
|
||||||
@ -88,11 +92,11 @@
|
|||||||
margin: 10px;
|
margin: 10px;
|
||||||
border-radius: 7px;
|
border-radius: 7px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.sequence {
|
.sequence {
|
||||||
display: block;
|
display: block;
|
||||||
}
|
}
|
||||||
|
|
||||||
.toolbox button {
|
.toolbox button {
|
||||||
margin-top: 5px;
|
margin-top: 5px;
|
||||||
margin-left: 3px;
|
margin-left: 3px;
|
||||||
@ -100,35 +104,35 @@
|
|||||||
width: 80%;
|
width: 80%;
|
||||||
font-size: 15px!important;
|
font-size: 15px!important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.Modify {
|
.Modify {
|
||||||
margin: 20px;
|
margin: 20px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.Modify input {
|
.Modify input {
|
||||||
font-size: 17px!important;
|
font-size: 17px!important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.elements {
|
.elements {
|
||||||
font-size: 17px!important;
|
font-size: 17px!important;
|
||||||
margin: 10px 0;
|
margin: 10px 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.elements p {
|
.elements p {
|
||||||
margin: 5px 0;
|
margin: 5px 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.elements input[type=checkbox] {
|
.elements input[type=checkbox] {
|
||||||
width: 20px;
|
width: 20px;
|
||||||
height: 20px;
|
height: 20px;
|
||||||
vertical-align: sub;
|
vertical-align: sub;
|
||||||
margin-right: 5px;
|
margin-right: 5px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.elements textarea {
|
.elements textarea {
|
||||||
min-height: 100px;
|
min-height: 100px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.elements label {
|
.elements label {
|
||||||
font-size: 17px!important;
|
font-size: 17px!important;
|
||||||
margin: 10px 0;
|
margin: 10px 0;
|
||||||
@ -143,7 +147,7 @@
|
|||||||
border: 1px solid rgb(78, 78, 78);
|
border: 1px solid rgb(78, 78, 78);
|
||||||
padding-left: 2px;
|
padding-left: 2px;
|
||||||
}
|
}
|
||||||
|
|
||||||
table {
|
table {
|
||||||
table-layout: fixed;
|
table-layout: fixed;
|
||||||
word-break: break-all;
|
word-break: break-all;
|
||||||
@ -153,7 +157,7 @@
|
|||||||
white-space: nowrap;
|
white-space: nowrap;
|
||||||
width: 100%;
|
width: 100%;
|
||||||
} */
|
} */
|
||||||
|
|
||||||
.toolkitcontain {
|
.toolkitcontain {
|
||||||
border: 1px solid #cdd!important;
|
border: 1px solid #cdd!important;
|
||||||
width: 100%;
|
width: 100%;
|
||||||
@ -163,7 +167,7 @@
|
|||||||
margin-top: 10px;
|
margin-top: 10px;
|
||||||
position: relative;
|
position: relative;
|
||||||
}
|
}
|
||||||
|
|
||||||
.toolkitcontain table {
|
.toolkitcontain table {
|
||||||
table-layout: fixed;
|
table-layout: fixed;
|
||||||
word-break: break-all;
|
word-break: break-all;
|
||||||
@ -173,7 +177,7 @@
|
|||||||
white-space: nowrap;
|
white-space: nowrap;
|
||||||
width: 100%;
|
width: 100%;
|
||||||
}
|
}
|
||||||
|
|
||||||
.toolkitcontain th,
|
.toolkitcontain th,
|
||||||
.toolkitcontain td,
|
.toolkitcontain td,
|
||||||
.toolkitcontain tr {
|
.toolkitcontain tr {
|
||||||
@ -187,7 +191,7 @@
|
|||||||
padding-left: 1px;
|
padding-left: 1px;
|
||||||
-webkit-user-select: none;
|
-webkit-user-select: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
.toolkitcontain .toolkittb2 {
|
.toolkitcontain .toolkittb2 {
|
||||||
position: sticky;
|
position: sticky;
|
||||||
top: 0px;
|
top: 0px;
|
||||||
@ -195,13 +199,13 @@
|
|||||||
background-color: azure;
|
background-color: azure;
|
||||||
z-index: 1000;
|
z-index: 1000;
|
||||||
}
|
}
|
||||||
|
|
||||||
.toolkitcontain .toolkittb4 {
|
.toolkitcontain .toolkittb4 {
|
||||||
position: absolute;
|
position: absolute;
|
||||||
}
|
}
|
||||||
|
|
||||||
.toolkitcontain a {
|
.toolkitcontain a {
|
||||||
font-size: 13px!important;
|
font-size: 13px!important;
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
color: blue;
|
color: blue;
|
||||||
}
|
}
|
||||||
|
@ -580,7 +580,14 @@ If the expression returns a value greater than 0 or evaluates to True, the opera
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div style="margin-top:5px">
|
<div style="margin-top:5px">
|
||||||
<label>Seconds <b>after executed</b> (Can be set to a decimal, such as 0.5):</label>
|
<label>Wait for the following elements to appear <b>before</b> executing:</label>
|
||||||
|
<textarea onkeydown="inputDelete(event)" class="form-control" style="min-height: 30px" v-model='nowNode["parameters"]["waitElement"]'
|
||||||
|
placeholder="Enter the XPath of the element to wait for, leave blank to skip waiting"></textarea>
|
||||||
|
<label style="margin-top:5px">In which iframe is the element located? Set to 0 if the element is not inside an iframe:</label>
|
||||||
|
<input onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['waitElementIframeIndex']" type="number" required></input>
|
||||||
|
<label style="margin-top:5px">Maximum waiting time for element appearance (in seconds):</label>
|
||||||
|
<input onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['waitElementTime']" type="number" required></input>
|
||||||
|
<label style="margin-top:5px">Wait seconds <b>after</b> execution (can set decimal values, e.g., 0.5):</label>
|
||||||
<input onkeydown="inputDelete(event)" required type="number" class="form-control" v-model.number='list.nl[index.nowNodeIndex]["parameters"]["wait"]'></input>
|
<input onkeydown="inputDelete(event)" required type="number" class="form-control" v-model.number='list.nl[index.nowNodeIndex]["parameters"]["wait"]'></input>
|
||||||
<label>Wait Type</label>
|
<label>Wait Type</label>
|
||||||
<select v-model='list.nl[index.nowNodeIndex]["parameters"]["waitType"]' class="form-control">
|
<select v-model='list.nl[index.nowNodeIndex]["parameters"]["waitType"]' class="form-control">
|
||||||
|
@ -625,31 +625,7 @@ document.oncontextmenu = function() {
|
|||||||
return false;
|
return false;
|
||||||
} //屏蔽右键菜单
|
} //屏蔽右键菜单
|
||||||
//删除元素
|
//删除元素
|
||||||
document.onkeydown = function(e) {
|
|
||||||
if (nowNode != null && e.keyCode == 46) {
|
|
||||||
// if (confirm("确定要删除元素吗?")) {
|
|
||||||
deleteElement();
|
|
||||||
// }
|
|
||||||
} else { //ctrl+s保存服务
|
|
||||||
let currKey = 0;
|
|
||||||
currKey = e.keyCode || e.which || e.charCode;
|
|
||||||
if (currKey == 83 && (e.ctrlKey || e.metaKey)) {
|
|
||||||
$('#save').click();
|
|
||||||
return true;
|
|
||||||
} else if (currKey == 116) {
|
|
||||||
location.reload();
|
|
||||||
} else if (currKey == 123) {
|
|
||||||
console.log("打开devtools")
|
|
||||||
let command = new WebSocket("ws://localhost:"+getUrlParam("wsport"))
|
|
||||||
command.onopen = function() {
|
|
||||||
let message = {
|
|
||||||
type: 6, //消息类型,0代表连接操作
|
|
||||||
};
|
|
||||||
this.send(JSON.stringify(message));
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function inputDelete(e) {
|
function inputDelete(e) {
|
||||||
if (e.keyCode == 46) {
|
if (e.keyCode == 46) {
|
||||||
|
@ -580,9 +580,16 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div style="margin-top:5px">
|
<div style="margin-top:5px">
|
||||||
<label><b>执行后</b>等待秒数(所有等待时间均可设置为小数,如0.5):</label>
|
<label>操作<b>执行前</b>等待以下元素出现:</label>
|
||||||
|
<textarea onkeydown="inputDelete(event)" class="form-control" style="min-height: 30px" v-model='nowNode["parameters"]["waitElement"]'
|
||||||
|
placeholder="填写要等待出现元素的XPath,不填写则不等待"></textarea>
|
||||||
|
<label style="margin-top:5px">要等待的元素在页面第几个iframe中,0表示元素不在iframe中:</label>
|
||||||
|
<input onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['waitElementIframeIndex']" type="number" required></input>
|
||||||
|
<label style="margin-top:5px">元素出现的最长等待时间(秒):</label>
|
||||||
|
<input onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['waitElementTime']" type="number" required></input>
|
||||||
|
<label style="margin-top:5px">操作<b>执行后</b>等待秒数(所有等待时间均可设置为小数,如0.5):</label>
|
||||||
<input onkeydown="inputDelete(event)" required type="number" class="form-control" v-model.number='list.nl[index.nowNodeIndex]["parameters"]["wait"]'></input>
|
<input onkeydown="inputDelete(event)" required type="number" class="form-control" v-model.number='list.nl[index.nowNodeIndex]["parameters"]["wait"]'></input>
|
||||||
<label>等待类型</label>
|
<label style="margin-top:5px">等待类型</label>
|
||||||
<select v-model='list.nl[index.nowNodeIndex]["parameters"]["waitType"]' class="form-control">
|
<select v-model='list.nl[index.nowNodeIndex]["parameters"]["waitType"]' class="form-control">
|
||||||
<option value = 0>固定等待(设置等10秒就等10秒)</option>
|
<option value = 0>固定等待(设置等10秒就等10秒)</option>
|
||||||
<option value = 1>随机等待(设置等10秒会随机等10×0.5 - 10 × 1.5 秒)</option>
|
<option value = 1>随机等待(设置等10秒会随机等10×0.5 - 10 × 1.5 秒)</option>
|
||||||
|
@ -75,3 +75,34 @@ function isValidMySQLTableName(tableName) {
|
|||||||
return pattern.test(tableName);
|
return pattern.test(tableName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
document.onkeydown = function(e) {
|
||||||
|
let t = false;
|
||||||
|
try{
|
||||||
|
t = nowNode;
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
if (t && nowNode != null && e.keyCode == 46) {
|
||||||
|
// if (confirm("确定要删除元素吗?")) {
|
||||||
|
deleteElement();
|
||||||
|
// }
|
||||||
|
} else { //ctrl+s保存服务
|
||||||
|
let currKey = 0;
|
||||||
|
currKey = e.keyCode || e.which || e.charCode;
|
||||||
|
if (currKey == 83 && (e.ctrlKey || e.metaKey)) {
|
||||||
|
$('#save').click();
|
||||||
|
return true;
|
||||||
|
} else if (currKey == 116) {
|
||||||
|
location.reload();
|
||||||
|
} else if (currKey == 123) {
|
||||||
|
console.log("打开devtools")
|
||||||
|
let command = new WebSocket("ws://localhost:"+getUrlParam("wsport"))
|
||||||
|
command.onopen = function() {
|
||||||
|
let message = {
|
||||||
|
type: 6, //消息类型,0代表连接操作
|
||||||
|
};
|
||||||
|
this.send(JSON.stringify(message));
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -146,6 +146,9 @@ function addParameters(t) {
|
|||||||
beforeJSWaitTime: 0, //执行前js等待时间
|
beforeJSWaitTime: 0, //执行前js等待时间
|
||||||
afterJS: "", //执行后执行的js
|
afterJS: "", //执行后执行的js
|
||||||
afterJSWaitTime: 0, //执行后js等待时间
|
afterJSWaitTime: 0, //执行后js等待时间
|
||||||
|
waitElement: "", //等待元素
|
||||||
|
waitElementTime: 10, //等待元素时间
|
||||||
|
waitElementIframeIndex: 0, //等待元素在第几个iframe中
|
||||||
}; //公共参数处理
|
}; //公共参数处理
|
||||||
if (t.option == 1) {
|
if (t.option == 1) {
|
||||||
t["parameters"]["url"] = "about:blank";
|
t["parameters"]["url"] = "about:blank";
|
||||||
@ -518,4 +521,4 @@ function LANG(zh, en) {
|
|||||||
} else {
|
} else {
|
||||||
return en;
|
return en;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/183.json
Normal file
1
ElectronJS/tasks/183.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"id":183,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/16/2023, 1:55:02 AM","update_time":"7/16/2023, 2:02:09 AM","version":"0.3.6","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"\n \n \n \n \n \n \n \n \n \n \n \n \n \n ;0\n 我的购物车\n \n \n \n 平板電腦爆款耳機手機數據線年貨節\n \n 领券中心今日推荐\n "}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"//*[@id=\"service-2017\"]/div[1]/ol/li[1]","waitElementTime":10,"clear":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body/div[4]/div[1]","allXPaths":["/html/body/div[4]/div[1]","//div[contains(., '')]","//DIV[@class='w']","/html/body/div[last()-6]/div"],"exampleValues":[{"num":0,"value":"\n \n \n \n \n \n \n \n \n \n \n \n \n \n ;0\n 我的购物车\n \n \n \n 平板電腦爆款耳機手機數據線年貨節\n \n 领券中心今日推荐\n "}],"unique_index":"7c04qey9fkllk4b56jd","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":-1,"index":3,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"//div[123]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[]}}]}
|
2
ExecuteStage/.vscode/launch.json
vendored
2
ExecuteStage/.vscode/launch.json
vendored
@ -12,7 +12,7 @@
|
|||||||
"justMyCode": false,
|
"justMyCode": false,
|
||||||
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
|
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
|
||||||
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
||||||
"args": ["--id", "[84]", "--headless", "0", "--user_data", "1", "--keyboard", "0"]
|
"args": ["--id", "[54]", "--headless", "0", "--user_data", "1", "--keyboard", "0"]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
@ -1,5 +1,28 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# import atexit
|
# import atexit
|
||||||
|
from utils import download_image, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, replace_field_values, write_to_csv, write_to_excel
|
||||||
|
from myChrome import MyChrome
|
||||||
|
from threading import Thread, Event
|
||||||
|
from PIL import Image
|
||||||
|
from commandline_config import Config
|
||||||
|
import os
|
||||||
|
import csv
|
||||||
|
from openpyxl import load_workbook, Workbook
|
||||||
|
import random
|
||||||
|
from selenium.webdriver import ActionChains
|
||||||
|
from selenium.webdriver.support.ui import Select
|
||||||
|
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||||
|
from selenium.common.exceptions import StaleElementReferenceException, InvalidSelectorException
|
||||||
|
from selenium.common.exceptions import TimeoutException
|
||||||
|
from selenium.common.exceptions import NoSuchElementException
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.common.action_chains import ActionChains
|
||||||
|
from selenium.webdriver.common.keys import Keys
|
||||||
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
from pynput.keyboard import Key, Listener
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import io # 遇到错误退出时应执行的代码
|
import io # 遇到错误退出时应执行的代码
|
||||||
import json
|
import json
|
||||||
@ -17,41 +40,18 @@ from ddddocr import DdddOcr
|
|||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
import onnxruntime
|
import onnxruntime
|
||||||
onnxruntime.set_default_logger_severity(3) # 隐藏onnxruntime的日志
|
onnxruntime.set_default_logger_severity(3) # 隐藏onnxruntime的日志
|
||||||
# import undetected_chromedriver as uc
|
# import undetected_chromedriver as uc
|
||||||
from pynput.keyboard import Key, Listener
|
|
||||||
from selenium.webdriver.chrome.options import Options
|
|
||||||
from selenium.webdriver.common.keys import Keys
|
|
||||||
from selenium.webdriver.common.action_chains import ActionChains
|
|
||||||
from selenium import webdriver
|
|
||||||
from selenium.webdriver.support.ui import WebDriverWait
|
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
|
||||||
from selenium.webdriver.common.by import By
|
|
||||||
from selenium.common.exceptions import NoSuchElementException
|
|
||||||
from selenium.common.exceptions import TimeoutException
|
|
||||||
from selenium.common.exceptions import StaleElementReferenceException, InvalidSelectorException
|
|
||||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
||||||
from selenium.webdriver.support.ui import Select
|
|
||||||
from selenium.webdriver import ActionChains
|
|
||||||
from selenium.webdriver.common.by import By
|
|
||||||
import random
|
|
||||||
# import pandas as pd
|
# import pandas as pd
|
||||||
from openpyxl import load_workbook, Workbook
|
|
||||||
# import numpy
|
# import numpy
|
||||||
import csv
|
|
||||||
import os
|
|
||||||
from commandline_config import Config
|
|
||||||
# import pytesseract
|
# import pytesseract
|
||||||
from PIL import Image
|
|
||||||
# import uuid
|
# import uuid
|
||||||
from threading import Thread, Event
|
|
||||||
from myChrome import MyChrome
|
|
||||||
if sys.platform != "darwin":
|
if sys.platform != "darwin":
|
||||||
from myChrome import MyUCChrome
|
from myChrome import MyUCChrome
|
||||||
from utils import download_image, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, replace_field_values, write_to_csv, write_to_excel
|
|
||||||
desired_capabilities = DesiredCapabilities.CHROME
|
desired_capabilities = DesiredCapabilities.CHROME
|
||||||
desired_capabilities["pageLoadStrategy"] = "none"
|
desired_capabilities["pageLoadStrategy"] = "none"
|
||||||
|
|
||||||
|
|
||||||
class BrowserThread(Thread):
|
class BrowserThread(Thread):
|
||||||
def __init__(self, browser_t, id, service, version, event, saveName, config):
|
def __init__(self, browser_t, id, service, version, event, saveName, config):
|
||||||
Thread.__init__(self)
|
Thread.__init__(self)
|
||||||
@ -73,7 +73,7 @@ class BrowserThread(Thread):
|
|||||||
self.BREAK = False
|
self.BREAK = False
|
||||||
self.CONTINUE = False
|
self.CONTINUE = False
|
||||||
# 名称设定
|
# 名称设定
|
||||||
if saveName != "": # 命令行覆盖保存名称
|
if saveName != "": # 命令行覆盖保存名称
|
||||||
self.saveName = saveName # 保存文件的名字
|
self.saveName = saveName # 保存文件的名字
|
||||||
now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
||||||
self.saveName = self.saveName.replace("current_time", now)
|
self.saveName = self.saveName.replace("current_time", now)
|
||||||
@ -83,22 +83,27 @@ class BrowserThread(Thread):
|
|||||||
if not os.path.exists("Data/Task_" + str(i)):
|
if not os.path.exists("Data/Task_" + str(i)):
|
||||||
os.mkdir("Data/Task_" + str(i))
|
os.mkdir("Data/Task_" + str(i))
|
||||||
if not os.path.exists("Data/Task_" + str(i) + "/" + self.saveName):
|
if not os.path.exists("Data/Task_" + str(i) + "/" + self.saveName):
|
||||||
os.mkdir("Data/Task_" + str(i) + "/" + self.saveName) # 创建保存文件夹用来保存截图
|
os.mkdir("Data/Task_" + str(i) + "/" +
|
||||||
|
self.saveName) # 创建保存文件夹用来保存截图
|
||||||
self.getDataStep = 0
|
self.getDataStep = 0
|
||||||
self.startSteps = 0
|
self.startSteps = 0
|
||||||
try:
|
try:
|
||||||
startFromExit = service["startFromExit"] # 从上次退出的步骤开始
|
startFromExit = service["startFromExit"] # 从上次退出的步骤开始
|
||||||
if startFromExit == 1:
|
if startFromExit == 1:
|
||||||
with open("Data/Task_" + str(self.id) + "/" + self.saveName + '_steps.txt', 'r', encoding='utf-8-sig') as file_obj:
|
with open("Data/Task_" + str(self.id) + "/" + self.saveName + '_steps.txt', 'r', encoding='utf-8-sig') as file_obj:
|
||||||
self.startSteps = int(file_obj.read()) # 读取已执行步数
|
self.startSteps = int(file_obj.read()) # 读取已执行步数
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
if self.startSteps != 0:
|
if self.startSteps != 0:
|
||||||
print("此模式下,任务ID", self.id, "将从上次退出的步骤开始执行,之前已采集条数为", self.startSteps, "条。")
|
print("此模式下,任务ID", self.id, "将从上次退出的步骤开始执行,之前已采集条数为",
|
||||||
print("In this mode, task ID", self.id, "will start from the last step, before we already collected", self.startSteps, " items.")
|
self.startSteps, "条。")
|
||||||
|
print("In this mode, task ID", self.id,
|
||||||
|
"will start from the last step, before we already collected", self.startSteps, " items.")
|
||||||
else:
|
else:
|
||||||
print("此模式下,任务ID", self.id, "将从头开始执行,如果需要从上次退出的步骤开始执行,请在保存任务时设置是否从上次保存位置开始执行为“是”。")
|
print("此模式下,任务ID", self.id,
|
||||||
print("In this mode, task ID", self.id, "will start from the beginning, if you want to start from the last step, please set the option 'start from the last step' to 'yes' when saving the task.")
|
"将从头开始执行,如果需要从上次退出的步骤开始执行,请在保存任务时设置是否从上次保存位置开始执行为“是”。")
|
||||||
|
print("In this mode, task ID", self.id,
|
||||||
|
"will start from the beginning, if you want to start from the last step, please set the option 'start from the last step' to 'yes' when saving the task.")
|
||||||
stealth_path = driver_path[:driver_path.find(
|
stealth_path = driver_path[:driver_path.find(
|
||||||
"chromedriver")] + "stealth.min.js"
|
"chromedriver")] + "stealth.min.js"
|
||||||
with open(stealth_path, 'r') as f:
|
with open(stealth_path, 'r') as f:
|
||||||
@ -167,12 +172,12 @@ class BrowserThread(Thread):
|
|||||||
self.outputParameters = {}
|
self.outputParameters = {}
|
||||||
self.service = service
|
self.service = service
|
||||||
self.outputParametersTypes = []
|
self.outputParametersTypes = []
|
||||||
self.outputParametersRecord = [] # 字段是否被记录
|
self.outputParametersRecord = [] # 字段是否被记录
|
||||||
self.dataNotFoundKeys = {} # 记录没有找到数据的key
|
self.dataNotFoundKeys = {} # 记录没有找到数据的key
|
||||||
self.log = "" # 记下现在总共开了多少个标签页
|
self.log = "" # 记下现在总共开了多少个标签页
|
||||||
self.history = {"index": 0, "handle": None} # 记录页面现在所以在的历史记录的位置
|
self.history = {"index": 0, "handle": None} # 记录页面现在所以在的历史记录的位置
|
||||||
self.SAVED = False # 记录是否已经存储了
|
self.SAVED = False # 记录是否已经存储了
|
||||||
for para in service["outputParameters"]: # 初始化输出参数
|
for para in service["outputParameters"]: # 初始化输出参数
|
||||||
if para["name"] not in self.outputParameters.keys():
|
if para["name"] not in self.outputParameters.keys():
|
||||||
self.outputParameters[para["name"]] = ""
|
self.outputParameters[para["name"]] = ""
|
||||||
self.dataNotFoundKeys[para["name"]] = False
|
self.dataNotFoundKeys[para["name"]] = False
|
||||||
@ -181,7 +186,8 @@ class BrowserThread(Thread):
|
|||||||
except:
|
except:
|
||||||
self.outputParametersTypes.append("text")
|
self.outputParametersTypes.append("text")
|
||||||
try:
|
try:
|
||||||
self.outputParametersRecord.append(bool(para["recordASField"]))
|
self.outputParametersRecord.append(
|
||||||
|
bool(para["recordASField"]))
|
||||||
except:
|
except:
|
||||||
self.outputParametersRecord.append(True)
|
self.outputParametersRecord.append(True)
|
||||||
# 文件叠加的时候不添加表头
|
# 文件叠加的时候不添加表头
|
||||||
@ -203,11 +209,19 @@ class BrowserThread(Thread):
|
|||||||
iframe = node["parameters"]["iframe"]
|
iframe = node["parameters"]["iframe"]
|
||||||
except:
|
except:
|
||||||
node["parameters"]["iframe"] = False
|
node["parameters"]["iframe"] = False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
node["parameters"]["xpath"] = lowercase_tags_in_xpath(
|
node["parameters"]["xpath"] = lowercase_tags_in_xpath(
|
||||||
node["parameters"]["xpath"])
|
node["parameters"]["xpath"])
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
try:
|
||||||
|
node["parameters"]["waitElementIframeIndex"] = int(
|
||||||
|
node["parameters"]["waitElementIframeIndex"])
|
||||||
|
except:
|
||||||
|
node["parameters"]["waitElement"] = ""
|
||||||
|
node["parameters"]["waitElementTime"] = 10
|
||||||
|
node["parameters"]["waitElementIframeIndex"] = 0
|
||||||
if node["option"] == 1: # 打开网页操作
|
if node["option"] == 1: # 打开网页操作
|
||||||
try:
|
try:
|
||||||
cookies = node["parameters"]["cookies"]
|
cookies = node["parameters"]["cookies"]
|
||||||
@ -216,8 +230,10 @@ class BrowserThread(Thread):
|
|||||||
if node["option"] == 2: # 点击操作
|
if node["option"] == 2: # 点击操作
|
||||||
if node["parameters"]["useLoop"]:
|
if node["parameters"]["useLoop"]:
|
||||||
if self.task_version <= "0.3.5":
|
if self.task_version <= "0.3.5":
|
||||||
node["parameters"]["xpath"] = "" # 0.3.5及以下版本的EasySpider下的循环点击不支持相对XPath
|
# 0.3.5及以下版本的EasySpider下的循环点击不支持相对XPath
|
||||||
print("您的任务版本号为" + self.task_version + ",循环点击不支持相对XPath写法,已自动切换为纯循环的XPath")
|
node["parameters"]["xpath"] = ""
|
||||||
|
print("您的任务版本号为" + self.task_version +
|
||||||
|
",循环点击不支持相对XPath写法,已自动切换为纯循环的XPath")
|
||||||
elif node["option"] == 3: # 提取数据操作
|
elif node["option"] == 3: # 提取数据操作
|
||||||
node["parameters"]["recordASField"] = 0
|
node["parameters"]["recordASField"] = 0
|
||||||
paras = node["parameters"]["paras"]
|
paras = node["parameters"]["paras"]
|
||||||
@ -231,7 +247,8 @@ class BrowserThread(Thread):
|
|||||||
except:
|
except:
|
||||||
para["iframe"] = False
|
para["iframe"] = False
|
||||||
try:
|
try:
|
||||||
para["relativeXPath"] = lowercase_tags_in_xpath(para["relativeXPath"])
|
para["relativeXPath"] = lowercase_tags_in_xpath(
|
||||||
|
para["relativeXPath"])
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
try:
|
try:
|
||||||
@ -247,7 +264,7 @@ class BrowserThread(Thread):
|
|||||||
para["optimizable"] = False
|
para["optimizable"] = False
|
||||||
elif node["option"] == 4: # 输入文字
|
elif node["option"] == 4: # 输入文字
|
||||||
try:
|
try:
|
||||||
index = node["parameters"]["index"] # 索引值
|
index = node["parameters"]["index"] # 索引值
|
||||||
except:
|
except:
|
||||||
node["parameters"]["index"] = 0
|
node["parameters"]["index"] = 0
|
||||||
elif node["option"] == 5: # 自定义操作
|
elif node["option"] == 5: # 自定义操作
|
||||||
@ -255,23 +272,27 @@ class BrowserThread(Thread):
|
|||||||
clear = node["parameters"]["clear"]
|
clear = node["parameters"]["clear"]
|
||||||
except:
|
except:
|
||||||
node["parameters"]["clear"] = 0
|
node["parameters"]["clear"] = 0
|
||||||
elif node["option"] == 7: # 移动到元素
|
elif node["option"] == 7: # 移动到元素
|
||||||
if node["parameters"]["useLoop"]:
|
if node["parameters"]["useLoop"]:
|
||||||
if self.task_version <= "0.3.5":
|
if self.task_version <= "0.3.5":
|
||||||
node["parameters"]["xpath"] = "" # 0.3.5及以下版本的EasySpider下的循环点击不支持相对XPath
|
# 0.3.5及以下版本的EasySpider下的循环点击不支持相对XPath
|
||||||
print("您的任务版本号为" + self.task_version + ",循环点击不支持相对XPath写法,已自动切换为纯循环的XPath")
|
node["parameters"]["xpath"] = ""
|
||||||
|
print("您的任务版本号为" + self.task_version +
|
||||||
|
",循环点击不支持相对XPath写法,已自动切换为纯循环的XPath")
|
||||||
|
|
||||||
def readFromExcel(self):
|
def readFromExcel(self):
|
||||||
if self.inputExcel == "":
|
if self.inputExcel == "":
|
||||||
return 0
|
return 0
|
||||||
try:
|
try:
|
||||||
workbook = load_workbook(self.inputExcel)
|
workbook = load_workbook(self.inputExcel)
|
||||||
except:
|
except:
|
||||||
print("读取Excel失败,将会使用默认参数执行任务,请检查文件路径是否正确:", os.path.abspath(self.inputExcel))
|
print("读取Excel失败,将会使用默认参数执行任务,请检查文件路径是否正确:",
|
||||||
print("Failed to read Excel, will execute the task with default parameters, please check if the file path is correct: ", os.path.abspath(self.inputExcel))
|
os.path.abspath(self.inputExcel))
|
||||||
|
print("Failed to read Excel, will execute the task with default parameters, please check if the file path is correct: ",
|
||||||
|
os.path.abspath(self.inputExcel))
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
sheet_name_list = workbook.sheetnames
|
sheet_name_list = workbook.sheetnames
|
||||||
sheet = workbook[sheet_name_list[0]]
|
sheet = workbook[sheet_name_list[0]]
|
||||||
data = []
|
data = []
|
||||||
@ -285,7 +306,7 @@ class BrowserThread(Thread):
|
|||||||
key = row[0]
|
key = row[0]
|
||||||
values = [str(val) for val in row[1:] if val is not None]
|
values = [str(val) for val in row[1:] if val is not None]
|
||||||
result_dict.setdefault(key, []).extend([values])
|
result_dict.setdefault(key, []).extend([values])
|
||||||
|
|
||||||
data = {}
|
data = {}
|
||||||
for key, arr in result_dict.items():
|
for key, arr in result_dict.items():
|
||||||
result = []
|
result = []
|
||||||
@ -365,15 +386,19 @@ class BrowserThread(Thread):
|
|||||||
# 写入数据
|
# 写入数据
|
||||||
if self.outputFormat == "csv" or self.outputFormat == "txt":
|
if self.outputFormat == "csv" or self.outputFormat == "txt":
|
||||||
file_name = "Data/Task_" + \
|
file_name = "Data/Task_" + \
|
||||||
str(self.id) + "/" + self.saveName + '.' + self.outputFormat
|
str(self.id) + "/" + self.saveName + \
|
||||||
write_to_csv(file_name, self.OUTPUT, self.outputParametersRecord)
|
'.' + self.outputFormat
|
||||||
|
write_to_csv(file_name, self.OUTPUT,
|
||||||
|
self.outputParametersRecord)
|
||||||
elif self.outputFormat == "xlsx":
|
elif self.outputFormat == "xlsx":
|
||||||
file_name = "Data/Task_" + \
|
file_name = "Data/Task_" + \
|
||||||
str(self.id) + "/" + self.saveName + '.xlsx'
|
str(self.id) + "/" + self.saveName + '.xlsx'
|
||||||
write_to_excel(file_name, self.OUTPUT, self.outputParametersTypes, self.outputParametersRecord)
|
write_to_excel(
|
||||||
|
file_name, self.OUTPUT, self.outputParametersTypes, self.outputParametersRecord)
|
||||||
elif self.outputFormat == "mysql":
|
elif self.outputFormat == "mysql":
|
||||||
self.mysql.write_to_mysql(self.OUTPUT, self.outputParametersRecord, self.outputParametersTypes)
|
self.mysql.write_to_mysql(
|
||||||
|
self.OUTPUT, self.outputParametersRecord, self.outputParametersTypes)
|
||||||
|
|
||||||
self.OUTPUT = []
|
self.OUTPUT = []
|
||||||
self.log = ""
|
self.log = ""
|
||||||
|
|
||||||
@ -403,10 +428,12 @@ class BrowserThread(Thread):
|
|||||||
i = 0
|
i = 0
|
||||||
while True:
|
while True:
|
||||||
# newBodyText = self.browser.page_source
|
# newBodyText = self.browser.page_source
|
||||||
newBodyText = self.browser.find_element(By.CSS_SELECTOR, "body", iframe=para["iframe"]).text
|
newBodyText = self.browser.find_element(
|
||||||
|
By.CSS_SELECTOR, "body", iframe=para["iframe"]).text
|
||||||
if newBodyText == bodyText:
|
if newBodyText == bodyText:
|
||||||
print("页面已检测不到新内容,停止滚动。")
|
print("页面已检测不到新内容,停止滚动。")
|
||||||
print("No new content detected on the page, stop scrolling.")
|
print(
|
||||||
|
"No new content detected on the page, stop scrolling.")
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
bodyText = newBodyText
|
bodyText = newBodyText
|
||||||
@ -493,13 +520,15 @@ class BrowserThread(Thread):
|
|||||||
output = exec(code)
|
output = exec(code)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("执行下面的代码时出错:" + code, ",错误为:", e)
|
print("执行下面的代码时出错:" + code, ",错误为:", e)
|
||||||
print("Error executing the following code:" + code, ", error is:", e)
|
print("Error executing the following code:" +
|
||||||
|
code, ", error is:", e)
|
||||||
elif int(codeMode) == 6:
|
elif int(codeMode) == 6:
|
||||||
try:
|
try:
|
||||||
output = eval(code)
|
output = eval(code)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("获得下面的代码返回值时出错:" + code, ",错误为:", e)
|
print("获得下面的代码返回值时出错:" + code, ",错误为:", e)
|
||||||
print("Error executing and getting return value the following code:" + code, ", error is:", e)
|
print(
|
||||||
|
"Error executing and getting return value the following code:" + code, ", error is:", e)
|
||||||
elif int(codeMode) == 1:
|
elif int(codeMode) == 1:
|
||||||
self.recordLog("Execute System Call:" + code)
|
self.recordLog("Execute System Call:" + code)
|
||||||
self.recordLog("执行系统命令:" + code)
|
self.recordLog("执行系统命令:" + code)
|
||||||
@ -531,7 +560,8 @@ class BrowserThread(Thread):
|
|||||||
max_wait_time = int(paras["waitTime"])
|
max_wait_time = int(paras["waitTime"])
|
||||||
if codeMode == 2: # 使用循环的情况下,传入的clickPath就是实际的xpath
|
if codeMode == 2: # 使用循环的情况下,传入的clickPath就是实际的xpath
|
||||||
try:
|
try:
|
||||||
loopPath = replace_field_values(loopPath, self.outputParameters)
|
loopPath = replace_field_values(
|
||||||
|
loopPath, self.outputParameters)
|
||||||
elements = self.browser.find_elements(
|
elements = self.browser.find_elements(
|
||||||
By.XPATH, loopPath, iframe=paras["iframe"])
|
By.XPATH, loopPath, iframe=paras["iframe"])
|
||||||
element = elements[index]
|
element = elements[index]
|
||||||
@ -544,7 +574,7 @@ class BrowserThread(Thread):
|
|||||||
self.BREAK = True
|
self.BREAK = True
|
||||||
elif codeMode == 4:
|
elif codeMode == 4:
|
||||||
self.CONTINUE = True
|
self.CONTINUE = True
|
||||||
else: # 0 1 5 6
|
else: # 0 1 5 6
|
||||||
output = self.execute_code(
|
output = self.execute_code(
|
||||||
codeMode, code, max_wait_time, iframe=paras["iframe"])
|
codeMode, code, max_wait_time, iframe=paras["iframe"])
|
||||||
recordASField = bool(paras["recordASField"])
|
recordASField = bool(paras["recordASField"])
|
||||||
@ -553,7 +583,8 @@ class BrowserThread(Thread):
|
|||||||
# print("The return value of operation <" + node["title"] + "> is: " + output)
|
# print("The return value of operation <" + node["title"] + "> is: " + output)
|
||||||
self.outputParameters[node["title"]] = output
|
self.outputParameters[node["title"]] = output
|
||||||
if recordASField:
|
if recordASField:
|
||||||
line = new_line(self.outputParameters, self.maxViewLength, self.outputParametersRecord)
|
line = new_line(self.outputParameters,
|
||||||
|
self.maxViewLength, self.outputParametersRecord)
|
||||||
self.OUTPUT.append(line)
|
self.OUTPUT.append(line)
|
||||||
|
|
||||||
def switchSelect(self, para, loopValue):
|
def switchSelect(self, para, loopValue):
|
||||||
@ -566,7 +597,8 @@ class BrowserThread(Thread):
|
|||||||
optionValue = loopValue.split("~")[index - 1]
|
optionValue = loopValue.split("~")[index - 1]
|
||||||
except:
|
except:
|
||||||
print("取值失败,可能是因为取值索引超出范围,将使用整个文本值")
|
print("取值失败,可能是因为取值索引超出范围,将使用整个文本值")
|
||||||
print("Failed to get value, maybe because the index is out of range, will use the entire text value")
|
print(
|
||||||
|
"Failed to get value, maybe because the index is out of range, will use the entire text value")
|
||||||
else:
|
else:
|
||||||
optionValue = loopValue
|
optionValue = loopValue
|
||||||
optionMode = 1
|
optionMode = 1
|
||||||
@ -605,11 +637,11 @@ class BrowserThread(Thread):
|
|||||||
if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath
|
if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath
|
||||||
if xpath == "":
|
if xpath == "":
|
||||||
path = loopPath
|
path = loopPath
|
||||||
else:
|
else:
|
||||||
path = "(" + loopPath + ")" + \
|
path = "(" + loopPath + ")" + \
|
||||||
"[" + str(index + 1) + "]" + \
|
"[" + str(index + 1) + "]" + \
|
||||||
xpath
|
xpath
|
||||||
index = 0 # 如果是相对循环内元素的点击,在定位到元素后,index应该重置为0
|
index = 0 # 如果是相对循环内元素的点击,在定位到元素后,index应该重置为0
|
||||||
# element = loopElement
|
# element = loopElement
|
||||||
else:
|
else:
|
||||||
index = 0
|
index = 0
|
||||||
@ -632,10 +664,35 @@ class BrowserThread(Thread):
|
|||||||
|
|
||||||
def executeNode(self, nodeId, loopValue="", loopPath="", index=0):
|
def executeNode(self, nodeId, loopValue="", loopPath="", index=0):
|
||||||
node = self.procedure[nodeId]
|
node = self.procedure[nodeId]
|
||||||
WebDriverWait(self.browser, 10).until
|
# WebDriverWait(self.browser, 10).until
|
||||||
# 等待元素出现才进行操作,10秒内未出现则报错
|
# # 等待元素出现才进行操作,10秒内未出现则报错
|
||||||
(EC.visibility_of_element_located(
|
# (EC.visibility_of_element_located(
|
||||||
(By.XPATH, node["parameters"]["xpath"])))
|
# (By.XPATH, node["parameters"]["xpath"])))
|
||||||
|
try:
|
||||||
|
if node["parameters"]["waitElement"] != "":
|
||||||
|
waitElement = replace_field_values(
|
||||||
|
node["parameters"]["waitElement"], self.outputParameters)
|
||||||
|
waitElementTime = float(node["parameters"]["waitElementTime"])
|
||||||
|
waitElementIframeIndex = node["parameters"]["waitElementInIframe"]
|
||||||
|
print("等待元素出现:", waitElement)
|
||||||
|
print("Waiting for element to appear:", waitElement)
|
||||||
|
if waitElementIframeIndex > 0:
|
||||||
|
iframes = self.browser.find_elements(
|
||||||
|
By.CSS_SELECTOR, "iframe", iframe=False)
|
||||||
|
iframe = iframes[waitElementIframeIndex - 1]
|
||||||
|
self.browser.switch_to.frame(iframe)
|
||||||
|
WebDriverWait(self.browser, waitElementTime).until(
|
||||||
|
EC.presence_of_element_located((By.XPATH, waitElement))
|
||||||
|
)
|
||||||
|
if waitElementIframeIndex > 0:
|
||||||
|
self.browser.switch_to.default_content()
|
||||||
|
except Exception as e:
|
||||||
|
if waitElement != "":
|
||||||
|
print("等待元素出现超时:", waitElement, ",将继续执行。")
|
||||||
|
print("Timeout waiting for element to appear:",
|
||||||
|
waitElement, ", will continue to execute.")
|
||||||
|
print(e)
|
||||||
|
self.recordLog("Wait element not found")
|
||||||
|
|
||||||
# 根据不同选项执行不同操作
|
# 根据不同选项执行不同操作
|
||||||
if node["option"] == 0 or node["option"] == 10: # root操作,条件分支操作
|
if node["option"] == 0 or node["option"] == 10: # root操作,条件分支操作
|
||||||
@ -652,7 +709,7 @@ class BrowserThread(Thread):
|
|||||||
if self.totalSteps >= self.startSteps:
|
if self.totalSteps >= self.startSteps:
|
||||||
self.recordLog("getData")
|
self.recordLog("getData")
|
||||||
self.getData(node["parameters"], loopValue, node["isInLoop"],
|
self.getData(node["parameters"], loopValue, node["isInLoop"],
|
||||||
parentPath=loopPath, index=index)
|
parentPath=loopPath, index=index)
|
||||||
self.saveData()
|
self.saveData()
|
||||||
else:
|
else:
|
||||||
# self.getDataStep += 1
|
# self.getDataStep += 1
|
||||||
@ -714,7 +771,8 @@ class BrowserThread(Thread):
|
|||||||
continue
|
continue
|
||||||
elif tType == 2: # 当前页面包含元素
|
elif tType == 2: # 当前页面包含元素
|
||||||
try:
|
try:
|
||||||
xpath = replace_field_values(cnode["parameters"]["value"], self.outputParameters)
|
xpath = replace_field_values(
|
||||||
|
cnode["parameters"]["value"], self.outputParameters)
|
||||||
if self.browser.find_element(By.XPATH, xpath, iframe=cnode["parameters"]["iframe"]):
|
if self.browser.find_element(By.XPATH, xpath, iframe=cnode["parameters"]["iframe"]):
|
||||||
executeBranchId = i
|
executeBranchId = i
|
||||||
break
|
break
|
||||||
@ -722,7 +780,8 @@ class BrowserThread(Thread):
|
|||||||
continue
|
continue
|
||||||
elif tType == 3: # 当前循环元素包括文本
|
elif tType == 3: # 当前循环元素包括文本
|
||||||
try:
|
try:
|
||||||
value = replace_field_values(cnode["parameters"]["value"], self.outputParameters)
|
value = replace_field_values(
|
||||||
|
cnode["parameters"]["value"], self.outputParameters)
|
||||||
if loopElement.text.find(value) >= 0:
|
if loopElement.text.find(value) >= 0:
|
||||||
executeBranchId = i
|
executeBranchId = i
|
||||||
break
|
break
|
||||||
@ -730,7 +789,8 @@ class BrowserThread(Thread):
|
|||||||
continue
|
continue
|
||||||
elif tType == 4: # 当前循环元素包括元素
|
elif tType == 4: # 当前循环元素包括元素
|
||||||
try:
|
try:
|
||||||
xpath = replace_field_values(cnode["parameters"]["value"][1:], self.outputParameters)
|
xpath = replace_field_values(
|
||||||
|
cnode["parameters"]["value"][1:], self.outputParameters)
|
||||||
if loopElement.find_element(By.XPATH, xpath):
|
if loopElement.find_element(By.XPATH, xpath):
|
||||||
executeBranchId = i
|
executeBranchId = i
|
||||||
break
|
break
|
||||||
@ -782,7 +842,8 @@ class BrowserThread(Thread):
|
|||||||
finished = False
|
finished = False
|
||||||
# newBodyText = self.browser.page_source
|
# newBodyText = self.browser.page_source
|
||||||
# newBodyText = self.browser.find_element(By.XPATH, "//body").text
|
# newBodyText = self.browser.find_element(By.XPATH, "//body").text
|
||||||
newBodyText = self.browser.find_element(By.CSS_SELECTOR, "body", iframe=node["parameters"]["iframe"]).text
|
newBodyText = self.browser.find_element(
|
||||||
|
By.CSS_SELECTOR, "body", iframe=node["parameters"]["iframe"]).text
|
||||||
if node["parameters"]["exitCount"] == 0:
|
if node["parameters"]["exitCount"] == 0:
|
||||||
if newBodyText == bodyText: # 如果页面内容无变化
|
if newBodyText == bodyText: # 如果页面内容无变化
|
||||||
print("页面已检测不到新内容,停止循环。")
|
print("页面已检测不到新内容,停止循环。")
|
||||||
@ -790,9 +851,8 @@ class BrowserThread(Thread):
|
|||||||
finished = True
|
finished = True
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
if node["parameters"]["exitCount"] == 0:
|
print("检测到页面变化,继续循环。")
|
||||||
print("检测到页面变化,继续循环。")
|
print("Page changed detected, continue loop.")
|
||||||
print("Page changed detected, continue loop.")
|
|
||||||
bodyText = newBodyText
|
bodyText = newBodyText
|
||||||
xpath = replace_field_values(
|
xpath = replace_field_values(
|
||||||
node["parameters"]["xpath"], self.outputParameters)
|
node["parameters"]["xpath"], self.outputParameters)
|
||||||
@ -801,10 +861,10 @@ class BrowserThread(Thread):
|
|||||||
for i in node["sequence"]: # 挨个执行操作
|
for i in node["sequence"]: # 挨个执行操作
|
||||||
self.executeNode(
|
self.executeNode(
|
||||||
i, element, xpath, 0)
|
i, element, xpath, 0)
|
||||||
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
||||||
self.CONTINUE = False
|
self.CONTINUE = False
|
||||||
break
|
break
|
||||||
if self.BREAK: # 如果有break操作,退出循环
|
if self.BREAK: # 如果有break操作,退出循环
|
||||||
self.BREAK = False
|
self.BREAK = False
|
||||||
finished = True
|
finished = True
|
||||||
break
|
break
|
||||||
@ -864,7 +924,7 @@ class BrowserThread(Thread):
|
|||||||
for i in node["sequence"]: # 挨个顺序执行循环里所有的操作
|
for i in node["sequence"]: # 挨个顺序执行循环里所有的操作
|
||||||
self.executeNode(i, elements[index],
|
self.executeNode(i, elements[index],
|
||||||
xpath, index)
|
xpath, index)
|
||||||
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
||||||
self.CONTINUE = False
|
self.CONTINUE = False
|
||||||
break
|
break
|
||||||
if self.BREAK:
|
if self.BREAK:
|
||||||
@ -872,9 +932,9 @@ class BrowserThread(Thread):
|
|||||||
break
|
break
|
||||||
try:
|
try:
|
||||||
changed_handle = self.browser.current_window_handle != thisHandle
|
changed_handle = self.browser.current_window_handle != thisHandle
|
||||||
except: # 如果网页被意外关闭了的情况下
|
except: # 如果网页被意外关闭了的情况下
|
||||||
self.browser.switch_to.window(
|
self.browser.switch_to.window(
|
||||||
self.browser.window_handles[-1])
|
self.browser.window_handles[-1])
|
||||||
changed_handle = self.browser.window_handles[-1] != thisHandle
|
changed_handle = self.browser.window_handles[-1] != thisHandle
|
||||||
if changed_handle: # 如果执行完一次循环之后标签页的位置发生了变化
|
if changed_handle: # 如果执行完一次循环之后标签页的位置发生了变化
|
||||||
try:
|
try:
|
||||||
@ -925,7 +985,7 @@ class BrowserThread(Thread):
|
|||||||
By.XPATH, path, iframe=node["parameters"]["iframe"])
|
By.XPATH, path, iframe=node["parameters"]["iframe"])
|
||||||
for i in node["sequence"]: # 挨个执行操作
|
for i in node["sequence"]: # 挨个执行操作
|
||||||
self.executeNode(i, element, path, 0)
|
self.executeNode(i, element, path, 0)
|
||||||
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
||||||
self.CONTINUE = False
|
self.CONTINUE = False
|
||||||
break
|
break
|
||||||
if self.BREAK:
|
if self.BREAK:
|
||||||
@ -933,9 +993,9 @@ class BrowserThread(Thread):
|
|||||||
break
|
break
|
||||||
try:
|
try:
|
||||||
changed_handle = self.browser.current_window_handle != thisHandle
|
changed_handle = self.browser.current_window_handle != thisHandle
|
||||||
except: # 如果网页被意外关闭了的情况下
|
except: # 如果网页被意外关闭了的情况下
|
||||||
self.browser.switch_to.window(
|
self.browser.switch_to.window(
|
||||||
self.browser.window_handles[-1])
|
self.browser.window_handles[-1])
|
||||||
changed_handle = self.browser.window_handles[-1] != thisHandle
|
changed_handle = self.browser.window_handles[-1] != thisHandle
|
||||||
if changed_handle: # 如果执行完一次循环之后标签页的位置发生了变化
|
if changed_handle: # 如果执行完一次循环之后标签页的位置发生了变化
|
||||||
try:
|
try:
|
||||||
@ -984,7 +1044,7 @@ class BrowserThread(Thread):
|
|||||||
self.recordLog("input: " + text)
|
self.recordLog("input: " + text)
|
||||||
for i in node["sequence"]: # 挨个执行操作
|
for i in node["sequence"]: # 挨个执行操作
|
||||||
self.executeNode(i, text, "", 0)
|
self.executeNode(i, text, "", 0)
|
||||||
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
||||||
self.CONTINUE = False
|
self.CONTINUE = False
|
||||||
break
|
break
|
||||||
if self.BREAK:
|
if self.BREAK:
|
||||||
@ -1009,7 +1069,7 @@ class BrowserThread(Thread):
|
|||||||
self.recordLog("input: " + url)
|
self.recordLog("input: " + url)
|
||||||
for i in node["sequence"]:
|
for i in node["sequence"]:
|
||||||
self.executeNode(i, url, "", 0)
|
self.executeNode(i, url, "", 0)
|
||||||
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
||||||
self.CONTINUE = False
|
self.CONTINUE = False
|
||||||
break
|
break
|
||||||
if self.BREAK:
|
if self.BREAK:
|
||||||
@ -1037,7 +1097,7 @@ class BrowserThread(Thread):
|
|||||||
break
|
break
|
||||||
for i in node["sequence"]: # 挨个执行操作
|
for i in node["sequence"]: # 挨个执行操作
|
||||||
self.executeNode(i, code, node["parameters"]["xpath"], 0)
|
self.executeNode(i, code, node["parameters"]["xpath"], 0)
|
||||||
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
if self.BREAK or self.CONTINUE: # 如果有break操作,下面的操作不执行
|
||||||
self.CONTINUE = False
|
self.CONTINUE = False
|
||||||
break
|
break
|
||||||
if self.BREAK:
|
if self.BREAK:
|
||||||
@ -1067,7 +1127,7 @@ class BrowserThread(Thread):
|
|||||||
# clear output parameters
|
# clear output parameters
|
||||||
for key in self.outputParameters:
|
for key in self.outputParameters:
|
||||||
self.outputParameters[key] = ""
|
self.outputParameters[key] = ""
|
||||||
else: # 在流程图其他位置设置了打开网页的操作,读取的应该是第一个网址,如打开网页后登录,再打开第二个网页
|
else: # 在流程图其他位置设置了打开网页的操作,读取的应该是第一个网址,如打开网页后登录,再打开第二个网页
|
||||||
url = list(filter(isnotnull, para["links"].split("\n")))[0]
|
url = list(filter(isnotnull, para["links"].split("\n")))[0]
|
||||||
# 将value中的Field[""]替换为outputParameters中的键值
|
# 将value中的Field[""]替换为outputParameters中的键值
|
||||||
url = replace_field_values(url, self.outputParameters)
|
url = replace_field_values(url, self.outputParameters)
|
||||||
@ -1150,7 +1210,8 @@ class BrowserThread(Thread):
|
|||||||
replaced_text = replaced_text.split("~")[index - 1]
|
replaced_text = replaced_text.split("~")[index - 1]
|
||||||
except:
|
except:
|
||||||
print("取值失败,可能是因为取值索引超出范围,将使用整个文本值")
|
print("取值失败,可能是因为取值索引超出范围,将使用整个文本值")
|
||||||
print("Failed to get value, maybe because the index is out of range, will use the entire text value")
|
print(
|
||||||
|
"Failed to get value, maybe because the index is out of range, will use the entire text value")
|
||||||
textbox.send_keys(replaced_text)
|
textbox.send_keys(replaced_text)
|
||||||
if value.lower().find("<enter>") >= 0:
|
if value.lower().find("<enter>") >= 0:
|
||||||
textbox.send_keys(Keys.ENTER)
|
textbox.send_keys(Keys.ENTER)
|
||||||
@ -1180,11 +1241,11 @@ class BrowserThread(Thread):
|
|||||||
if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath
|
if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath
|
||||||
if xpath == "":
|
if xpath == "":
|
||||||
path = clickPath
|
path = clickPath
|
||||||
else:
|
else:
|
||||||
path = "(" + clickPath + ")" + \
|
path = "(" + clickPath + ")" + \
|
||||||
"[" + str(index + 1) + "]" + \
|
"[" + str(index + 1) + "]" + \
|
||||||
xpath
|
xpath
|
||||||
index = 0 # 如果是相对循环内元素的点击,在定位到元素后,index应该重置为0
|
index = 0 # 如果是相对循环内元素的点击,在定位到元素后,index应该重置为0
|
||||||
# element = loopElement
|
# element = loopElement
|
||||||
else:
|
else:
|
||||||
index = 0
|
index = 0
|
||||||
@ -1226,7 +1287,8 @@ class BrowserThread(Thread):
|
|||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("点击元素失败:" + path, ",请尝试将点击类型改为JavaScript点击后重试。")
|
print("点击元素失败:" + path, ",请尝试将点击类型改为JavaScript点击后重试。")
|
||||||
print("Failed to click element:" + path, ", please try to change the click type to JavaScript Click.")
|
print("Failed to click element:" + path,
|
||||||
|
", please try to change the click type to JavaScript Click.")
|
||||||
print(e)
|
print(e)
|
||||||
self.Log(e)
|
self.Log(e)
|
||||||
self.recordLog(str(e))
|
self.recordLog(str(e))
|
||||||
@ -1374,14 +1436,15 @@ class BrowserThread(Thread):
|
|||||||
# 使用Pillow库打开截图,并转换为灰度图像
|
# 使用Pillow库打开截图,并转换为灰度图像
|
||||||
image = Image.open(screenshot_stream).convert('L')
|
image = Image.open(screenshot_stream).convert('L')
|
||||||
temp_name = "OCR_" + str(time.time()) + ".png"
|
temp_name = "OCR_" + str(time.time()) + ".png"
|
||||||
location = "Data/Task_" + str(self.id) + "/" + self.saveName + "/" + temp_name
|
location = "Data/Task_" + \
|
||||||
|
str(self.id) + "/" + self.saveName + "/" + temp_name
|
||||||
image.save(location)
|
image.save(location)
|
||||||
ocr = DdddOcr()
|
ocr = DdddOcr()
|
||||||
with open(location, 'rb') as f:
|
with open(location, 'rb') as f:
|
||||||
image_bytes = f.read()
|
image_bytes = f.read()
|
||||||
content = ocr.classification(image_bytes)
|
content = ocr.classification(image_bytes)
|
||||||
os.remove(location)
|
os.remove(location)
|
||||||
# 使用Tesseract OCR引擎识别图像中的文本
|
# 使用Tesseract OCR引擎识别图像中的文本
|
||||||
# content = pytesseract.image_to_string(image, lang='chi_sim+eng')
|
# content = pytesseract.image_to_string(image, lang='chi_sim+eng')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# try:
|
# try:
|
||||||
@ -1392,30 +1455,30 @@ class BrowserThread(Thread):
|
|||||||
# screenshot_stream = io.BytesIO(screenshot)
|
# screenshot_stream = io.BytesIO(screenshot)
|
||||||
# # 使用Pillow库打开截图,并转换为灰度图像
|
# # 使用Pillow库打开截图,并转换为灰度图像
|
||||||
# image = Image.open(screenshot_stream).convert('L')
|
# image = Image.open(screenshot_stream).convert('L')
|
||||||
# # 使用Tesseract OCR引擎识别图像中的文本
|
# # 使用Tesseract OCR引擎识别图像中的文本
|
||||||
# # content = pytesseract.image_to_string(image, lang='eng')
|
# # content = pytesseract.image_to_string(image, lang='eng')
|
||||||
# except Exception as e:
|
# except Exception as e:
|
||||||
content = "OCR Error"
|
content = "OCR Error"
|
||||||
print(e)
|
print(e)
|
||||||
# if sys.platform == "win32":
|
# if sys.platform == "win32":
|
||||||
# print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://blog.csdn.net/u010454030/article/details/80515501")
|
# print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://blog.csdn.net/u010454030/article/details/80515501")
|
||||||
# print("\nhttps://www.bilibili.com/video/BV1GP411y7u4/")
|
# print("\nhttps://www.bilibili.com/video/BV1GP411y7u4/")
|
||||||
# elif sys.platform == "darwin":
|
# elif sys.platform == "darwin":
|
||||||
# print(
|
# print(
|
||||||
# "注意以上错误,要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://zhuanlan.zhihu.com/p/146044810")
|
# "注意以上错误,要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://zhuanlan.zhihu.com/p/146044810")
|
||||||
# elif sys.platform == "linux":
|
# elif sys.platform == "linux":
|
||||||
# print(
|
# print(
|
||||||
# "注意以上错误,要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://zhuanlan.zhihu.com/p/420259031")
|
# "注意以上错误,要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://zhuanlan.zhihu.com/p/420259031")
|
||||||
# else:
|
# else:
|
||||||
# print("注意以上错误,要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://blog.csdn.net/u010454030/article/details/80515501")
|
# print("注意以上错误,要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://blog.csdn.net/u010454030/article/details/80515501")
|
||||||
# print("\nhttps://www.bilibili.com/video/BV1GP411y7u4/")
|
# print("\nhttps://www.bilibili.com/video/BV1GP411y7u4/")
|
||||||
# print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable PATH (need to restart EasySpider after you put in PATH): https://tesseract-ocr.github.io/tessdoc/Installation.html")
|
# print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable PATH (need to restart EasySpider after you put in PATH): https://tesseract-ocr.github.io/tessdoc/Installation.html")
|
||||||
elif p["contentType"] == 9:
|
elif p["contentType"] == 9:
|
||||||
content = self.execute_code(
|
content = self.execute_code(
|
||||||
2, p["JS"], p["JSWaitTime"], element, iframe=p["iframe"])
|
2, p["JS"], p["JSWaitTime"], element, iframe=p["iframe"])
|
||||||
elif p["contentType"] == 12: # 系统命令返回值
|
elif p["contentType"] == 12: # 系统命令返回值
|
||||||
content = self.execute_code(1, p["JS"], p["JSWaitTime"])
|
content = self.execute_code(1, p["JS"], p["JSWaitTime"])
|
||||||
elif p["contentType"] == 13: # eval返回值
|
elif p["contentType"] == 13: # eval返回值
|
||||||
content = self.execute_code(6, p["JS"], p["JSWaitTime"])
|
content = self.execute_code(6, p["JS"], p["JSWaitTime"])
|
||||||
elif p["contentType"] == 10: # 下拉框选中的值
|
elif p["contentType"] == 10: # 下拉框选中的值
|
||||||
try:
|
try:
|
||||||
@ -1488,17 +1551,18 @@ class BrowserThread(Thread):
|
|||||||
# else:
|
# else:
|
||||||
# 如果字串里有//即子孙查找,则不动语句
|
# 如果字串里有//即子孙查找,则不动语句
|
||||||
if relativeXPath.find("//") >= 0:
|
if relativeXPath.find("//") >= 0:
|
||||||
if xpath.startswith("/"):
|
if xpath.startswith("/"):
|
||||||
full_path = "(" + parentPath + ")" + \
|
full_path = "(" + parentPath + ")" + \
|
||||||
"[" + str(index + 1) + "]"+ \
|
"[" + str(index + 1) + "]" + \
|
||||||
relativeXPath + content_type
|
relativeXPath + content_type
|
||||||
else: # 如果是id()这种形式,不需要包parentPath
|
else: # 如果是id()这种形式,不需要包parentPath
|
||||||
full_path = xpath
|
full_path = xpath
|
||||||
try:
|
try:
|
||||||
content = pageHTML.xpath(full_path)
|
content = pageHTML.xpath(full_path)
|
||||||
except:
|
except:
|
||||||
content = []
|
content = []
|
||||||
elif not relativeXPath.startswith("/"): # 如果是id()这种形式,不需要包/html/body
|
# 如果是id()这种形式,不需要包/html/body
|
||||||
|
elif not relativeXPath.startswith("/"):
|
||||||
try:
|
try:
|
||||||
content = loopElementHTML.xpath(xpath)
|
content = loopElementHTML.xpath(xpath)
|
||||||
except:
|
except:
|
||||||
@ -1507,7 +1571,8 @@ class BrowserThread(Thread):
|
|||||||
content = loopElementHTML.xpath(
|
content = loopElementHTML.xpath(
|
||||||
"/html/body/" + loopElementHTML[0][0].tag + xpath)
|
"/html/body/" + loopElementHTML[0][0].tag + xpath)
|
||||||
else:
|
else:
|
||||||
if xpath.find("/body") < 0 and xpath.startswith("/"): # 如果是id()或(//div)[1]这种形式,不需要包/html/body
|
# 如果是id()或(//div)[1]这种形式,不需要包/html/body
|
||||||
|
if xpath.find("/body") < 0 and xpath.startswith("/"):
|
||||||
xpath = "/html/body" + xpath
|
xpath = "/html/body" + xpath
|
||||||
content = pageHTML.xpath(xpath)
|
content = pageHTML.xpath(xpath)
|
||||||
if len(content) > 0:
|
if len(content) > 0:
|
||||||
@ -1517,7 +1582,8 @@ class BrowserThread(Thread):
|
|||||||
for result in content if result.strip())
|
for result in content if result.strip())
|
||||||
if p["nodeType"] == 2:
|
if p["nodeType"] == 2:
|
||||||
base_url = self.browser.current_url
|
base_url = self.browser.current_url
|
||||||
content = urljoin(base_url, content) # 合并链接相对路径为绝对路径
|
# 合并链接相对路径为绝对路径
|
||||||
|
content = urljoin(base_url, content)
|
||||||
else:
|
else:
|
||||||
content = p["default"]
|
content = p["default"]
|
||||||
if not self.dataNotFoundKeys[p["name"]]:
|
if not self.dataNotFoundKeys[p["name"]]:
|
||||||
@ -1544,7 +1610,7 @@ class BrowserThread(Thread):
|
|||||||
if not p["optimizable"]:
|
if not p["optimizable"]:
|
||||||
content = ""
|
content = ""
|
||||||
relativeXPath = replace_field_values(
|
relativeXPath = replace_field_values(
|
||||||
p["relativeXPath"], self.outputParameters)
|
p["relativeXPath"], self.outputParameters)
|
||||||
if not (p["contentType"] == 5 or p["contentType"] == 6): # 如果不是页面标题或URL,去找元素
|
if not (p["contentType"] == 5 or p["contentType"] == 6): # 如果不是页面标题或URL,去找元素
|
||||||
try:
|
try:
|
||||||
# relativeXPath = relativeXPath.lower()
|
# relativeXPath = relativeXPath.lower()
|
||||||
@ -1640,9 +1706,10 @@ class BrowserThread(Thread):
|
|||||||
continue # 再出现类似问题直接跳过
|
continue # 再出现类似问题直接跳过
|
||||||
self.outputParameters[p["name"]] = content
|
self.outputParameters[p["name"]] = content
|
||||||
self.execute_code(
|
self.execute_code(
|
||||||
2, p["afterJS"], p["afterJSWaitTime"], element, iframe=p["iframe"]) # 执行后置JS
|
2, p["afterJS"], p["afterJSWaitTime"], element, iframe=p["iframe"]) # 执行后置JS
|
||||||
if para["recordASField"] > 0:
|
if para["recordASField"] > 0:
|
||||||
line = new_line(self.outputParameters, self.maxViewLength, self.outputParametersRecord)
|
line = new_line(self.outputParameters,
|
||||||
|
self.maxViewLength, self.outputParametersRecord)
|
||||||
self.OUTPUT.append(line)
|
self.OUTPUT.append(line)
|
||||||
# rt.end()
|
# rt.end()
|
||||||
|
|
||||||
@ -1659,7 +1726,7 @@ if __name__ == '__main__':
|
|||||||
"read_type": "remote",
|
"read_type": "remote",
|
||||||
"headless": False,
|
"headless": False,
|
||||||
"server_address": "http://localhost:8074",
|
"server_address": "http://localhost:8074",
|
||||||
"keyboard": True, # 是否监听键盘输入
|
"keyboard": True, # 是否监听键盘输入
|
||||||
"version": "0.3.6",
|
"version": "0.3.6",
|
||||||
}
|
}
|
||||||
c = Config(config)
|
c = Config(config)
|
||||||
@ -1675,8 +1742,10 @@ if __name__ == '__main__':
|
|||||||
options.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome"
|
options.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome"
|
||||||
# MacOS需要用option而不是options!
|
# MacOS需要用option而不是options!
|
||||||
option.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome"
|
option.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome"
|
||||||
option.add_extension("EasySpider.app/Contents/Resources/app/XPathHelper.crx")
|
option.add_extension(
|
||||||
options.add_extension("EasySpider.app/Contents/Resources/app/XPathHelper.crx")
|
"EasySpider.app/Contents/Resources/app/XPathHelper.crx")
|
||||||
|
options.add_extension(
|
||||||
|
"EasySpider.app/Contents/Resources/app/XPathHelper.crx")
|
||||||
driver_path = "EasySpider.app/Contents/Resources/app/chromedriver_mac64"
|
driver_path = "EasySpider.app/Contents/Resources/app/chromedriver_mac64"
|
||||||
# options.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
|
# options.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
|
||||||
# # MacOS需要用option而不是options!
|
# # MacOS需要用option而不是options!
|
||||||
@ -1684,7 +1753,8 @@ if __name__ == '__main__':
|
|||||||
# driver_path = os.getcwd()+ "/chromedriver_mac64"
|
# driver_path = os.getcwd()+ "/chromedriver_mac64"
|
||||||
print(driver_path)
|
print(driver_path)
|
||||||
if c.config_folder == "":
|
if c.config_folder == "":
|
||||||
c.config_folder = os.path.expanduser("~/Library/Application Support/EasySpider/")
|
c.config_folder = os.path.expanduser(
|
||||||
|
"~/Library/Application Support/EasySpider/")
|
||||||
# print("Config folder for MacOS:", c.config_folder)
|
# print("Config folder for MacOS:", c.config_folder)
|
||||||
elif os.path.exists(os.getcwd()+"/EasySpider/resources"): # 打包后的路径
|
elif os.path.exists(os.getcwd()+"/EasySpider/resources"): # 打包后的路径
|
||||||
print("Finding chromedriver in EasySpider",
|
print("Finding chromedriver in EasySpider",
|
||||||
@ -1727,7 +1797,7 @@ if __name__ == '__main__':
|
|||||||
elif os.path.exists(os.getcwd()+"/../ElectronJS"):
|
elif os.path.exists(os.getcwd()+"/../ElectronJS"):
|
||||||
# 软件dev用
|
# 软件dev用
|
||||||
print("Finding chromedriver in EasySpider",
|
print("Finding chromedriver in EasySpider",
|
||||||
os.getcwd()+"/ElectronJS")
|
os.getcwd()+"/ElectronJS")
|
||||||
option.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置
|
option.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置
|
||||||
options.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置
|
options.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置
|
||||||
driver_path = "../ElectronJS/chrome_win64/chromedriver_win64.exe"
|
driver_path = "../ElectronJS/chrome_win64/chromedriver_win64.exe"
|
||||||
@ -1739,7 +1809,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
option.add_experimental_option(
|
option.add_experimental_option(
|
||||||
'excludeSwitches', ['enable-automation']) # 以开发者模式
|
'excludeSwitches', ['enable-automation']) # 以开发者模式
|
||||||
|
|
||||||
# user_data_dir = r'' # 注意没有Default!
|
# user_data_dir = r'' # 注意没有Default!
|
||||||
|
|
||||||
# options.add_argument('--user-data-dir='+p)
|
# options.add_argument('--user-data-dir='+p)
|
||||||
@ -1780,6 +1850,11 @@ if __name__ == '__main__':
|
|||||||
options.add_argument(
|
options.add_argument(
|
||||||
"--disable-blink-features=AutomationControlled") # TMALL 反扒
|
"--disable-blink-features=AutomationControlled") # TMALL 反扒
|
||||||
|
|
||||||
|
options.add_argument('-ignore-certificate-errors')
|
||||||
|
options.add_argument('-ignore -ssl-errors')
|
||||||
|
option.add_argument('-ignore-certificate-errors')
|
||||||
|
option.add_argument('-ignore -ssl-errors')
|
||||||
|
|
||||||
threads = []
|
threads = []
|
||||||
for i in c.id:
|
for i in c.id:
|
||||||
# print(options)
|
# print(options)
|
||||||
@ -1833,10 +1908,10 @@ if __name__ == '__main__':
|
|||||||
options=options, chrome_options=option, executable_path=driver_path)
|
options=options, chrome_options=option, executable_path=driver_path)
|
||||||
elif cloudflare == 1:
|
elif cloudflare == 1:
|
||||||
if sys.platform == "win32":
|
if sys.platform == "win32":
|
||||||
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
|
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
|
||||||
# options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器
|
# options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器
|
||||||
browser_t = MyUCChrome(
|
browser_t = MyUCChrome(
|
||||||
options=options, driver_executable_path=driver_path)
|
options=options, driver_executable_path=driver_path)
|
||||||
else:
|
else:
|
||||||
print("Cloudflare模式只支持Windows x64平台。")
|
print("Cloudflare模式只支持Windows x64平台。")
|
||||||
print("Cloudflare Mode only support on Windows x64 platform.")
|
print("Cloudflare Mode only support on Windows x64 platform.")
|
||||||
@ -1849,7 +1924,7 @@ if __name__ == '__main__':
|
|||||||
threads.append(thread)
|
threads.append(thread)
|
||||||
thread.start()
|
thread.start()
|
||||||
# Set the pause operation
|
# Set the pause operation
|
||||||
# if sys.platform != "linux":
|
# if sys.platform != "linux":
|
||||||
# time.sleep(3)
|
# time.sleep(3)
|
||||||
# print("\n\n----------------------------------")
|
# print("\n\n----------------------------------")
|
||||||
# print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。")
|
# print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。")
|
||||||
@ -1868,17 +1943,16 @@ if __name__ == '__main__':
|
|||||||
# print("Passing the Cloudflare verification mode is sometimes unstable. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
|
# print("Passing the Cloudflare verification mode is sometimes unstable. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
|
||||||
# 使用监听器监听键盘输入
|
# 使用监听器监听键盘输入
|
||||||
try:
|
try:
|
||||||
if c.keyboard:
|
if c.keyboard:
|
||||||
with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:
|
with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:
|
||||||
listener.join()
|
listener.join()
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
# print("您的操作系统不支持暂停功能。")
|
# print("您的操作系统不支持暂停功能。")
|
||||||
# print("Your operating system does not support the pause function.")
|
# print("Your operating system does not support the pause function.")
|
||||||
|
|
||||||
|
|
||||||
# print("线程长度:", len(threads) )
|
# print("线程长度:", len(threads) )
|
||||||
|
|
||||||
for thread in threads:
|
for thread in threads:
|
||||||
print()
|
print()
|
||||||
thread.join()
|
thread.join()
|
||||||
|
@ -3,7 +3,6 @@ requests==2.31.0
|
|||||||
selenium==4.5.0
|
selenium==4.5.0
|
||||||
pyinstaller==5.9.0
|
pyinstaller==5.9.0
|
||||||
Pillow==9.4.0
|
Pillow==9.4.0
|
||||||
pytesseract==0.3.10
|
|
||||||
openpyxl==3.1.2
|
openpyxl==3.1.2
|
||||||
pymysql==1.1.0
|
pymysql==1.1.0
|
||||||
lxml==4.9.2
|
lxml==4.9.2
|
||||||
|
Loading…
x
Reference in New Issue
Block a user