TEST For IFrame

This commit is contained in:
NaiboWang-Alienware 2023-07-05 08:42:18 +08:00
parent 01866b208a
commit b6fb244837
21 changed files with 143 additions and 410 deletions

View File

@ -432,7 +432,7 @@ function handleOpenBrowser(event, lang = "en", user_data_folder = "", mobile = f
runBrowser(lang, user_data_folder, mobile);
let size = screen.getPrimaryDisplay().workAreaSize;
let width = parseInt(size.width);
let height = parseInt(size.height * 0.7);
let height = parseInt(size.height * 0.65);
flowchart_window = new BrowserWindow({
x: 0,
y: 0,

View File

@ -23,7 +23,7 @@
"@electron-forge/maker-rpm": "^6.0.5",
"@electron-forge/maker-squirrel": "^6.0.5",
"@electron-forge/maker-zip": "^6.0.5",
"electron": "^20.0.1"
"electron": "^25.0.1"
}
},
"node_modules/@electron-forge/cli": {
@ -980,9 +980,10 @@
"optional": true
},
"node_modules/@types/node": {
"version": "16.18.38",
"dev": true,
"license": "MIT"
"version": "18.16.19",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.16.19.tgz",
"integrity": "sha512-IXl7o+R9iti9eBW4Wg2hx1xQDig183jj7YLn8F7udNceyfkbn1ZxmzZXuak20gR40D7pIkIY1kYGx5VIGbaHKA==",
"dev": true
},
"node_modules/@types/responselike": {
"version": "1.0.0",
@ -1579,16 +1580,6 @@
"typedarray": "^0.0.6"
}
},
"node_modules/config-chain": {
"version": "1.1.13",
"dev": true,
"license": "MIT",
"optional": true,
"dependencies": {
"ini": "^1.3.4",
"proto-list": "~1.2.1"
}
},
"node_modules/console-control-strings": {
"version": "1.1.0",
"dev": true,
@ -1799,31 +1790,27 @@
"minimatch": "^3.0.4"
}
},
"node_modules/duplexer3": {
"version": "0.1.5",
"dev": true,
"license": "BSD-3-Clause"
},
"node_modules/eastasianwidth": {
"version": "0.2.0",
"dev": true,
"license": "MIT"
},
"node_modules/electron": {
"version": "20.3.12",
"version": "25.2.0",
"resolved": "https://registry.npmjs.org/electron/-/electron-25.2.0.tgz",
"integrity": "sha512-I/rhcW2sV2fyiveVSBr2N7v5ZiCtdGY0UiNCDZgk2fpSC+irQjbeh7JT2b4vWmJ2ogOXBjqesrN9XszTIG6DHg==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"@electron/get": "^1.14.1",
"@types/node": "^16.11.26",
"@electron/get": "^2.0.0",
"@types/node": "^18.11.18",
"extract-zip": "^2.0.1"
},
"bin": {
"electron": "cli.js"
},
"engines": {
"node": ">= 10.17.0"
"node": ">= 12.20.55"
}
},
"node_modules/electron-installer-common": {
@ -1991,215 +1978,11 @@
"node": ">= 4.0.0"
}
},
"node_modules/electron/node_modules/@electron/get": {
"version": "1.14.1",
"dev": true,
"license": "MIT",
"dependencies": {
"debug": "^4.1.1",
"env-paths": "^2.2.0",
"fs-extra": "^8.1.0",
"got": "^9.6.0",
"progress": "^2.0.3",
"semver": "^6.2.0",
"sumchecker": "^3.0.1"
},
"engines": {
"node": ">=8.6"
},
"optionalDependencies": {
"global-agent": "^3.0.0",
"global-tunnel-ng": "^2.7.1"
}
},
"node_modules/electron/node_modules/@sindresorhus/is": {
"version": "0.14.0",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/electron/node_modules/@szmarczak/http-timer": {
"version": "1.1.2",
"dev": true,
"license": "MIT",
"dependencies": {
"defer-to-connect": "^1.0.1"
},
"engines": {
"node": ">=6"
}
},
"node_modules/electron/node_modules/cacheable-request": {
"version": "6.1.0",
"dev": true,
"license": "MIT",
"dependencies": {
"clone-response": "^1.0.2",
"get-stream": "^5.1.0",
"http-cache-semantics": "^4.0.0",
"keyv": "^3.0.0",
"lowercase-keys": "^2.0.0",
"normalize-url": "^4.1.0",
"responselike": "^1.0.2"
},
"engines": {
"node": ">=8"
}
},
"node_modules/electron/node_modules/decompress-response": {
"version": "3.3.0",
"dev": true,
"license": "MIT",
"dependencies": {
"mimic-response": "^1.0.0"
},
"engines": {
"node": ">=4"
}
},
"node_modules/electron/node_modules/defer-to-connect": {
"version": "1.1.3",
"dev": true,
"license": "MIT"
},
"node_modules/electron/node_modules/fs-extra": {
"version": "8.1.0",
"dev": true,
"license": "MIT",
"dependencies": {
"graceful-fs": "^4.2.0",
"jsonfile": "^4.0.0",
"universalify": "^0.1.0"
},
"engines": {
"node": ">=6 <7 || >=8"
}
},
"node_modules/electron/node_modules/got": {
"version": "9.6.0",
"dev": true,
"license": "MIT",
"dependencies": {
"@sindresorhus/is": "^0.14.0",
"@szmarczak/http-timer": "^1.1.2",
"cacheable-request": "^6.0.0",
"decompress-response": "^3.3.0",
"duplexer3": "^0.1.4",
"get-stream": "^4.1.0",
"lowercase-keys": "^1.0.1",
"mimic-response": "^1.0.1",
"p-cancelable": "^1.0.0",
"to-readable-stream": "^1.0.0",
"url-parse-lax": "^3.0.0"
},
"engines": {
"node": ">=8.6"
}
},
"node_modules/electron/node_modules/got/node_modules/get-stream": {
"version": "4.1.0",
"dev": true,
"license": "MIT",
"dependencies": {
"pump": "^3.0.0"
},
"engines": {
"node": ">=6"
}
},
"node_modules/electron/node_modules/got/node_modules/lowercase-keys": {
"version": "1.0.1",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/electron/node_modules/json-buffer": {
"version": "3.0.0",
"dev": true,
"license": "MIT"
},
"node_modules/electron/node_modules/jsonfile": {
"version": "4.0.0",
"dev": true,
"license": "MIT",
"optionalDependencies": {
"graceful-fs": "^4.1.6"
}
},
"node_modules/electron/node_modules/keyv": {
"version": "3.1.0",
"dev": true,
"license": "MIT",
"dependencies": {
"json-buffer": "3.0.0"
}
},
"node_modules/electron/node_modules/normalize-url": {
"version": "4.5.1",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=8"
}
},
"node_modules/electron/node_modules/p-cancelable": {
"version": "1.1.0",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/electron/node_modules/responselike": {
"version": "1.0.2",
"dev": true,
"license": "MIT",
"dependencies": {
"lowercase-keys": "^1.0.0"
}
},
"node_modules/electron/node_modules/responselike/node_modules/lowercase-keys": {
"version": "1.0.1",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/electron/node_modules/semver": {
"version": "6.3.0",
"dev": true,
"license": "ISC",
"bin": {
"semver": "bin/semver.js"
}
},
"node_modules/electron/node_modules/universalify": {
"version": "0.1.2",
"dev": true,
"license": "MIT",
"engines": {
"node": ">= 4.0.0"
}
},
"node_modules/emoji-regex": {
"version": "8.0.0",
"dev": true,
"license": "MIT"
},
"node_modules/encodeurl": {
"version": "1.0.2",
"dev": true,
"license": "MIT",
"optional": true,
"engines": {
"node": ">= 0.8"
}
},
"node_modules/encoding": {
"version": "0.1.13",
"dev": true,
@ -2803,21 +2586,6 @@
"which": "bin/which"
}
},
"node_modules/global-tunnel-ng": {
"version": "2.7.1",
"dev": true,
"license": "BSD-3-Clause",
"optional": true,
"dependencies": {
"encodeurl": "^1.0.2",
"lodash": "^4.17.10",
"npm-conf": "^1.1.3",
"tunnel": "^0.0.6"
},
"engines": {
"node": ">=0.10"
}
},
"node_modules/globalthis": {
"version": "1.0.3",
"dev": true,
@ -3891,28 +3659,6 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/npm-conf": {
"version": "1.1.3",
"dev": true,
"license": "MIT",
"optional": true,
"dependencies": {
"config-chain": "^1.1.11",
"pify": "^3.0.0"
},
"engines": {
"node": ">=4"
}
},
"node_modules/npm-conf/node_modules/pify": {
"version": "3.0.0",
"dev": true,
"license": "MIT",
"optional": true,
"engines": {
"node": ">=4"
}
},
"node_modules/npm-run-path": {
"version": "2.0.2",
"dev": true,
@ -4288,14 +4034,6 @@
"node": ">=6"
}
},
"node_modules/prepend-http": {
"version": "2.0.0",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=4"
}
},
"node_modules/process-nextick-args": {
"version": "2.0.1",
"license": "MIT"
@ -4320,12 +4058,6 @@
"node": ">=10"
}
},
"node_modules/proto-list": {
"version": "1.2.4",
"dev": true,
"license": "ISC",
"optional": true
},
"node_modules/pump": {
"version": "3.0.0",
"dev": true,
@ -5050,14 +4782,6 @@
"tmp": "^0.2.0"
}
},
"node_modules/to-readable-stream": {
"version": "1.0.0",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/to-regex-range": {
"version": "5.0.1",
"dev": true,
@ -5098,15 +4822,6 @@
"dev": true,
"license": "0BSD"
},
"node_modules/tunnel": {
"version": "0.0.6",
"dev": true,
"license": "MIT",
"optional": true,
"engines": {
"node": ">=0.6.11 <=0.7.0 || >=0.7.3"
}
},
"node_modules/type-fest": {
"version": "0.21.3",
"dev": true,
@ -5163,17 +4878,6 @@
"node": ">= 10.0.0"
}
},
"node_modules/url-parse-lax": {
"version": "3.0.0",
"dev": true,
"license": "MIT",
"dependencies": {
"prepend-http": "^2.0.0"
},
"engines": {
"node": ">=4"
}
},
"node_modules/username": {
"version": "5.1.0",
"dev": true,

View File

@ -27,7 +27,7 @@
"@electron-forge/maker-rpm": "^6.0.5",
"@electron-forge/maker-squirrel": "^6.0.5",
"@electron-forge/maker-zip": "^6.0.5",
"electron": "^20.0.1"
"electron": "^25.0.1"
},
"repository": "https://github.com/NaiboWang/EasySpider",
"dependencies": {
@ -76,4 +76,4 @@
"publishers": []
}
}
}
}

View File

@ -502,7 +502,7 @@
<input onkeydown="inputDelete(event)" required name="serviceName" value="New Crawler Task" id="serviceName" class="form-control"></input>
<label>Task Description:</label>
<input onkeydown="inputDelete(event)" id="serviceDescription" name="serviceDescription" class="form-control"></input>
<label>How many data to save locally each time (the larger the value, the faster the collection speed, but there is a risk of data loss):</label>
<label>Number of records to collect before each save (a larger value makes collection faster, but more data may be lost if the program exits unexpectedly):</label>
<input onkeydown="inputDelete(event)" type="number" value="10" id="saveThreshold" name="saveThreshold" class="form-control"></input>
<label>Is the page an extreme anti-crawler website such as Cloudflare:</label>
<select id="cloudflare" name="cloudflare" class="form-control">

View File

@ -503,7 +503,7 @@
<input onkeydown="inputDelete(event)" required name="serviceName" value="新web采集任务" id="serviceName" class="form-control"></input>
<label>任务描述:</label>
<input onkeydown="inputDelete(event)" id="serviceDescription" name="serviceDescription" class="form-control"></input>
<label>每采集多少条数据本地保存一次(值越大采集速度越快,但如果意外退出则有数据丢失风险):</label>
<label>每采集多少条数据保存一次(值越大采集速度越快,但如果意外退出则有数据丢失风险):</label>
<input onkeydown="inputDelete(event)" type="number" value="10" id="saveThreshold" name="saveThreshold" class="form-control"></input>
<label>是否为cloudflare等极端反爬网站</label>
<select id="cloudflare" name="cloudflare" class="form-control">

View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <!--
      Minimal fixture page: one form containing a single <select id="cars">
      dropdown with four options (Volvo, Saab, Mercedes, Audi).
      NOTE(review): presumably automated tasks locate the dropdown by its id
      or by its position under <body>; keep the id and the body's element
      structure stable when editing. Confirm against the tasks that use it.
    -->
    <meta charset="utf-8">
    <title>HTML Select example</title>
</head>
<body>
    <form>
        <label for="cars">Choose a car:</label>
        <select id="cars">
            <option value="volvo">Volvo</option>
            <option value="saab">Saab</option>
            <option value="mercedes">Mercedes</option>
            <option value="audi">Audi</option>
        </select>
    </form>
</body>
</html>

View File

@ -0,0 +1 @@
{"id":127,"name":"Dynamic Iframe","url":"http://localhost:8074/taskGrid/test_pages/iframe.html?address=http://www.ceic.ac.cn/history","links":"http://localhost:8074/taskGrid/test_pages/iframe.html?address=http://www.ceic.ac.cn/history","create_time":"7/5/2023, 4:45:57 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"containJudge":false,"desc":"http://localhost:8074/taskGrid/test_pages/iframe.html?address=http://www.ceic.ac.cn/history","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"http://localhost:8074/taskGrid/test_pages/iframe.html?address=http://www.ceic.ac.cn/history","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"http://localhost:8074/taskGrid/test_pages/iframe.html?address=http://www.ceic.ac.cn/history"},{"id":1,"name":"loopText_1","nodeId":2,"nodeName":"循环","desc":"要输入的文本/网址,多行以\\n分开","type":"string","exampleValue":"1\n2\n","value":"1\n2\n"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"http://localhost:8074/taskGrid/test_pages/iframe.html?address=http://www.ceic.ac.cn/history","links":"http://localhost:8074/taskGrid/test_pages/iframe.html?address=http://www.ceic.ac.cn/history","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":true,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":"3
","pathList":"","textList":"1\n2\n","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0}},{"id":3,"index":3,"parentId":2,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":true,"xpath":"//*[@id=\"weidu1\"]","iframe":true,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"123","allXPaths":["/html/body/div[1]/div[3]/div[1]/div[1]/div[1]/form[1]/div[2]/input[1]","//input[contains(., '')]","id(\"weidu1\")","//INPUT[@class='span1']","//INPUT[@name='weidu1']","/html/body/div[last()-3]/div[last()-1]/div/div/div[last()-1]/form/div[last()-3]/input[last()-1]"]}},{"id":4,"index":4,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"search\"]","iframe":true,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[1]/div[3]/div[1]/div[1]/div[1]/form[1]/div[5]/a[1]","//a[contains(., '查询')]","id(\"search\")","//A[@class='check']","/html/body/div[last()-3]/div[last()-1]/div/div/div[last()-1]/form/div/a"]}}]}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"id":129,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/5/2023, 5:00:59 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div/a","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div/a","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0
,"maxWaitTime":10,"paras":[],"allXPaths":"","loopType":1}}]}

View File

@ -0,0 +1 @@
{"id":130,"name":"百度一下,你就知道","url":"https://www.baidu.com/","links":"https://www.baidu.com/","create_time":"7/5/2023, 5:15:46 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"containJudge":false,"desc":"https://www.baidu.com/","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.baidu.com/","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.baidu.com/"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"把牢正确方向 增进团结互信"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.baidu.com/","links":"https://www.baidu.com/","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[1]/div[5]/div[1]/div[1]/div[3]/ul[1]/li/a[1]","iframe":true,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"/html/body/div[1]/div[1]/div[5]/div[1]/div[1]/div[3]/ul[1]/li/a[1]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","after
JSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":"","loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":1,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":3,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body/div[3]/div[1]/div[1]/div[1]/div[1]/form[1]/span[1]/input[1]","allXPaths":["/html/body/div[3]/div[1]/div[1]/div[1]/div[1]/form[1]/span[1]/input[1]","//input[contains(., '')]","id(\"kw\")","//INPUT[@class='s_ipt']","//INPUT[@name='wd']","/html/body/div[last()-6]/div[last()-3]/div/div[last()-3]/div/form/span[last()-2]/input"],"exampleValues":[{"num":0,"value":"把牢正确方向 增进团结互信"}],"unique_index":"s89sfrujkzljosgb72","iframe":false,"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"id":135,"name":"HTML Select example","url":"http://localhost:8074/taskGrid/test_pages/select.html","links":"http://localhost:8074/taskGrid/test_pages/select.html","create_time":"7/5/2023, 6:52:44 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"containJudge":false,"desc":"http://localhost:8074/taskGrid/test_pages/select.html","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"http://localhost:8074/taskGrid/test_pages/select.html","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"http://localhost:8074/taskGrid/test_pages/select.html"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"http://localhost:8074/taskGrid/test_pages/select.html","links":"http://localhost:8074/taskGrid/test_pages/select.html","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":0,"option":6,"title":"切换下拉选项","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"cars\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":["/html/body/form[1]/select[1]","//select[contains(., '')]","id(\"cars\")","/html/body/form/select"],"optionMode":0,"optionValue":"Saab"}}]}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -10,7 +10,7 @@
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": true,
"args": ["--id", "[38]", "--read_type", "remote", "--headless", "0"]
"args": ["--id", "[95]", "--read_type", "remote", "--headless", "0"]
// "args": ["--id", "[2]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
// "args": ["--id", "[44]", "--headless", "0", "--user_data", "1"]
}

View File

@ -89,7 +89,6 @@ class BrowserThread(Thread):
filter(isnull, service["links"].split("\n"))) # 要执行的link的列表
self.OUTPUT = [] # 采集的数据
self.containJudge = service["containJudge"] # 是否含有判断语句
self.bodyText = "" # 记录bodyText
tOut = service["outputParameters"] # 生成输出参数对象
self.outputParameters = {}
self.dataNotFoundKeys = {} # 记录没有找到数据的key
@ -123,10 +122,6 @@ class BrowserThread(Thread):
except:
para["iframe"] = False
if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2:
# # iframe中提取数据的绝对寻址操作不可优化
# if para["relative"] == False and para["iframe"] == True:
# para["optimizable"] = False
# else:
para["optimizable"] = True
else:
para["optimizable"] = False
@ -206,7 +201,7 @@ class BrowserThread(Thread):
if rt != "":
rt.end()
def execute_code(self, codeMode, code, max_wait_time, element=None):
def execute_code(self, codeMode, code, max_wait_time, element=None, iframe=False):
output = ""
if code == "":
return ""
@ -221,6 +216,24 @@ class BrowserThread(Thread):
except:
replaced_text = code
code = replaced_text
if iframe and self.browser.iframe_env == False:
# 获取所有的 iframe
self.browser.switch_to.default_content()
iframes = self.browser.find_elements(
By.CSS_SELECTOR, "iframe", iframe=False)
# 遍历所有的 iframe 并点击里面的元素
for iframe in iframes:
# 切换到 iframe
try:
self.browser.switch_to.default_content()
self.browser.switch_to.frame(iframe)
self.browser.iframe_env = True
break
except:
print("Iframe switch failed")
elif not iframe and self.browser.iframe_env == True:
self.browser.switch_to.default_content()
self.browser.iframe_env = False
if int(codeMode) == 0:
self.recordLog("Execute JavaScript:" + code)
self.recordLog("执行JavaScript:" + code)
@ -271,12 +284,13 @@ class BrowserThread(Thread):
By.XPATH, loopPath, iframe=paras["iframe"])
element = elements[index]
output = self.execute_code(
codeMode, code, max_wait_time, element)
codeMode, code, max_wait_time, element, iframe=paras["iframe"])
except:
output = ""
print("JavaScript execution failed")
else:
output = self.execute_code(codeMode, code, max_wait_time)
output = self.execute_code(
codeMode, code, max_wait_time, iframe=paras["iframe"])
recordASField = int(paras["recordASField"])
if recordASField:
self.outputParameters[node["title"]] = output
@ -321,9 +335,12 @@ class BrowserThread(Thread):
time.sleep(0.1) # 移动之前等待0.1秒
if para["useLoop"]: # 使用循环的情况下传入的clickPath就是实际的xpath
path = loopPath
# element = loopElement
else:
index = 0
path = para["xpath"] # 不然使用元素定义的xpath
# element = self.browser.find_element(
# By.XPATH, path, iframe=para["iframe"])
try:
elements = self.browser.find_elements(
By.XPATH, path, iframe=para["iframe"])
@ -405,7 +422,9 @@ class BrowserThread(Thread):
break
elif tType == 1: # 当前页面包含文本
try:
if self.bodyText.find(cnode["parameters"]["value"]) >= 0:
bodyText = self.browser.find_element(
By.CSS_SELECTOR, "body", iframe=cnode["parameters"]["iframe"]).text
if bodyText.find(cnode["parameters"]["value"]) >= 0:
executeBranchId = i
break
except: # 找不到元素下一个条件
@ -426,7 +445,7 @@ class BrowserThread(Thread):
continue
elif tType == 4: # 当前循环元素包括元素
try:
if loopElement.find_element(By.XPATH, cnode["parameters"]["value"][1:], iframe=cnode["parameters"]["iframe"]):
if loopElement.find_element(By.XPATH, cnode["parameters"]["value"][1:]):
executeBranchId = i
break
except: # 找不到元素或者xpath写错了下一个条件
@ -434,13 +453,13 @@ class BrowserThread(Thread):
elif tType <= 7: # JS命令返回值
if tType == 5: # JS命令返回值等于
output = self.execute_code(
0, cnode["parameters"]["code"], cnode["parameters"]["waitTime"])
0, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], iframe=cnode["parameters"]["iframe"])
elif tType == 6: # System
output = self.execute_code(
1, cnode["parameters"]["code"], cnode["parameters"]["waitTime"])
1, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], iframe=cnode["parameters"]["iframe"])
elif tType == 7: # 针对当前循环项的JS命令返回值
output = self.execute_code(
2, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], loopElement)
2, cnode["parameters"]["code"], cnode["parameters"]["waitTime"], loopElement, iframe=cnode["parameters"]["iframe"])
try:
if output.find("rue") != -1: # 如果返回值中包含true
code = 1
@ -512,7 +531,7 @@ class BrowserThread(Thread):
break
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
output = self.execute_code(int(
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
code = get_output_code(output)
if code <= 0:
break
@ -547,13 +566,13 @@ class BrowserThread(Thread):
time.sleep(node["parameters"]["historyWait"])
# else:
# time.sleep(2)
# 切换历史记录等待2秒或者
# 切换历史记录等待
self.Log("Change history back time or:",
node["parameters"]["historyWait"])
self.browser.execute_script('window.stop()')
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
output = self.execute_code(int(
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
code = get_output_code(output)
if code <= 0:
break
@ -600,7 +619,7 @@ class BrowserThread(Thread):
raise
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
output = self.execute_code(int(
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
code = get_output_code(output)
if code <= 0:
break
@ -612,7 +631,7 @@ class BrowserThread(Thread):
self.executeNode(i, text, "", 0)
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
output = self.execute_code(int(
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
code = get_output_code(output)
if code <= 0:
break
@ -630,7 +649,7 @@ class BrowserThread(Thread):
self.executeNode(i, url, "", 0)
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
output = self.execute_code(int(
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"])
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
code = get_output_code(output)
if code <= 0:
break
@ -638,10 +657,10 @@ class BrowserThread(Thread):
while True: # do while循环
if int(node["parameters"]["loopType"]) == 5: # JS
output = self.execute_code(
0, node["parameters"]["code"], node["parameters"]["waitTime"])
0, node["parameters"]["code"], node["parameters"]["waitTime"], iframe=node["parameters"]["iframe"])
elif int(node["parameters"]["loopType"]) == 6: # System
output = self.execute_code(
1, node["parameters"]["code"], node["parameters"]["waitTime"])
1, node["parameters"]["code"], node["parameters"]["waitTime"], iframe=node["parameters"]["iframe"])
code = get_output_code(output)
if code <= 0:
break
@ -707,29 +726,8 @@ class BrowserThread(Thread):
except:
self.history["index"] = 0
self.scrollDown(para) # 控制屏幕向下滚动
if self.containJudge:
try:
self.bodyText = self.browser.find_element(
By.CSS_SELECTOR, "body", iframe=False).text
self.Log('URL Page: ' + url)
self.recordLog('URL Page: ' + url)
except TimeoutException:
self.Log(
'Time out after set seconds when getting body text: ' + url)
self.recordLog(
'Time out after set seconds when getting body text:: ' + url)
self.browser.execute_script('window.stop()')
time.sleep(1)
self.Log("Need to wait 1 second to get body text")
# 再执行一遍
self.bodyText = self.browser.find_element(
By.CSS_SELECTOR, "body", iframe=False).text
except Exception as e:
self.Log(e)
self.recordLog(str(e))
# 键盘输入事件
def inputInfo(self, para, loopValue):
time.sleep(0.1) # 输入之前等待0.1秒
self.Log("Wait 0.1 second before input")
@ -739,7 +737,7 @@ class BrowserThread(Thread):
# textbox.send_keys(Keys.CONTROL, 'a')
# textbox.send_keys(Keys.BACKSPACE)
self.execute_code(
2, para["beforeJS"], para["beforeJSWaitTime"], textbox) # 执行前置JS
2, para["beforeJS"], para["beforeJSWaitTime"], textbox, iframe=para["iframe"]) # 执行前置JS
# Send the HOME key
textbox.send_keys(Keys.HOME)
# Send the SHIFT + END key combination
@ -764,10 +762,7 @@ class BrowserThread(Thread):
if value.lower().find("<enter>") >= 0:
textbox.send_keys(Keys.ENTER)
self.execute_code(
2, para["afterJS"], para["afterJSWaitTime"], textbox) # 执行后置js
# global bodyText # 每次执行点击输入元素和打开网页操作后需要更新bodyText
self.bodyText = self.browser.find_element(
By.CSS_SELECTOR, "body").text
2, para["afterJS"], para["afterJSWaitTime"], textbox, iframe=para["iframe"]) # 执行后置js
except:
print("Cannot find input box element:" +
para["xpath"] + ", please try to set the wait time before executing this operation")
@ -780,10 +775,6 @@ class BrowserThread(Thread):
def clickElement(self, para, loopElement=None, clickPath="", index=0):
time.sleep(0.1) # 点击之前等待0.1秒
self.Log("Wait 0.1 second before clicking element")
if para["useLoop"]: # 使用循环的情况下传入的clickPath就是实际的xpath
path = clickPath
else:
path = para["xpath"] # 不然使用元素定义的xpath
try:
maxWaitTime = int(para["maxWaitTime"])
except:
@ -792,11 +783,22 @@ class BrowserThread(Thread):
self.browser.set_script_timeout(maxWaitTime)
# 点击前对该元素执行一段JavaScript代码
try:
element = self.browser.find_element(
# element = self.browser.find_element(
# By.XPATH, path, iframe=para["iframe"])
if para["useLoop"]: # 使用循环的情况下传入的clickPath就是实际的xpath
path = clickPath
# element = loopElement
else:
index = 0
path = para["xpath"] # 不然使用元素定义的xpath
# element = self.browser.find_element(
# By.XPATH, path, iframe=para["iframe"])
elements = self.browser.find_elements(
By.XPATH, path, iframe=para["iframe"])
element = elements[index]
if para["beforeJS"] != "":
self.execute_code(2, para["beforeJS"],
para["beforeJSWaitTime"], element)
para["beforeJSWaitTime"], element, iframe=para["iframe"])
except:
print("Cannot find element:" +
path + ", please try to set the wait time before executing this operation")
@ -809,7 +811,7 @@ class BrowserThread(Thread):
except:
click_way = 0
try:
if click_way == 0 or para["iframe"]: # 用selenium的点击方法
if click_way == 0: # 用selenium的点击方法
actions = ActionChains(self.browser) # 实例化一个action对象
actions.click(element).perform()
elif click_way == 1: # 用js的点击方法
@ -824,13 +826,13 @@ class BrowserThread(Thread):
except Exception as e:
self.Log(e)
self.recordLog(str(e))
# 点击对该元素执行一段JavaScript代码
# 点击对该元素执行一段JavaScript代码
try:
if para["afterJS"] != "":
element = self.browser.find_element(
By.XPATH, path, iframe=para["iframe"])
self.execute_code(2, para["afterJS"],
para["afterJSWaitTime"], element)
para["afterJSWaitTime"], element, iframe=para["iframe"])
except:
print("Cannot find element:" + path)
self.recordLog("Cannot find element:" +
@ -866,25 +868,6 @@ class BrowserThread(Thread):
"return history.length")
# 如果打开了新窗口,切换到新窗口
self.scrollDown(para) # 根据参数配置向下滚动
if self.containJudge: # 有判断语句才执行以下操作
# global bodyText # 每次执行点击输入元素和打开网页操作后需要更新bodyText
try:
self.bodyText = self.browser.find_element(
By.CSS_SELECTOR, "body").text
except TimeoutException:
self.Log('Time out after 10 seconds when getting body text')
self.recordLog(
'Time out after 10 seconds when getting body text')
self.browser.execute_script('window.stop()')
time.sleep(1)
self.Log("wait one second after get body text")
# 再执行一遍
self.bodyText = self.browser.find_element(
By.CSS_SELECTOR, "body").text
# rt.end()
except Exception as e:
self.Log(e)
self.recordLog(str(e))
# rt.end()
def get_content(self, p, element):
@ -1000,7 +983,8 @@ class BrowserThread(Thread):
print(e)
print("注意以上错误要使用OCR识别功能你需要安装Tesseract-OCR并将其添加到环境变量PATH中添加后需重启EasySpiderhttps://blog.csdn.net/u010454030/article/details/80515501\nhttps://www.bilibili.com/video/BV1xz4y1b72D/")
elif p["contentType"] == 9:
content = self.execute_code(2, p["JS"], p["JSWaitTime"], element)
content = self.execute_code(
2, p["JS"], p["JSWaitTime"], element, iframe=p["iframe"])
elif p["contentType"] == 10: # 下拉框选中的值
try:
select_element = Select(element)
@ -1020,9 +1004,19 @@ class BrowserThread(Thread):
def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
pageHTML = etree.HTML(self.browser.page_source)
try:
loopElementOuterHTML = loopElement.get_attribute('outerHTML')
except:
if loopElement != "": # 只在数据在循环中提取时才需要获取循环元素
try:
loopElementOuterHTML = loopElement.get_attribute('outerHTML')
except:
try: # 循环点击每个链接如果没有新标签页打开loopElement会丢失此时需要重新获取
elements = self.browser.find_elements(
By.XPATH, parentPath, iframe=para["paras"][0]["iframe"])
loopElement = elements[index]
loopElementOuterHTML = loopElement.get_attribute(
'outerHTML')
except:
loopElementOuterHTML = ""
else:
loopElementOuterHTML = ""
loopElementHTML = etree.HTML(loopElementOuterHTML)
for p in para["paras"]:
@ -1053,7 +1047,7 @@ class BrowserThread(Thread):
content = loopElementHTML.xpath(
"/html/body/" + loopElementHTML[0][0].tag + xpath)
else:
if xpath.find("/html/body") < 0:
if xpath.find("/body") < 0:
xpath = "/html/body" + xpath
content = pageHTML.xpath(xpath)
if len(content) > 0:
@ -1145,7 +1139,7 @@ class BrowserThread(Thread):
By.XPATH, "//body", iframe=p["iframe"])
try:
self.execute_code(
2, p["beforeJS"], p["beforeJSWaitTime"], element) # 执行前置js
2, p["beforeJS"], p["beforeJSWaitTime"], element, iframe=p["iframe"]) # 执行前置js
content = self.get_content(p, element)
except StaleElementReferenceException: # 发生找不到元素的异常后,等待几秒重新查找
self.recordLog(
@ -1174,7 +1168,7 @@ class BrowserThread(Thread):
continue # 再出现类似问题直接跳过
self.outputParameters[p["name"]] = content
self.execute_code(
2, p["afterJS"], p["afterJSWaitTime"], element) # 执行后置JS
2, p["afterJS"], p["afterJSWaitTime"], element, iframe=p["iframe"]) # 执行后置JS
line = []
for value in self.outputParameters.values():
line.append(value)

View File

@ -25,10 +25,10 @@ class MyChrome(webdriver.Chrome):
super().__init__(*args, **kwargs) # 调用父类的 __init__
def find_element(self, by=By.ID, value=None, iframe=False):
# 在这里改变查找元素的行为
if self.iframe_env:
super().switch_to.default_content()
self.iframe_env = False
# 在这里改变查找元素的行为
if iframe:
# 获取所有的 iframe
try:
@ -47,7 +47,7 @@ class MyChrome(webdriver.Chrome):
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
element = super().find_element(by=by, value=value)
find_element = True
except NoSuchElementException:
except:
print("No such element found in the iframe")
# 完成操作后切回主文档
# super().switch_to.default_content()
@ -59,10 +59,10 @@ class MyChrome(webdriver.Chrome):
return super().find_element(by=by, value=value)
def find_elements(self, by=By.ID, value=None, iframe=False):
# 在这里改变查找元素的行为
if self.iframe_env:
super().switch_to.default_content()
self.iframe_env = False
# 在这里改变查找元素的行为
if iframe:
# 获取所有的 iframe
iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
@ -70,18 +70,21 @@ class MyChrome(webdriver.Chrome):
# 遍历所有的 iframe 并点击里面的元素
for iframe in iframes:
# 切换到 iframe
super().switch_to.default_content()
super().switch_to.frame(iframe)
self.iframe_env = True
# 在 iframe 中查找并点击元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
elements = super().find_elements(by=by, value=value)
if len(elements) > 0:
find_element = True
# 完成操作后切回主文档
# super().switch_to.default_content()
if find_element:
return elements
try:
super().switch_to.default_content()
super().switch_to.frame(iframe)
self.iframe_env = True
# 在 iframe 中查找并点击元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
elements = super().find_elements(by=by, value=value)
if len(elements) > 0:
find_element = True
# 完成操作后切回主文档
# super().switch_to.default_content()
if find_element:
return elements
except:
print("No such element found in the iframe")
if not find_element:
raise NoSuchElementException
else: