mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-20 04:39:57 +08:00
可以选择部分字段不输出到文件或数据库
This commit is contained in:
parent
dbab4e5055
commit
548414b12f
Binary file not shown.
Binary file not shown.
@ -242,15 +242,16 @@
|
||||
</div>
|
||||
<label>参数类型转换为(用于Excel和数据库):</label>
|
||||
<select v-model='paras.parameters[paraIndex]["paraType"]' class="form-control">
|
||||
<option value = "text">文本(长度预估超过1万请选择大文本)</option>
|
||||
<option value = "int">整数</option>
|
||||
<option value = "text">文本(单个值长度预估超过1万请选择大文本)</option>
|
||||
<option value = "int">整数(位数在9位以内)</option>
|
||||
<option value = "double">浮点数(小数)</option>
|
||||
<option value = "mediumText">大文本(单个值长度超过1万)</option>
|
||||
<option value = "mediumText">大文本(单个值长度超过1万低于100万)</option>
|
||||
<option value = "datetime">日期时间</option>
|
||||
<option value = "date">日期</option>
|
||||
<option value = "time">时间</option>
|
||||
<option value = "varchar">小文本(单个值长度小于50)</option>
|
||||
<option value = "longText">超大文本(单个值长度超过100万)</option>
|
||||
<option value = "bigInt">大整数(位数超过9位)</option>
|
||||
</select>
|
||||
<label>采集内容类型</label>
|
||||
<select v-model='paras.parameters[paraIndex]["contentType"]' class="form-control">
|
||||
@ -263,14 +264,15 @@
|
||||
<option :value = 6>页面标题</option>
|
||||
<option :value = 7>元素截图</option>
|
||||
<option :value = 8>OCR识别文字</option>
|
||||
<option :value = 9>针对该元素的JavaScript代码返回值(需以return 开头)</option>
|
||||
<option :value = 9>(针对该元素的)JavaScript代码返回值(需以return 开头)</option>
|
||||
<option :value = 12>系统命令返回值</option>
|
||||
<option :value = 10>当前选择框选中的选项值</option>
|
||||
<option :value = 11>当前选择框选中的选项文本</option>
|
||||
</select>
|
||||
<div v-if='paras.parameters[paraIndex]["contentType"] == 9'>
|
||||
<label>JavaScript代码:</label>
|
||||
<div v-if='paras.parameters[paraIndex]["contentType"] == 9 || paras.parameters[paraIndex]["contentType"] == 12'>
|
||||
<label>JavaScript代码(也可以不针对该元素,直接写return JS代码即可)/系统命令代码:</label>
|
||||
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2"
|
||||
placeholder='该元素用arguments[0]来表示,示例:return arguments[0].innerText + "美元",即实现了提取该元素innerText并后面加“美元”的功能。' v-model='paras.parameters[paraIndex]["JS"]'></textarea>
|
||||
placeholder='如要针对该元素,则该元素用arguments[0]来表示,示例:return arguments[0].innerText + "美元",即实现了提取该元素innerText并后面加“美元”的功能;不然直接如写return new Date().toString()即可获得当前时间戳。' v-model='paras.parameters[paraIndex]["JS"]'></textarea>
|
||||
<label>最长等待脚本执行时间(0代表无限等待): </label>
|
||||
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='paras.parameters[paraIndex]["JSWaitTime"]'></input>
|
||||
</div>
|
||||
@ -294,7 +296,11 @@
|
||||
<!-- <option :value = 0>普通提取</option>-->
|
||||
<!-- <option :value = 1>OCR提取</option>-->
|
||||
<!-- </select>-->
|
||||
|
||||
<label style="margin-top: 15px">是否保存该字段(只想把此字段当变量而不想保存时可选否):</label>
|
||||
<select v-model='paras.parameters[paraIndex]["recordASField"]' class="form-control">
|
||||
<option :value = 1>是</option>
|
||||
<option :value = 0>否</option>
|
||||
</select>
|
||||
<label>参数描述:</label>
|
||||
<textarea onkeydown="inputDelete(event)" class="form-control" style="min-height: 60px" v-model='paras.parameters[paraIndex]["desc"]'></textarea>
|
||||
<label>元素找不到时的值:</label>
|
||||
@ -346,21 +352,30 @@
|
||||
<option value = 0>执行一段JavaScript脚本</option>
|
||||
<option value = 1>执行一段操作系统级别命令</option>
|
||||
<option v-if="nowNode['isInLoop']" value = 2>针对当前循环项的JavaScript脚本</option>
|
||||
<option v-if="nowNode['isInLoop']" value = 3>退出当前循环(Break操作,选择这个可以忽略下面的选项)</option>
|
||||
<!-- <option v-if="nowNode['isInLoop']" value = 4>跳过当前循环后面的操作(Continue操作,选择这个可以忽略下面的选项)</option>-->
|
||||
</select>
|
||||
|
||||
<div>
|
||||
<div v-if='nowNode["parameters"]["codeMode"] < 3'>
|
||||
<label>代码/脚本内容(用Field["字段名"]来输入某字段提取到的最新值): </label>
|
||||
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["code"]' placeholder="输入JS或系统命令,如:document.body.innerText = '1' 或 python D:/test.py,分别为JS命令和系统命令示例。如选择针对当前循环项的JS脚本,则循环项元素用arguments[0]表示,如arguments[0].style.color = 'blue'"></textarea>
|
||||
<p style="margin-top: 15px">是否将执行后的输出/返回值作为字段记录:</p>
|
||||
<p><select v-model='nowNode["parameters"]["recordASField"]' class="form-control">
|
||||
<option value = 0>否</option>
|
||||
<option value = 1>是(JavaScript脚本需要以return 开头)</option>
|
||||
</select>
|
||||
<label>参数转换类型:</label>
|
||||
<option :value = 0>否</option>
|
||||
<option :value = 1>是(JavaScript脚本需要以return 开头)</option>
|
||||
</select></p>
|
||||
<p><label>参数类型转换为:</label>
|
||||
<select v-model='nowNode["parameters"]["paraType"]' class="form-control">
|
||||
<option value = "text">文本</option>
|
||||
<option value = "int">整数</option>
|
||||
<option value = "double">浮点数</option>
|
||||
<option value = "text">文本(单个值长度预估超过1万请选择大文本)</option>
|
||||
<option value = "int">整数(位数在9位以内)</option>
|
||||
<option value = "double">浮点数(小数)</option>
|
||||
<option value = "mediumText">大文本(单个值长度超过1万低于100万)</option>
|
||||
<option value = "datetime">日期时间</option>
|
||||
<option value = "date">日期</option>
|
||||
<option value = "time">时间</option>
|
||||
<option value = "varchar">小文本(单个值长度小于50)</option>
|
||||
<option value = "longText">超大文本(单个值长度超过100万)</option>
|
||||
<option value = "bigInt">大整数(位数超过9位)</option>
|
||||
</select>
|
||||
</p>
|
||||
<label>最长等待脚本执行时间(0代表无限等待): </label>
|
||||
@ -457,7 +472,7 @@
|
||||
<input onkeydown="inputDelete(event)" class="form-control" v-model.number="nowNode['parameters']['scrollWaitTime']" type="number" required></input>
|
||||
<div id="breakAdvanced" v-if='nowNode["parameters"]["loopType"] < 5'>
|
||||
<div>
|
||||
<p><label>(高级操作)使用代码/脚本定义循环退出条件: </label></p>
|
||||
<p><label>(高级操作)使用代码/脚本定义循环退出条件(也可以在流程中添加自定义操作,然后选择Break选项): </label></p>
|
||||
<select v-model='nowNode["parameters"]["breakMode"]' class="form-control" style="font-weight: bold">
|
||||
<option value = 0>不设置脚本(选择这个下面写了脚本也不会执行)</option>
|
||||
<option value = 1>JavaScript脚本返回值(需以return 开头)</option>
|
||||
@ -465,7 +480,7 @@
|
||||
</select>
|
||||
<div>
|
||||
<textarea style="margin-top: 10px" onkeydown="inputDelete(event)" class="form-control" rows="2"
|
||||
placeholder='命令返回值小于等于0或为假时则直接退出循环,不管其他条件如何。如:return document.body.scrollWidth > 1000 或 python D:/test.py,分别为JS命令和系统命令返回值示例。' v-model='nowNode["parameters"]["breakCode"]'></textarea>
|
||||
placeholder='命令返回值小于等于0或为假时则直接退出循环,不管其他条件如何。如:document.evaluate("//div[1]/a", document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue == null 或 python D:/test.py,分别为JS命令(判断某元素是否存在)和系统命令返回值示例。' v-model='nowNode["parameters"]["breakCode"]'></textarea>
|
||||
<label>最长等待脚本执行时间(0代表无限等待):</label>
|
||||
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["breakCodeWaitTime"]'></input>
|
||||
</div>
|
||||
@ -548,8 +563,8 @@
|
||||
<option value = "txt">TXT</option>
|
||||
<option value = "mysql">MySQL数据库</option>
|
||||
</select>
|
||||
<label>导出文件名/数据库表格名称(如果值为“Time”则名称为执行任务时的时间):</label>
|
||||
<input onkeydown="inputDelete(event)" value="Time" id="saveName" class="form-control"></input>
|
||||
<label>导出文件名/数据库表格名称(名称中的“current_time”会被替换为执行任务时的时间戳):</label>
|
||||
<input onkeydown="inputDelete(event)" value="current_time" id="saveName" class="form-control"></input>
|
||||
<label>是否为cloudflare等极端反爬网站:</label>
|
||||
<select id="cloudflare" name="cloudflare" class="form-control">
|
||||
<option value = 0>否</option>
|
||||
|
@ -112,7 +112,8 @@ let app = new Vue({
|
||||
"name": "自定义参数_" + this.nowNode["parameters"]["paras"].length.toString(),
|
||||
"desc": "",
|
||||
"extractType": 0,
|
||||
"relativeXPath": "",
|
||||
"relativeXPath": "//body",
|
||||
"recordASField": 1,
|
||||
"allXPaths": [],
|
||||
"exampleValues": [
|
||||
{
|
||||
@ -124,6 +125,7 @@ let app = new Vue({
|
||||
"beforeJS": "",
|
||||
"beforeJSWaitTime": 0,
|
||||
"JS": "",
|
||||
"paraType": "text",
|
||||
"JSWaitTime": 0,
|
||||
"afterJS": "",
|
||||
"afterJSWaitTime": 0,
|
||||
|
@ -186,18 +186,18 @@
|
||||
<table class="table table-bordered">
|
||||
<tbody>
|
||||
<tr>
|
||||
<th style="min-width: 50px;">ID</th>
|
||||
<th>{{"Parameter Name~参数名称" | lang}}</th>
|
||||
<th>{{"Invoke Name~调用名称" | lang}}</th>
|
||||
<th>{{"Parameter Type~参数类型" | lang}}</th>
|
||||
<th style="min-width: 50px;text-align: center">ID</th>
|
||||
<th style="text-align: center">{{"Parameter Name~参数名称" | lang}}</th>
|
||||
<th style="text-align: center">{{"Invoke Name~调用名称" | lang}}</th>
|
||||
<th style="text-align: center">{{"Parameter Type~参数类型" | lang}}</th>
|
||||
<th>{{"Parameter Value~参数值" | lang}}</th>
|
||||
</tr>
|
||||
|
||||
<tr v-for="i in task.inputParameters.length" v-if="task.inputParameters.length>0">
|
||||
<td style="min-width: 50px;">{{i}}</td>
|
||||
<td style="max-width: 100px;">{{task.inputParameters[i-1]["nodeName"]}}</td>
|
||||
<td>{{task.inputParameters[i-1]["name"]}}</td>
|
||||
<td style="max-width: 100px;">{{task.inputParameters[i-1]["type"]}}</td>
|
||||
<td style="min-width: 50px;text-align: center">{{i}}</td>
|
||||
<td style="max-width: 100px; text-align: center">{{task.inputParameters[i-1]["nodeName"]}}</td>
|
||||
<td style="text-align: center">{{task.inputParameters[i-1]["name"]}}</td>
|
||||
<td style="max-width: 100px; text-align: center">{{task.inputParameters[i-1]["type"]}}</td>
|
||||
<td><textarea class="form-control"
|
||||
style="min-height: 50px;min-width: 300px;" v-bind:name="task.inputParameters[i-1]['name']"
|
||||
v-model="task.inputParameters[i-1]['value']"></textarea></td>
|
||||
|
@ -42,6 +42,7 @@ ws.onmessage = function(evt) {
|
||||
function changeGetDataParameters(msg, i) {
|
||||
msg["parameters"][i]["default"] = ""; //找不到元素时候的默认值
|
||||
msg["parameters"][i]["paraType"] = "text"; //参数类型
|
||||
msg["parameters"][i]["recordASField"] = 1; //是否记录为字段值
|
||||
msg["parameters"][i]["beforeJS"] = ""; //执行前执行的js
|
||||
msg["parameters"][i]["beforeJSWaitTime"] = 0; //执行前js等待时间
|
||||
msg["parameters"][i]["JS"] = ""; //如果是JS,需要执行的js
|
||||
@ -393,32 +394,34 @@ function saveService(type) {
|
||||
name: nodeList[i]["parameters"]["paras"][j]["name"],
|
||||
desc: nodeList[i]["parameters"]["paras"][j]["desc"],
|
||||
type: nodeList[i]["parameters"]["paras"][j]["paraType"],
|
||||
recordASField: nodeList[i]["parameters"]["paras"][j]["recordASField"],
|
||||
exampleValue: nodeList[i]["parameters"]["paras"][j]["exampleValues"][0]["value"],
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (nodeList[i]["option"] == 5) //自定义操作
|
||||
{
|
||||
if (nodeList[i]["parameters"]["recordASField"] == 1) {
|
||||
let id = outputIndex++;
|
||||
let title = nodeList[i]["title"];
|
||||
// if (outputNames.indexOf(title) >= 0) { //参数名称已经被添加
|
||||
// $('#myModal').modal('hide');
|
||||
// $("#tip2").slideDown(); //提示框
|
||||
// fadeout = setTimeout(function() {
|
||||
// $("#tip2").slideUp();
|
||||
// }, 5000);
|
||||
// return;
|
||||
// }
|
||||
outputNames.push(title);
|
||||
outputParameters.push({
|
||||
id: id,
|
||||
name: title,
|
||||
desc: "自定义操作返回的数据",
|
||||
type: nodeList[i]["parameters"]["paraType"],
|
||||
exampleValue: "",
|
||||
});
|
||||
}
|
||||
// if (nodeList[i]["parameters"]["recordASField"] == 1) {
|
||||
let id = outputIndex++;
|
||||
let title = nodeList[i]["title"];
|
||||
// if (outputNames.indexOf(title) >= 0) { //参数名称已经被添加
|
||||
// $('#myModal').modal('hide');
|
||||
// $("#tip2").slideDown(); //提示框
|
||||
// fadeout = setTimeout(function() {
|
||||
// $("#tip2").slideUp();
|
||||
// }, 5000);
|
||||
// return;
|
||||
// }
|
||||
outputNames.push(title);
|
||||
outputParameters.push({
|
||||
id: id,
|
||||
name: title,
|
||||
desc: "自定义操作返回的数据",
|
||||
type: nodeList[i]["parameters"]["paraType"],
|
||||
recordASField: nodeList[i]["parameters"]["recordASField"],
|
||||
exampleValue: "",
|
||||
});
|
||||
// }
|
||||
} else if (nodeList[i]["option"] == 9) //条件判断
|
||||
{
|
||||
containJudge = true;
|
||||
|
@ -54,28 +54,28 @@
|
||||
<table class="table table-bordered">
|
||||
<tbody>
|
||||
<tr>
|
||||
<th style="min-width: 50px;">ID</th>
|
||||
<th>{{"Parameter Name~参数名称" | lang}}</th>
|
||||
<th>{{"Invoke Name~调用名称" | lang}}</th>
|
||||
<th>{{"Parameter Type~参数类型" | lang}}</th>
|
||||
<th style="min-width: 50px; text-align: center">ID</th>
|
||||
<th style="text-align: center">{{"Parameter Name~参数名称" | lang}}</th>
|
||||
<th style="text-align: center">{{"Invoke Name~调用名称" | lang}}</th>
|
||||
<th style="text-align: center">{{"Parameter Type~参数类型" | lang}}</th>
|
||||
<th>{{"Example Value~示例值" | lang}}</th>
|
||||
<th>{{"Parameter Description~参数描述" | lang}}</th>
|
||||
</tr>
|
||||
<tr v-if="task.inputParameters.length>0" v-for="i in task.inputParameters.length">
|
||||
<td style="min-width: 50px;">{{i}}</td>
|
||||
<td>{{task.inputParameters[i-1]["nodeName"]}}</td>
|
||||
<td>{{task.inputParameters[i-1]["name"]}}</td>
|
||||
<td>{{task.inputParameters[i-1]["type"]}}</td>
|
||||
<td style="min-width: 50px; text-align: center">{{i}}</td>
|
||||
<td style="text-align: center">{{task.inputParameters[i-1]["nodeName"]}}</td>
|
||||
<td style="text-align: center">{{task.inputParameters[i-1]["name"]}}</td>
|
||||
<td style="text-align: center">{{task.inputParameters[i-1]["type"]}}</td>
|
||||
<td>{{task.inputParameters[i-1]["exampleValue"]}}</td>
|
||||
<td>{{task.inputParameters[i-1]["desc"]}}</td>
|
||||
</tr>
|
||||
<tr v-if="task.inputParameters.length==0">
|
||||
<td>{{"Empty~无" | lang}}</td>
|
||||
<td>{{"Empty~无" | lang}}</td>
|
||||
<td>{{"Empty~无" | lang}}</td>
|
||||
<td>{{"Empty~无" | lang}}</td>
|
||||
<td>{{"Empty~无" | lang}}</td>
|
||||
<td>{{"Empty~无" | lang}}</td>
|
||||
<td style="text-align: center">{{"Empty~无" | lang}}</td>
|
||||
<td style="text-align: center">{{"Empty~无" | lang}}</td>
|
||||
<td style="text-align: center">{{"Empty~无" | lang}}</td>
|
||||
<td style="text-align: center">{{"Empty~无" | lang}}</td>
|
||||
<td style="text-align: center">{{"Empty~无" | lang}}</td>
|
||||
<td style="text-align: center">{{"Empty~无" | lang}}</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
@ -83,25 +83,28 @@
|
||||
<table class="table table-bordered">
|
||||
<tbody>
|
||||
<tr>
|
||||
<th style="min-width: 50px;">ID</th>
|
||||
<th>{{"Parameter Name~参数名称" | lang}}</th>
|
||||
<th>{{"Parameter Type~参数类型" | lang}}</th>
|
||||
<th style="min-width: 50px; text-align: center">ID</th>
|
||||
<th style="text-align: center">{{"Parameter Name~参数名称" | lang}}</th>
|
||||
<th style="text-align: center">{{"Parameter Type~参数类型" | lang}}</th>
|
||||
<th>{{"Example Value~示例值" | lang}}</th>
|
||||
<th>{{"Parameter Description~参数描述" | lang}}</th>
|
||||
<th style="text-align: center">{{"Record as a field~作为字段保存" | lang}}</th>
|
||||
</tr>
|
||||
<tr v-if="task.outputParameters.length>0" v-for="i in task.outputParameters.length">
|
||||
<td style="min-width: 50px;">{{i}}</td>
|
||||
<td>{{task.outputParameters[i-1]["name"]}}</td>
|
||||
<td>{{task.outputParameters[i-1]["type"]}}</td>
|
||||
<td style="min-width: 50px; text-align: center">{{i}}</td>
|
||||
<td style="text-align: center">{{task.outputParameters[i-1]["name"]}}</td>
|
||||
<td style="text-align: center">{{task.outputParameters[i-1]["type"]}}</td>
|
||||
<td>{{task.outputParameters[i-1]["exampleValue"]}}</td>
|
||||
<td>{{task.outputParameters[i-1]["desc"]}}</td>
|
||||
<td style="text-align: center">{{task.outputParameters[i-1]["recordASField"] == 1? "Yes~是": "No~否" | lang}}</td>
|
||||
</tr>
|
||||
<tr v-if="task.outputParameters.length==0">
|
||||
<td style="min-width: 50px;">{{"Empty~无" | lang}}</td>
|
||||
<td>{{"Empty~无" | lang}}</td>
|
||||
<td>{{"Empty~无" | lang}}</td>
|
||||
<td>{{"Empty~无" | lang}}</td>
|
||||
<td>{{"Empty~无" | lang}}</td>
|
||||
<td style="min-width: 50px;text-align: center">{{"Empty~无" | lang}}</td>
|
||||
<td style="text-align: center">{{"Empty~无" | lang}}</td>
|
||||
<td style="text-align: center">{{"Empty~无" | lang}}</td>
|
||||
<td style="text-align: center">{{"Empty~无" | lang}}</td>
|
||||
<td style="text-align: center">{{"Empty~无" | lang}}</td>
|
||||
<td style="text-align: center">{{"Empty~无" | lang}}</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
1
ElectronJS/tasks/151.json
Normal file
1
ElectronJS/tasks/151.json
Normal file
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/152.json
Normal file
1
ElectronJS/tasks/152.json
Normal file
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/153.json
Normal file
1
ElectronJS/tasks/153.json
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
2
ExecuteStage/.vscode/launch.json
vendored
2
ExecuteStage/.vscode/launch.json
vendored
@ -12,7 +12,7 @@
|
||||
"justMyCode": true,
|
||||
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
|
||||
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
||||
"args": ["--id", "[22]", "--headless", "0", "--user_data", "1"]
|
||||
"args": ["--id", "[69]", "--headless", "0", "--user_data", "1"]
|
||||
}
|
||||
]
|
||||
}
|
@ -42,7 +42,7 @@ from PIL import Image
|
||||
# import uuid
|
||||
from threading import Thread, Event
|
||||
from myChrome import MyChrome
|
||||
from utils import check_pause, download_image, get_output_code, isnull, myMySQL, write_to_csv, write_to_excel
|
||||
from utils import check_pause, download_image, get_output_code, isnull, myMySQL, new_line, write_to_csv, write_to_excel
|
||||
desired_capabilities = DesiredCapabilities.CHROME
|
||||
desired_capabilities["pageLoadStrategy"] = "none"
|
||||
|
||||
@ -58,20 +58,17 @@ class BrowserThread(Thread):
|
||||
self.saveName = service["saveName"] # 保存文件的名字
|
||||
except:
|
||||
now = datetime.now()
|
||||
# 将时间格式化为精确到毫秒的字符串
|
||||
self.saveName = now.strftime("%Y_%m_%d_%H_%M_%S_%f")
|
||||
# 将时间格式化为精确到秒的字符串
|
||||
self.saveName = now.strftime("%Y_%m_%d_%H_%M_%S")
|
||||
self.log = ""
|
||||
self.OUTPUT = ""
|
||||
self.SAVED = False
|
||||
|
||||
self.BREAK = False
|
||||
# 名称设定
|
||||
if saveName != "": # 命令行覆盖保存名称
|
||||
self.saveName = saveName # 保存文件的名字
|
||||
elif self.saveName == "Time":
|
||||
# 获取当前时间
|
||||
now = datetime.now()
|
||||
# 将时间格式化为精确到毫秒的字符串
|
||||
self.saveName = now.strftime("%Y_%m_%d_%H_%M_%S_%f")
|
||||
now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
|
||||
self.saveName = self.saveName.replace("current_time", now)
|
||||
|
||||
print("Save Name for task ID", i, "is:", self.saveName)
|
||||
print("任务ID", i, "的保存文件名为:", self.saveName)
|
||||
@ -121,37 +118,49 @@ class BrowserThread(Thread):
|
||||
self.links = list(
|
||||
filter(isnull, service["links"].split("\n"))) # 要执行的link的列表
|
||||
self.OUTPUT = [] # 采集的数据
|
||||
self.writeMode = 1 # 写入模式,0为新建,1为追加
|
||||
if self.outputFormat == "csv" or self.outputFormat == "txt":
|
||||
if not os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
|
||||
self.OUTPUT.append([]) # 添加表头
|
||||
self.writeMode = 0
|
||||
elif self.outputFormat == "xlsx":
|
||||
if not os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.xlsx'):
|
||||
self.OUTPUT.append([]) # 添加表头
|
||||
self.writeMode = 0
|
||||
elif self.outputFormat == "mysql":
|
||||
self.mysql = myMySQL(config["mysql_config_path"])
|
||||
self.mysql.create_table(self.saveName, service["outputParameters"])
|
||||
self.writeMode = 2
|
||||
if self.writeMode == 1:
|
||||
print("追加模式")
|
||||
print("Append Mode")
|
||||
elif self.writeMode == 0:
|
||||
print("新建模式")
|
||||
print("New Mode")
|
||||
elif self.writeMode == 2:
|
||||
print("MySQL模式")
|
||||
print("MySQL Mode")
|
||||
self.containJudge = service["containJudge"] # 是否含有判断语句
|
||||
tOut = service["outputParameters"] # 生成输出参数对象
|
||||
self.outputParameters = {}
|
||||
self.outputParametersTypes = []
|
||||
self.outputParametersRecord = [] # 字段是否被记录
|
||||
self.dataNotFoundKeys = {} # 记录没有找到数据的key
|
||||
self.log = "" # 记下现在总共开了多少个标签页
|
||||
self.history = {"index": 0, "handle": None} # 记录页面现在所以在的历史记录的位置
|
||||
self.SAVED = False # 记录是否已经存储了
|
||||
for para in tOut:
|
||||
for para in service["outputParameters"]: # 初始化输出参数
|
||||
if para["name"] not in self.outputParameters.keys():
|
||||
self.outputParameters[para["name"]] = ""
|
||||
self.dataNotFoundKeys[para["name"]] = False
|
||||
self.outputParametersTypes.append(para["type"])
|
||||
try:
|
||||
self.outputParametersRecord.append(bool(para["recordASField"]))
|
||||
except:
|
||||
self.outputParametersRecord.append(True)
|
||||
# 文件叠加的时候不添加表头
|
||||
if self.outputFormat == "csv" or self.outputFormat == "txt":
|
||||
if not os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
|
||||
if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx":
|
||||
if self.writeMode == 0:
|
||||
self.OUTPUT[0].append(para["name"])
|
||||
elif self.outputFormat == "xlsx":
|
||||
if not os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.xlsx'):
|
||||
self.OUTPUT[0].append(para["name"])
|
||||
elif self.outputFormat == "mysql": # MySQL不需要表头
|
||||
pass
|
||||
self.urlId = 0 # 全局记录变量
|
||||
self.preprocess() # 预处理,优化提取数据流程
|
||||
|
||||
@ -216,19 +225,21 @@ class BrowserThread(Thread):
|
||||
def saveData(self, exit=False):
|
||||
# 每save_threshold条保存一次
|
||||
if exit == True or len(self.OUTPUT) >= self.save_threshold:
|
||||
# 写入日志
|
||||
with open("Data/Task_" + str(self.id) + "/" + self.saveName + '_log.txt', 'a', encoding='utf-8-sig') as file_obj:
|
||||
file_obj.write(self.log)
|
||||
file_obj.close()
|
||||
# 写入数据
|
||||
if self.outputFormat == "csv" or self.outputFormat == "txt":
|
||||
file_name = "Data/Task_" + \
|
||||
str(self.id) + "/" + self.saveName + '.' + self.outputFormat
|
||||
write_to_csv(file_name, self.OUTPUT)
|
||||
write_to_csv(file_name, self.OUTPUT, self.outputParametersRecord)
|
||||
elif self.outputFormat == "xlsx":
|
||||
file_name = "Data/Task_" + \
|
||||
str(self.id) + "/" + self.saveName + '.xlsx'
|
||||
write_to_excel(file_name, self.OUTPUT, self.outputParametersTypes)
|
||||
write_to_excel(file_name, self.OUTPUT, self.outputParametersTypes, self.outputParametersRecord)
|
||||
elif self.outputFormat == "mysql":
|
||||
self.mysql.write_to_mysql(self.OUTPUT)
|
||||
self.mysql.write_to_mysql(self.OUTPUT, self.outputParametersRecord)
|
||||
|
||||
self.OUTPUT = []
|
||||
self.log = ""
|
||||
@ -360,17 +371,18 @@ class BrowserThread(Thread):
|
||||
except:
|
||||
output = ""
|
||||
print("JavaScript execution failed")
|
||||
else:
|
||||
elif codeMode == 3:
|
||||
self.BREAK = True
|
||||
else: # 0 1
|
||||
output = self.execute_code(
|
||||
codeMode, code, max_wait_time, iframe=paras["iframe"])
|
||||
recordASField = int(paras["recordASField"])
|
||||
recordASField = bool(paras["recordASField"])
|
||||
if recordASField:
|
||||
self.outputParameters[node["title"]] = output
|
||||
line = []
|
||||
for value in self.outputParameters.values():
|
||||
line.append(value)
|
||||
print(value[:self.maxViewLength], " ", end="")
|
||||
print("")
|
||||
print("操作<" + node["title"] + ">的返回值为:" + output)
|
||||
print("The return value of operation <" + node["title"] + "> is: " + output)
|
||||
self.outputParameters[node["title"]] = output
|
||||
if recordASField:
|
||||
line = new_line(self.outputParameters, self.maxViewLength, self.outputParametersRecord)
|
||||
self.OUTPUT.append(line)
|
||||
|
||||
def switchSelect(self, para, loopValue):
|
||||
@ -567,6 +579,12 @@ class BrowserThread(Thread):
|
||||
for i in node["sequence"]: # 挨个执行操作
|
||||
self.executeNode(
|
||||
i, element, node["parameters"]["xpath"], 0)
|
||||
if self.BREAK: # 如果有break操作,下面的操作不执行
|
||||
break
|
||||
if self.BREAK: # 如果有break操作,退出循环
|
||||
self.BREAK = False
|
||||
finished = True
|
||||
break
|
||||
finished = True
|
||||
self.Log("Click: ", node["parameters"]["xpath"])
|
||||
self.recordLog("Click:" + node["parameters"]["xpath"])
|
||||
@ -621,6 +639,11 @@ class BrowserThread(Thread):
|
||||
for i in node["sequence"]: # 挨个顺序执行循环里所有的操作
|
||||
self.executeNode(i, elements[index],
|
||||
node["parameters"]["xpath"], index)
|
||||
if self.BREAK:
|
||||
break
|
||||
if self.BREAK:
|
||||
self.BREAK = False
|
||||
break
|
||||
if self.browser.current_window_handle != thisHandle: # 如果执行完一次循环之后标签页的位置发生了变化
|
||||
while True: # 一直关闭窗口直到当前标签页
|
||||
self.browser.close() # 关闭使用完的标签页
|
||||
@ -662,6 +685,11 @@ class BrowserThread(Thread):
|
||||
By.XPATH, path, iframe=node["parameters"]["iframe"])
|
||||
for i in node["sequence"]: # 挨个执行操作
|
||||
self.executeNode(i, element, path, 0)
|
||||
if self.BREAK:
|
||||
break
|
||||
if self.BREAK:
|
||||
self.BREAK = False
|
||||
break
|
||||
if self.browser.current_window_handle != thisHandle: # 如果执行完一次循环之后标签页的位置发生了变化
|
||||
while True: # 一直关闭窗口直到当前标签页
|
||||
self.browser.close() # 关闭使用完的标签页
|
||||
@ -701,6 +729,11 @@ class BrowserThread(Thread):
|
||||
self.recordLog("input: " + text)
|
||||
for i in node["sequence"]: # 挨个执行操作
|
||||
self.executeNode(i, text, "", 0)
|
||||
if self.BREAK:
|
||||
break
|
||||
if self.BREAK:
|
||||
self.BREAK = False
|
||||
break
|
||||
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
|
||||
output = self.execute_code(int(
|
||||
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
|
||||
@ -719,6 +752,11 @@ class BrowserThread(Thread):
|
||||
self.recordLog("input: " + url)
|
||||
for i in node["sequence"]:
|
||||
self.executeNode(i, url, "", 0)
|
||||
if self.BREAK:
|
||||
break
|
||||
if self.BREAK:
|
||||
self.BREAK = False
|
||||
break
|
||||
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
|
||||
output = self.execute_code(int(
|
||||
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
|
||||
@ -738,6 +776,11 @@ class BrowserThread(Thread):
|
||||
break
|
||||
for i in node["sequence"]: # 挨个执行操作
|
||||
self.executeNode(i, code, node["parameters"]["xpath"], 0)
|
||||
if self.BREAK:
|
||||
break
|
||||
if self.BREAK:
|
||||
self.BREAK = False
|
||||
break
|
||||
self.history["index"] = thisHistoryLength
|
||||
self.history["handle"] = self.browser.current_window_handle
|
||||
self.scrollDown(node["parameters"])
|
||||
@ -1071,6 +1114,8 @@ class BrowserThread(Thread):
|
||||
elif p["contentType"] == 9:
|
||||
content = self.execute_code(
|
||||
2, p["JS"], p["JSWaitTime"], element, iframe=p["iframe"])
|
||||
elif p["contentType"] == 12: # 系统命令返回值
|
||||
content = self.execute_code(1, p["JS"], p["JSWaitTime"])
|
||||
elif p["contentType"] == 10: # 下拉框选中的值
|
||||
try:
|
||||
select_element = Select(element)
|
||||
@ -1254,11 +1299,7 @@ class BrowserThread(Thread):
|
||||
self.outputParameters[p["name"]] = content
|
||||
self.execute_code(
|
||||
2, p["afterJS"], p["afterJSWaitTime"], element, iframe=p["iframe"]) # 执行后置JS
|
||||
line = []
|
||||
for value in self.outputParameters.values():
|
||||
line.append(value)
|
||||
print(value[:self.maxViewLength], " ", end="")
|
||||
print("")
|
||||
line = new_line(self.outputParameters, self.maxViewLength, self.outputParametersRecord)
|
||||
self.OUTPUT.append(line)
|
||||
# rt.end()
|
||||
|
||||
|
@ -87,16 +87,30 @@ def get_output_code(output):
|
||||
def isnull(s):
    """Return True when *s* is NOT empty.

    NOTE: despite its name, this predicate is truthy for non-empty values.
    It is used as a ``filter`` predicate to drop empty entries, so the
    name is kept for caller compatibility.

    Args:
        s: Any sized object (typically a string), or None.

    Returns:
        bool: False for None or a zero-length value, True otherwise.
    """
    # Treat None like an empty value instead of raising TypeError on len().
    return s is not None and len(s) != 0
|
||||
|
||||
def new_line(outputParameters, maxViewLength, record):
    """Build one output row from the current field values and echo it.

    Every value of *outputParameters* goes into the returned row, whether
    or not it is flagged for recording. Only fields flagged in *record*
    are printed to the console, truncated to *maxViewLength* characters.

    Args:
        outputParameters: Mapping of field name -> current value; the row
            follows the mapping's iteration order.
        maxViewLength: Maximum number of characters of each value to print.
        record: Sequence of truthy/falsy flags, positionally aligned with
            the values of *outputParameters*.

    Returns:
        list: All current field values, in mapping order.
    """
    line = list(outputParameters.values())
    for i, value in enumerate(line):
        # A field with no flag is treated as recorded, so a short flag
        # list cannot raise IndexError.
        if i >= len(record) or record[i]:
            # str() keeps the preview from failing on non-string values.
            print(str(value)[:maxViewLength], " ", end="")
    print("")
    return line
|
||||
|
||||
def write_to_csv(file_name, data, record=None):
    """Append rows to a CSV/TXT file, keeping only the recorded columns.

    Args:
        file_name: Path of the file to append to (created if missing).
        data: Iterable of rows; each row is a sequence of cell values.
        record: Optional sequence of truthy/falsy flags, positionally
            aligned with the columns of each row. A column is written when
            its flag is truthy. When omitted (None), every column is
            written, which matches the two-argument call form.
    """
    flags = [] if record is None else record
    # The with-statement closes the file; no explicit close() is needed.
    with open(file_name, 'a', encoding='utf-8-sig', newline="") as f:
        f_csv = csv.writer(f)
        for line in data:
            # Columns beyond the end of the flag list are kept, so a short
            # flag list cannot raise IndexError.
            to_write = [
                value for i, value in enumerate(line)
                if i >= len(flags) or flags[i]
            ]
            f_csv.writerow(to_write)
|
||||
|
||||
|
||||
def write_to_excel(file_name, data, types):
|
||||
def write_to_excel(file_name, data, types, record):
|
||||
first = False
|
||||
if os.path.exists(file_name):
|
||||
# 加载现有的工作簿
|
||||
@ -111,7 +125,7 @@ def write_to_excel(file_name, data, types):
|
||||
for line in data:
|
||||
if not first: # 如果不是第一行,需要转换数据类型
|
||||
for i in range(len(line)):
|
||||
if types[i] == "int":
|
||||
if types[i] == "int" or types[i] == "bigInt":
|
||||
try:
|
||||
line[i] = int(line[i])
|
||||
except:
|
||||
@ -123,7 +137,11 @@ def write_to_excel(file_name, data, types):
|
||||
line[i] = 0.0
|
||||
else:
|
||||
first = False
|
||||
ws.append(line)
|
||||
to_write = []
|
||||
for i in range(len(line)):
|
||||
if record[i]:
|
||||
to_write.append(line[i])
|
||||
ws.append(to_write)
|
||||
# 保存工作簿
|
||||
wb.save(file_name)
|
||||
|
||||
@ -174,26 +192,29 @@ class myMySQL:
|
||||
|
||||
sql = "CREATE TABLE " + table_name + " (_id INT AUTO_INCREMENT PRIMARY KEY, "
|
||||
for item in parameters:
|
||||
name = item['name']
|
||||
if item['type'] == 'int':
|
||||
sql += f"{name} INT, "
|
||||
elif item['type'] == 'double':
|
||||
sql += f"{name} DOUBLE, "
|
||||
elif item['type'] == 'text':
|
||||
sql += f"{name} TEXT, "
|
||||
elif item['type'] == 'mediumText':
|
||||
sql += f"{name} MEDIUMTEXT, "
|
||||
elif item['type'] == 'longText':
|
||||
sql += f"{name} LONGTEXT, "
|
||||
elif item['type'] == 'datetime':
|
||||
sql += f"{name} DATETIME, "
|
||||
elif item['type'] == 'date':
|
||||
sql += f"{name} DATE, "
|
||||
elif item['type'] == 'time':
|
||||
sql += f"{name} TIME, "
|
||||
elif item['type'] == 'varchar':
|
||||
sql += f"{name} VARCHAR(255), "
|
||||
self.field_sql += f"{name}, "
|
||||
if item["recordASField"]:
|
||||
name = item['name']
|
||||
if item['type'] == 'int':
|
||||
sql += f"{name} INT, "
|
||||
elif item['type'] == 'double':
|
||||
sql += f"{name} DOUBLE, "
|
||||
elif item['type'] == 'text':
|
||||
sql += f"{name} TEXT, "
|
||||
elif item['type'] == 'mediumText':
|
||||
sql += f"{name} MEDIUMTEXT, "
|
||||
elif item['type'] == 'longText':
|
||||
sql += f"{name} LONGTEXT, "
|
||||
elif item['type'] == 'datetime':
|
||||
sql += f"{name} DATETIME, "
|
||||
elif item['type'] == 'date':
|
||||
sql += f"{name} DATE, "
|
||||
elif item['type'] == 'time':
|
||||
sql += f"{name} TIME, "
|
||||
elif item['type'] == 'varchar':
|
||||
sql += f"{name} VARCHAR(255), "
|
||||
elif item['type'] == 'bigInt':
|
||||
sql += f"{name} BIGINT, "
|
||||
self.field_sql += f"{name}, "
|
||||
# 移除最后的逗号并添加闭合的括号
|
||||
sql = sql.rstrip(', ') + ")"
|
||||
self.field_sql = self.field_sql.rstrip(', ') + ")"
|
||||
@ -207,19 +228,31 @@ class myMySQL:
|
||||
print("The data table " + table_name + " already exists.")
|
||||
cursor.close()
|
||||
|
||||
def write_to_mysql(self, OUTPUT):
|
||||
def write_to_mysql(self, OUTPUT, record):
|
||||
# 创建一个游标对象
|
||||
cursor = self.conn.cursor()
|
||||
|
||||
for row in OUTPUT:
|
||||
to_write = []
|
||||
for i in range(len(row)):
|
||||
if record[i]:
|
||||
to_write.append(row[i])
|
||||
# 构造插入数据的 SQL 语句
|
||||
sql = f"INSERT INTO "+ self.table_name +" "+self.field_sql+" VALUES ("
|
||||
for item in row:
|
||||
for item in to_write:
|
||||
sql += "%s, "
|
||||
# 移除最后的逗号并添加闭合的括号
|
||||
sql = sql.rstrip(', ') + ")"
|
||||
# 执行 SQL 语句
|
||||
cursor.execute(sql, row)
|
||||
try:
|
||||
cursor.execute(sql, to_write)
|
||||
except Exception as e:
|
||||
print("Error:", e)
|
||||
# print("Error SQL:", sql)
|
||||
print("插入数据库错误,请查看以上的错误提示,然后检查数据的类型是否正确,是否文本过长(超过一万的文本类型要设置为大文本)。")
|
||||
print("Inserting database error, please check the above error, and then check whether the data type is correct, whether the text is too long (text type over 10,000 should be set to large text).")
|
||||
print("重新执行任务时,请删除数据库中的数据表" + self.table_name + ",然后再次运行程序。")
|
||||
print("When re-executing the task, please delete the data table " + self.table_name + " in the database, and then run the program again.")
|
||||
|
||||
# 提交到数据库执行
|
||||
self.conn.commit()
|
||||
|
@ -70,6 +70,7 @@
|
||||
height: 30px;
|
||||
cursor: pointer;
|
||||
margin-left: 15px !important;
|
||||
padding: 0!important;
|
||||
}
|
||||
|
||||
.tooltips input[type=text] {
|
||||
|
Loading…
x
Reference in New Issue
Block a user