可选是否生成新行以解决多行问题

This commit is contained in:
naibo 2023-09-05 19:55:32 +08:00
parent 3a3edd3558
commit 5921d84140
9 changed files with 49 additions and 718 deletions

View File

@ -1,706 +0,0 @@
Server has started.
server_address: http://localhost:8074
x64
D:\Documents\Projects\EasySpider\.temp_to_pub\EasySpider_windows_x64\EasySpider\resources\app\chrome_win64\chromedriver_win64.exe D:\Documents\Projects\EasySpider\.temp_to_pub\EasySpider_windows_x64\EasySpider\resources\app\chrome_win64\chrome.exe D:\Documents\Projects\EasySpider\.temp_to_pub\EasySpider_windows_x64\EasySpider\resources\app\chrome_win64\execute.bat
win32
Server listening on http://localhost:8075
Not Windows 7
GET A MESSAGE: { type: 0, message: { id: 2 } }
set socket_flowchart
GET A MESSAGE: { type: 0, message: { id: 0 } }
set socket_window
GET A MESSAGE: { type: 0, message: { id: 1 } }
set socket_start
GET A MESSAGE: { type: 0, message: { id: 'C91060802', title: 'Start' } }
GET A MESSAGE: { type: 0, message: { id: 1 } }
set socket_start
GET A MESSAGE: { type: 0, message: { id: 'C42837719', title: 'New Task' } }
Set handle_pair for id: C91060802 to DE25B0D62688DC2CEAA4858701D1C25A , title is: Start
GET A MESSAGE: {
type: 3,
from: 0,
message: {
pipe: '{"type":"openPage","url":"https://www.jd.com","links":"https://www.jd.com"}'
}
}
FROM Browser: {
type: 'openPage',
url: 'https://www.jd.com',
links: 'https://www.jd.com'
}
Iframe: undefined
GET A MESSAGE: { type: 1, message: { id: -1 } }
Window {
id: 26086070,
processId: 138328,
path: 'D:\\Documents\\Projects\\EasySpider\\.temp_to_pub\\EasySpider_windows_x64\\EasySpider\\resources\\app\\chrome_win64\\chrome.exe'
}
GET A MESSAGE: { type: 0, message: { id: 'C73544475', title: '京东全球版-专业的综合网上购物商城' } }
Set handle_pair for id: C42837719 to DE25B0D62688DC2CEAA4858701D1C25A , title is: New Task
Set handle_pair for id: C73544475 to DE25B0D62688DC2CEAA4858701D1C25A , title is: 京东全球版-专业的综合网上购物商城
GET A MESSAGE: {
type: 3,
from: 0,
message: {
pipe: `{"type":"singleCollect","history":4,"tabIndex":-1,"iframe":false,"parameters":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body/div[6]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","allXPaths":["/html/body/div[6]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG LeftSide_menu_hover__OCHiO']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"],"exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"16i23xj4p54lm5zgdca","iframe":false}]}`
}
}
FROM Browser: {
type: 'singleCollect',
history: 4,
tabIndex: -1,
iframe: false,
parameters: [
{
nodeType: 0,
contentType: 0,
relative: false,
name: '参数1_文本',
desc: '',
extractType: 0,
relativeXPath: '/html/body/div[6]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]',
allXPaths: [Array],
exampleValues: [Array],
unique_index: '16i23xj4p54lm5zgdca',
iframe: false
}
]
}
Iframe: false
GET A MESSAGE: { type: 3, from: 1, message: { pipe: '{"type":0,"value":2}' } }
FROM Flowchart: { type: 0, value: 2 }
GET A MESSAGE: { type: 0, message: { id: 1 } }
set socket_start
GET A MESSAGE: { type: 0, message: { id: 1 } }
set socket_start
GET A MESSAGE: {
type: 5,
message: {
id: -1,
user_data_folder: '',
mysql_config_path: '-1',
execute_type: 1
}
}
{
id: -1,
user_data_folder: '',
mysql_config_path: '-1',
execute_type: 1
}
0.json
1.json
10.json
11.json
12.json
13.json
14.json
15.json
16.json
17.json
18.json
19.json
2.json
20.json
21.json
22.json
23.json
24.json
25.json
26.json
27.json
28.json
29.json
3.json
30.json
31.json
32.json
33.json
34.json
35.json
36.json
37.json
38.json
39.json
4.json
40.json
41.json
42.json
43.json
44.json
5.json
6.json
7.json
8.json
9.json
GET A MESSAGE: {
type: 5,
message: {
id: 45,
user_data_folder: '',
mysql_config_path: './mysql_config.json',
execute_type: 1
}
}
{
id: 45,
user_data_folder: '',
mysql_config_path: './mysql_config.json',
execute_type: 1
}
D:\Documents\Projects\EasySpider\.temp_to_pub\EasySpider_windows_x64>if exist EasySpider (start EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --id [45] --server_address http://localhost:8074 --user_data 0 ) else (if exist resources (
cd ../
start EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --id [45] --server_address http://localhost:8074 --user_data 0
) else (start chrome_win64/easyspider_executestage.exe --id [45] --server_address http://localhost:8074 --user_data 0 ) )
GET A MESSAGE: { type: 0, message: { id: 2 } }
set socket_flowchart
GET A MESSAGE: { type: 0, message: { id: 2 } }
set socket_flowchart
GET A MESSAGE: { type: 0, message: { id: 1 } }
set socket_start
GET A MESSAGE: {
type: 5,
message: {
id: -1,
user_data_folder: '',
mysql_config_path: '-1',
execute_type: 1
}
}
{
id: -1,
user_data_folder: '',
mysql_config_path: '-1',
execute_type: 1
}
0.json
1.json
10.json
11.json
12.json
13.json
14.json
15.json
16.json
17.json
18.json
19.json
2.json
20.json
21.json
22.json
23.json
24.json
25.json
26.json
27.json
28.json
29.json
3.json
30.json
31.json
32.json
33.json
34.json
35.json
36.json
37.json
38.json
39.json
4.json
40.json
41.json
42.json
43.json
44.json
45.json
5.json
6.json
7.json
8.json
9.json
GET A MESSAGE: {
type: 5,
message: {
id: 46,
user_data_folder: '',
mysql_config_path: './mysql_config.json',
execute_type: 1
}
}
{
id: 46,
user_data_folder: '',
mysql_config_path: './mysql_config.json',
execute_type: 1
}
D:\Documents\Projects\EasySpider\.temp_to_pub\EasySpider_windows_x64>if exist EasySpider (start EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --id [46] --server_address http://localhost:8074 --user_data 0 ) else (if exist resources (
cd ../
start EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --id [46] --server_address http://localhost:8074 --user_data 0
) else (start chrome_win64/easyspider_executestage.exe --id [46] --server_address http://localhost:8074 --user_data 0 ) )
GET A MESSAGE: { type: 0, message: { id: 2 } }
set socket_flowchart
GET A MESSAGE: { type: 0, message: { id: 1 } }
set socket_start
GET A MESSAGE: {
type: 5,
message: {
id: -1,
user_data_folder: '',
mysql_config_path: '-1',
execute_type: 1
}
}
{
id: -1,
user_data_folder: '',
mysql_config_path: '-1',
execute_type: 1
}
0.json
1.json
10.json
11.json
12.json
13.json
14.json
15.json
16.json
17.json
18.json
19.json
2.json
20.json
21.json
22.json
23.json
24.json
25.json
26.json
27.json
28.json
29.json
3.json
30.json
31.json
32.json
33.json
34.json
35.json
36.json
37.json
38.json
39.json
4.json
40.json
41.json
42.json
43.json
44.json
45.json
46.json
5.json
6.json
7.json
8.json
9.json
GET A MESSAGE: {
type: 5,
message: {
id: 47,
user_data_folder: '',
mysql_config_path: './mysql_config.json',
execute_type: 1
}
}
{
id: 47,
user_data_folder: '',
mysql_config_path: './mysql_config.json',
execute_type: 1
}
D:\Documents\Projects\EasySpider\.temp_to_pub\EasySpider_windows_x64>if exist EasySpider (start EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --id [47] --server_address http://localhost:8074 --user_data 0 ) else (if exist resources (
cd ../
start EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --id [47] --server_address http://localhost:8074 --user_data 0
) else (start chrome_win64/easyspider_executestage.exe --id [47] --server_address http://localhost:8074 --user_data 0 ) )
GET A MESSAGE: { type: 0, message: { id: 2 } }
set socket_flowchart
GET A MESSAGE: { type: 0, message: { id: 1 } }
set socket_start
GET A MESSAGE: {
type: 5,
message: {
id: -1,
user_data_folder: '',
mysql_config_path: '-1',
execute_type: 1
}
}
{
id: -1,
user_data_folder: '',
mysql_config_path: '-1',
execute_type: 1
}
0.json
1.json
10.json
11.json
12.json
13.json
14.json
15.json
16.json
17.json
18.json
19.json
2.json
20.json
21.json
22.json
23.json
24.json
25.json
26.json
27.json
28.json
29.json
3.json
30.json
31.json
32.json
33.json
34.json
35.json
36.json
37.json
38.json
39.json
4.json
40.json
41.json
42.json
43.json
44.json
45.json
46.json
47.json
5.json
6.json
7.json
8.json
9.json
GET A MESSAGE: {
type: 5,
message: {
id: 48,
user_data_folder: '',
mysql_config_path: './mysql_config.json',
execute_type: 1
}
}
{
id: 48,
user_data_folder: '',
mysql_config_path: './mysql_config.json',
execute_type: 1
}
D:\Documents\Projects\EasySpider\.temp_to_pub\EasySpider_windows_x64>if exist EasySpider (start EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --id [48] --server_address http://localhost:8074 --user_data 0 ) else (if exist resources (
cd ../
start EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --id [48] --server_address http://localhost:8074 --user_data 0
) else (start chrome_win64/easyspider_executestage.exe --id [48] --server_address http://localhost:8074 --user_data 0 ) )
GET A MESSAGE: { type: 0, message: { id: 2 } }
set socket_flowchart
GET A MESSAGE: { type: 0, message: { id: 1 } }
set socket_start
GET A MESSAGE: {
type: 5,
message: {
id: -1,
user_data_folder: '',
mysql_config_path: '-1',
execute_type: 1
}
}
{
id: -1,
user_data_folder: '',
mysql_config_path: '-1',
execute_type: 1
}
0.json
1.json
10.json
11.json
12.json
13.json
14.json
15.json
16.json
17.json
18.json
19.json
2.json
20.json
21.json
22.json
23.json
24.json
25.json
26.json
27.json
28.json
29.json
3.json
30.json
31.json
32.json
33.json
34.json
35.json
36.json
37.json
38.json
39.json
4.json
40.json
41.json
42.json
43.json
44.json
45.json
46.json
47.json
48.json
5.json
6.json
7.json
8.json
9.json
GET A MESSAGE: {
type: 5,
message: {
id: 49,
user_data_folder: '',
mysql_config_path: './mysql_config.json',
execute_type: 1
}
}
{
id: 49,
user_data_folder: '',
mysql_config_path: './mysql_config.json',
execute_type: 1
}
D:\Documents\Projects\EasySpider\.temp_to_pub\EasySpider_windows_x64>if exist EasySpider (start EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --id [49] --server_address http://localhost:8074 --user_data 0 ) else (if exist resources (
cd ../
start EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --id [49] --server_address http://localhost:8074 --user_data 0
) else (start chrome_win64/easyspider_executestage.exe --id [49] --server_address http://localhost:8074 --user_data 0 ) )
GET A MESSAGE: { type: 0, message: { id: 2 } }
set socket_flowchart
GET A MESSAGE: { type: 0, message: { id: 1 } }
set socket_start
GET A MESSAGE: {
type: 5,
message: {
id: -1,
user_data_folder: '',
mysql_config_path: '-1',
execute_type: 1
}
}
{
id: -1,
user_data_folder: '',
mysql_config_path: '-1',
execute_type: 1
}
0.json
1.json
10.json
11.json
12.json
13.json
14.json
15.json
16.json
17.json
18.json
19.json
2.json
20.json
21.json
22.json
23.json
24.json
25.json
26.json
27.json
28.json
29.json
3.json
30.json
31.json
32.json
33.json
34.json
35.json
36.json
37.json
38.json
39.json
4.json
40.json
41.json
42.json
43.json
44.json
45.json
46.json
47.json
48.json
49.json
5.json
6.json
7.json
8.json
9.json
GET A MESSAGE: {
type: 5,
message: {
id: 50,
user_data_folder: '',
mysql_config_path: './mysql_config.json',
execute_type: 1
}
}
{
id: 50,
user_data_folder: '',
mysql_config_path: './mysql_config.json',
execute_type: 1
}
D:\Documents\Projects\EasySpider\.temp_to_pub\EasySpider_windows_x64>if exist EasySpider (start EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --id [50] --server_address http://localhost:8074 --user_data 0 ) else (if exist resources (
cd ../
start EasySpider/resources/app/chrome_win64/easyspider_executestage.exe --id [50] --server_address http://localhost:8074 --user_data 0
) else (start chrome_win64/easyspider_executestage.exe --id [50] --server_address http://localhost:8074 --user_data 0 ) )
GET A MESSAGE: { type: 0, message: { id: 2 } }
set socket_flowchart
GET A MESSAGE: { type: 0, message: { id: 2 } }
set socket_flowchart
GET A MESSAGE: { type: 0, message: { id: 0 } }
set socket_window
GET A MESSAGE: { type: 0, message: { id: 1 } }
set socket_start
GET A MESSAGE: { type: 0, message: { id: 'C33333281', title: 'Start' } }
GET A MESSAGE: { type: 0, message: { id: 'C71067632', title: 'Task Information' } }
Set handle_pair for id: C33333281 to C16928DEB37478FE68A811407561BFB2 , title is: Start
GET A MESSAGE: { type: 0, message: { id: 2 } }
set socket_flowchart
GET A MESSAGE: { type: 0, message: { id: 'C91491330', title: '设计流程' } }
Set handle_pair for id: C71067632 to C16928DEB37478FE68A811407561BFB2 , title is: Task Information
Set handle_pair for id: C91491330 to C16928DEB37478FE68A811407561BFB2 , title is: 设计流程
GET A MESSAGE: { type: 6 }
open devtools error

View File

@ -195,6 +195,7 @@
<button class="btn btn-primary" v-on:mousedown= 'addPara'>New Field</button>
</p>
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='nowNode["parameters"]["clear"]'></input>Clear other field existing values before extracting</p>
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='nowNode["parameters"]["newLine"]'></input>This operation will generate a new row of data: </p>
<div class="toolkitcontain">
<table class="toolkittb2" cellspacing="0">
<tbody>
@ -426,6 +427,13 @@ Please note that this feature does not support assigning values to variables. In
<option :value = 1>Yes</option>
</select>
</p>
<p>
<label>This operation will generate a new row of data: </label>
<select v-model='nowNode["parameters"]["newLine"]' class="form-control">
<option :value = 1>Yes</option>
<option :value = 0>No</option>
</select>
</p>
<label>Maximum wait time for script execution (0 represents unlimited wait time): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
</div>

View File

@ -698,16 +698,16 @@ document.oncontextmenu = function() {
// 创建一个包含删除选项的右键菜单
let contextMenu = document.createElement("div");
contextMenu.id = "contextMenu";
contextMenu.innerHTML = `<div>${LANG("删除元素", "Delete Element")}`;
contextMenu.innerHTML = `<div>${LANG("删除元素(双击)", "Delete Element (Double Click)")}`;
// 设置右键菜单的样式
contextMenu.style.position = "absolute";
contextMenu.style.left = event.clientX + "px";
contextMenu.style.top = event.clientY + "px";
contextMenu.style.width = LANG("140px", "180px");
contextMenu.style.width = LANG("180px", "220px");
// 添加删除元素的功能
contextMenu.addEventListener("click", function() {
contextMenu.addEventListener("dblclick", function() {
// myElement.remove(); // 删除元素
deleteElement();
contextMenu.remove(); // 删除右键菜单

View File

@ -195,6 +195,7 @@
<button class="btn btn-primary" v-on:mousedown= 'addPara'>新增字段</button>
</p>
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='nowNode["parameters"]["clear"]'></input>提取数据前清空其他操作字段已记录的值</p>
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='nowNode["parameters"]["newLine"]'></input>此提取数据操作后生成新数据行 <span style="font-size: 30px!important;" title="取消勾选则适用于不想每次提取操作都生成一个新行的场景"></span></p>
<div class="toolkitcontain">
<table class="toolkittb2" cellspacing="0">
<tbody>
@ -428,6 +429,13 @@ print(emotlib.emoji()) # 使用其中的函数。
<option :value = 1></option>
</select>
</p>
<p>
<label>此提取数据操作后生成新数据行:</label>
<select v-model='nowNode["parameters"]["newLine"]' class="form-control">
<option :value = 1></option>
<option :value = 0></option>
</select>
</p>
<label>最长等待脚本执行时间0代表无限等待 </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
</div>

View File

@ -193,6 +193,7 @@ function addParameters(t) {
t["parameters"]["afterJSWaitTime"] = 0; //执行后js等待时间
} else if (t.option == 3) { //提取数据
t["parameters"]["clear"] = 0; //清空其他字段数据
t["parameters"]["newLine"] = 1; //生成新行
t["parameters"]["paras"] = []; //默认参数列表
} else if (t.option == 4) { //输入文字
t["parameters"]["value"] = "";
@ -203,6 +204,7 @@ function addParameters(t) {
t["parameters"]["afterJSWaitTime"] = 0; //执行后js等待时间
} else if(t.option == 5) { //自定义操作
t["parameters"]["clear"] = 0; //清空其他字段数据
t["parameters"]["newLine"] = 1; //生成新行
t["parameters"]["codeMode"] = 0; //代码模式0代表JS, 2代表系统级别
t["parameters"]["code"] = "";
t["parameters"]["waitTime"] = 0; //最长等待时间

File diff suppressed because one or more lines are too long

View File

@ -12,7 +12,7 @@
"justMyCode": false,
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--id", "[2]", "--headless", "0", "--user_data", "0", "--keyboard", "0"]
"args": ["--id", "[36]", "--headless", "0", "--user_data", "0", "--keyboard", "0"]
}
]
}

View File

@ -242,6 +242,10 @@ class BrowserThread(Thread):
clear = node["parameters"]["clear"]
except:
node["parameters"]["clear"] = 0
try:
newLine = node["parameters"]["newLine"]
except:
node["parameters"]["newLine"] = 1
for para in paras:
try:
iframe = para["iframe"]
@ -274,6 +278,10 @@ class BrowserThread(Thread):
clear = node["parameters"]["clear"]
except:
node["parameters"]["clear"] = 0
try:
newLine = node["parameters"]["newLine"]
except:
node["parameters"]["newLine"] = 1
elif node["option"] == 7: # 移动到元素
if node["parameters"]["useLoop"]:
if self.task_version <= "0.3.5":
@ -618,7 +626,7 @@ class BrowserThread(Thread):
# self.print_and_log("操作<" + node["title"] + ">的返回值为:" + output)
# self.print_and_log("The return value of operation <" + node["title"] + "> is: " + output)
self.outputParameters[node["title"]] = output
if recordASField:
if recordASField and paras["newLine"]:
line = new_line(self.outputParameters,
self.maxViewLength, self.outputParametersRecord)
self.OUTPUT.append(line)
@ -1746,7 +1754,7 @@ class BrowserThread(Thread):
self.outputParameters[p["name"]] = content
self.execute_code(
2, p["afterJS"], p["afterJSWaitTime"], element, iframe=p["iframe"]) # 执行后置JS
if para["recordASField"] > 0:
if para["recordASField"] > 0 and para["newLine"]:
line = new_line(self.outputParameters,
self.maxViewLength, self.outputParametersRecord)
self.OUTPUT.append(line)

View File

@ -290,19 +290,22 @@ class myMySQL:
print("MySQL config file path: ", config_file)
with open(config_file, 'r') as f:
config = json.load(f)
host = config["host"]
port = config["port"]
user = config["username"]
passwd = config["password"]
db = config["database"]
self.host = config["host"]
self.port = config["port"]
self.user = config["username"]
self.passwd = config["password"]
self.db = config["database"]
except Exception as e:
print("读取配置文件失败,请检查配置文件:"+config_file+"是否存在,或配置信息是否有误。")
print("Failed to read configuration file, please check if the configuration file: " +
config_file+" exists, or if the configuration information is incorrect.")
print(e)
self.connect()
def connect(self):
try:
self.conn = pymysql.connect(
host=host, port=port, user=user, passwd=passwd, db=db)
host=self.host, port=self.port, user=self.user, passwd=self.passwd, db=self.db)
print("成功连接到数据库。")
print("Successfully connected to the database.")
except:
@ -408,6 +411,13 @@ class myMySQL:
# 执行 SQL 语句
try:
cursor.execute(sql, to_write)
except pymysql.OperationalError as e:
print("Error:", e)
print("Try to reconnect to the database...")
self.connect()
cursor = self.conn.cursor() # 重新创建游标对象
cursor.execute(sql, to_write) # 重新执行SQL语句
# self.write_to_mysql(OUTPUT, record, types)
except Exception as e:
print("Error:", e)
print("Error SQL:", sql, to_write)