New Features

This commit is contained in:
naibo 2023-12-17 13:14:47 +08:00
parent 8309eb5787
commit ea2d679dd4
38 changed files with 148 additions and 7 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"id":19,"name":"","url":"https://t.zsxq.com/15aUTk4Oa","links":"https://t.zsxq.com/15aUTk4Oa","create_time":"12/17/2023, 12:12:12 PM","update_time":"12/17/2023, 12:14:27 PM","version":"0.6.0","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","desc":"https://t.zsxq.com/15aUTk4Oa","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://t.zsxq.com/15aUTk4Oa","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://t.zsxq.com/15aUTk4Oa"}],"outputParameters":[{"id":0,"name":"执行JavaScript","desc":"自定义操作返回的数据","type":"text","recordASField":0,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,3,4],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://t.zsxq.com/15aUTk4Oa","links":"https://t.zsxq.com/15aUTk4Oa","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":-1,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":0,"maxWaitTime":10,"params":[],"alertHandleType":0}},{"id":2,"index":3,"parentId":0,"type":0,"option":2,"title":"点击2023程序...","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":15,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"main-content-container\")]/app-topic[3]/div[1]/div[1]/div[1]/app-talk-content[1]/div[1]/app-file-gallery[1]/div[1]/div[2]/div[2]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/app-root[1]/app-index[1]/div[1]/app-topic-flow[1]/div[1]/app-main-content[1]/div[1]/app-topic[3]/div[1]/div[1]/div[1]/app-talk-content[1]/div[1]/app-file-gallery[1]/div[1]/div[2]/div[2]","//div[contains(., '2023程序员人群洞')]","//DIV[@class='file-name']","/html/body/app-root/app-index/div/app-topic-flow/div/app-main-content/div/app-topic[last()-17]/div/div/div[last()-1]/app-talk-content/div/app-file-gallery/div/div/div"]}},{"id":3,"index":4,"parentId":0,"type":0,"option":5,"title":"执行JavaScript","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"codeMode":0,"code":"document.elementFromPoint(20,20).click();","waitTime":0,"recordASField":0,"paraType":"text","emailConfig":{"host":"","port":465,"username":"","password":"","from":"","to":"","subject":"","content":""}}}]}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"id":20,"name":"","url":"https://t.zsxq.com/15aUTk4Oa","links":"https://www.zsxq.com","create_time":"12/17/2023, 12:12:12 PM","update_time":"12/17/2023, 12:14:27 PM","version":"0.6.0","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","desc":"https://t.zsxq.com/15aUTk4Oa","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://t.zsxq.com/15aUTk4Oa","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://t.zsxq.com/15aUTk4Oa"}],"outputParameters":[{"id":0,"name":"执行JavaScript","desc":"自定义操作返回的数据","type":"text","recordASField":0,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,3,4],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://t.zsxq.com/15aUTk4Oa","links":"https://www.zsxq.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":-1,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":0,"maxWaitTime":10,"params":[],"alertHandleType":0}},{"id":2,"index":3,"parentId":0,"type":0,"option":2,"title":"点击2023程序...","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":15,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"main-content-container\")]/app-topic[3]/div[1]/div[1]/div[1]/app-talk-content[1]/div[1]/app-file-gallery[1]/div[1]/div[2]/div[2]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/app-root[1]/app-index[1]/div[1]/app-topic-flow[1]/div[1]/app-main-content[1]/div[1]/app-topic[3]/div[1]/div[1]/div[1]/app-talk-content[1]/div[1]/app-file-gallery[1]/div[1]/div[2]/div[2]","//div[contains(., '2023程序员人群洞')]","//DIV[@class='file-name']","/html/body/app-root/app-index/div/app-topic-flow/div/app-main-content/div/app-topic[last()-17]/div/div/div[last()-1]/app-talk-content/div/app-file-gallery/div/div/div"]}},{"id":3,"index":4,"parentId":0,"type":0,"option":5,"title":"执行JavaScript","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"codeMode":0,"code":"document.elementFromPoint(20,20).click();","waitTime":0,"recordASField":0,"paraType":"text","emailConfig":{"host":"","port":465,"username":"","password":"","from":"","to":"","subject":"","content":""}}}]}

View File

@ -0,0 +1 @@
{"id":21,"name":"","url":"https://t.zsxq.com/15aUTk4Oa","links":"https://www.zsxq.com","create_time":"12/17/2023, 12:12:12 PM","update_time":"12/17/2023, 12:17:41 PM","version":"0.6.0","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","desc":"https://t.zsxq.com/15aUTk4Oa","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zsxq.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.zsxq.com"}],"outputParameters":[{"id":0,"name":"执行JavaScript","desc":"自定义操作返回的数据","type":"text","recordASField":0,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,3,4],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://t.zsxq.com/15aUTk4Oa","links":"https://www.zsxq.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":-1,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":0,"maxWaitTime":10,"params":[],"alertHandleType":0}},{"id":2,"index":3,"parentId":0,"type":0,"option":2,"title":"点击2023程序...","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":15,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"main-content-container\")]/app-topic[3]/div[1]/div[1]/div[1]/app-talk-content[1]/div[1]/app-file-gallery[1]/div[1]/div[2]/div[2]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/app-root[1]/app-index[1]/div[1]/app-topic-flow[1]/div[1]/app-main-content[1]/div[1]/app-topic[3]/div[1]/div[1]/div[1]/app-talk-content[1]/div[1]/app-file-gallery[1]/div[1]/div[2]/div[2]","//div[contains(., '2023程序员人群洞')]","//DIV[@class='file-name']","/html/body/app-root/app-index/div/app-topic-flow/div/app-main-content/div/app-topic[last()-17]/div/div/div[last()-1]/app-talk-content/div/app-file-gallery/div/div/div"]}},{"id":3,"index":4,"parentId":0,"type":0,"option":5,"title":"执行JavaScript","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"codeMode":0,"code":"document.elementFromPoint(20,20).click();","waitTime":0,"recordASField":0,"paraType":"text","emailConfig":{"host":"","port":465,"username":"","password":"","from":"","to":"","subject":"","content":""}}}]}

View File

@ -0,0 +1 @@
{"id":22,"name":"","url":"https://t.zsxq.com/15aUTk4Oa","links":"https://wx.zsxq.com/dweb2","create_time":"12/17/2023, 12:12:12 PM","update_time":"12/17/2023, 12:18:23 PM","version":"0.6.0","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","desc":"https://t.zsxq.com/15aUTk4Oa","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://wx.zsxq.com/dweb2","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://wx.zsxq.com/dweb2"}],"outputParameters":[{"id":0,"name":"执行JavaScript","desc":"自定义操作返回的数据","type":"text","recordASField":0,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,3,4],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://t.zsxq.com/15aUTk4Oa","links":"https://wx.zsxq.com/dweb2","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":-1,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":0,"maxWaitTime":10,"params":[],"alertHandleType":0}},{"id":2,"index":3,"parentId":0,"type":0,"option":2,"title":"点击2023程序...","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":15,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"main-content-container\")]/app-topic[3]/div[1]/div[1]/div[1]/app-talk-content[1]/div[1]/app-file-gallery[1]/div[1]/div[2]/div[2]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/app-root[1]/app-index[1]/div[1]/app-topic-flow[1]/div[1]/app-main-content[1]/div[1]/app-topic[3]/div[1]/div[1]/div[1]/app-talk-content[1]/div[1]/app-file-gallery[1]/div[1]/div[2]/div[2]","//div[contains(., '2023程序员人群洞')]","//DIV[@class='file-name']","/html/body/app-root/app-index/div/app-topic-flow/div/app-main-content/div/app-topic[last()-17]/div/div/div[last()-1]/app-talk-content/div/app-file-gallery/div/div/div"]}},{"id":3,"index":4,"parentId":0,"type":0,"option":5,"title":"执行JavaScript","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"codeMode":0,"code":"document.elementFromPoint(20,20).click();","waitTime":0,"recordASField":0,"paraType":"text","emailConfig":{"host":"","port":465,"username":"","password":"","from":"","to":"","subject":"","content":""}}}]}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -690,7 +690,7 @@ If the expression returns a value greater than 0 or evaluates to True, the opera
<input spellcheck=false onkeydown="inputDelete(event)" id="serviceDescription" name="serviceDescription" class="form-control"></input>
<label>Export Data Format (Excel/CSV/TXT/Database):</label>
<select id="outputFormat" class="form-control">
<option value = "xlsx">XLSX (EXCEL, note that a single Excel cell can save up to 32767 characters)</option>
<option value = "xlsx">XLSX (EXCEL, we suggest using the CSV format if the length of a single cell exceeds 500)</option>
<option value = "csv">CSV</option>
<option value = "txt">TXT</option>
<option value = "json">JSON</option>

View File

@ -690,7 +690,7 @@ print(emotlib.emoji()) # 使用其中的函数。
<input spellcheck=false onkeydown="inputDelete(event)" id="serviceDescription" name="serviceDescription" class="form-control"></input>
<label>导出数据格式Excel/CSV/TXT/数据库,<a href="https://www.bilibili.com/video/BV1os4y1679S/" target="_blank">查看MySQL操作教程</a></label>
<select id="outputFormat" class="form-control">
<option value = "xlsx">XLSX即EXCEL文件注意Excel单个单元格最多可存储32767字符</option>
<option value = "xlsx">XLSX即EXCEL文件建议单个单元格长度超过500时使用CSV格式存储</option>
<option value = "csv">CSV</option>
<option value = "txt">TXT</option>
<option value = "json">JSON</option>

View File

@ -55,7 +55,12 @@ function changeOutputFormat(param) {
if (len > 20000) {
if ($("#outputFormat").val() == "xlsx") {
$("#outputFormat").val("csv"); //如果有一个参数的示例值长度超过20000就默认输出为csv
showInfo(LANG("示例值长度超过16000超出Excel单个单元格存储限制已自动切换保存为csv格式。", "The length of the example value exceeds 16000, and the csv save format has been automatically switched."));
showInfo(LANG("单个字段示例值长度超过16000超出Excel单个单元格存储限制已自动切换保存为csv格式。", "The length of the example value of a single field exceeds 16000, which exceeds the storage limit of a single cell of Excel, and has been automatically switched to save as csv format."), 5000);
}
break;
} else if (len > 500) {
if ($("#outputFormat").val() == "xlsx") {
showInfo(LANG("单个字段示例值长度超过300建议保存为CSV格式否则可能会出现数据存储不完整的情况Python Excel写入库openpyxl的Bug。", "The length of the example value of a single field exceeds 300, it is recommended to save as CSV format, otherwise there may be a situation where the data storage is incomplete (Bug of Python Excel write library openpyxl)."), 10000);
}
break;
}

View File

@ -12,7 +12,7 @@
"justMyCode": false,
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--ids", "[8]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
"args": ["--ids", "[28]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
"--read_type", "remote"]
// "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
}

View File

@ -3,6 +3,7 @@ requests==2.31.0
selenium==4.16.0
pyinstaller==5.13.2
Pillow==10.0.1
xlsxwriter==3.1.9
openpyxl==3.1.2
pymysql==1.1.0
lxml==4.9.2

View File

@ -9,6 +9,8 @@ import time
import uuid
# import keyboard
from openpyxl import Workbook, load_workbook
import pandas as pd
import xlsxwriter
import requests
from urllib.parse import urlparse
import pymysql
@ -336,11 +338,115 @@ def write_to_json(file_name, data, types, record, keys):
def write_to_excel(file_name, data, types, record):
# 首先,检查文件是否存在来决定是否处理第一行
# first = not os.path.exists(file_name)
# # 准备新数据
# new_data = pd.DataFrame(data)
# # 如果不是第一行(即文件已存在),对数据应用类型转换
# if not first:
# for i, col_type in enumerate(types):
# if col_type == "int" or col_type == "bigInt":
# try:
# new_data[i] = pd.to_numeric(new_data[i], errors='coerce').astype(int)
# except:
# new_data[i] = pd.to_numeric("0", errors='coerce').astype(int)
# elif col_type == "double":
# try:
# new_data[i] = pd.to_numeric(new_data[i], errors='coerce')(0.0)
# except:
# new_data[i] = pd.to_numeric("0.0", errors='coerce').astype(float)
# # 根据 record 筛选列
# new_data = new_data.loc[:, record]
# # 如果文件存在,则读取现有数据并追加新数据
# if first:
# combined_data = new_data
# else:
# # 使用 Pandas 读取现有数据
# existing_data = pd.read_excel(file_name)
# # 合并现有数据与新数据
# combined_data = pd.concat([existing_data, new_data], ignore_index=True)
# # 将合并后的数据写入 Excel
# combined_data.to_excel(file_name, index=False, engine='openpyxl')
# existing_data = []
# first = True
# # 检查文件是否存在
# if os.path.exists(file_name):
# # 使用 openpyxl 读取现有数据
# workbook = load_workbook(file_name, read_only=True)
# sheet = workbook.active
# # 读取已有行数
# num_rows = sheet.max_row
# if num_rows > 5000:
# print("Excel文件中的数据行数超过5000行过多的行数将会导致追加模式写入数据速度变慢建议更换为CSV文件或MySQL数据库存储数据。正在读取数据请稍等...")
# print("The number of rows in the Excel file exceeds 5000, too many rows will cause the speed of writing data in append mode to slow down, it is recommended to replace it with CSV file or MySQL database to store data. Reading data, please wait...")
# # existing_data = [[sheet.cell(row=i, column=j).value for j in range(1, sheet.max_column + 1)] for i in range(1, sheet.max_row + 1)]
# for i in range(1, sheet.max_row + 1):
# row_data = []
# if num_rows > 5000 and i % 500 == 0:
# print(f"正在读取第{i}/{num_rows}行的数据...")
# print(f"Reading data of row {i}/{num_rows}...")
# for j in range(1, sheet.max_column + 1):
# cell = sheet.cell(row=i, column=j).value
# if cell is None:
# cell = ""
# row_data.append(cell)
# existing_data.append(row_data)
# first = False # 如果文件存在,首行不再是标题行
# # 使用 xlsxwriter 创建新文件
# workbook = xlsxwriter.Workbook(file_name)
# worksheet = workbook.add_worksheet()
# # 写入现有数据
# for row_num, row_data in enumerate(existing_data):
# for col_num, cell in enumerate(row_data):
# worksheet.write(row_num, col_num, cell)
# # 写入新数据
# row = len(existing_data)
# for line in data:
# to_write = []
# for i in range(len(line)):
# value = line[i]
# if not first: # 如果不是第一行,需要转换数据类型
# if types[i] == "int" or types[i] == "bigInt":
# try:
# value = int(value)
# except ValueError:
# value = 0
# elif types[i] == "double":
# try:
# value = float(value)
# except ValueError:
# value = 0.0
# if record[i]:
# to_write.append(value)
# first = False # 更新 first 以跳过数据类型转换
# for col, item in enumerate(to_write):
# worksheet.write(row, col, item)
# row += 1
# # 关闭工作簿
# workbook.close()
first = False
if os.path.exists(file_name):
# 加载现有的工作簿
wb = load_workbook(file_name)
# 行数读取
num_rows = wb.active.max_row
if num_rows > 1000:
print("Excel文件中的数据行数已超过1000行过多的行数将会导致追加模式写入数据速度变慢建议增大任务保存对话框中的“每采集多少条数据保存一次”选项的值以提升采集速度或者更换为CSV文件或MySQL数据库存储数据。正在读取数据请稍等...")
print("The number of rows in the Excel file already exceeds 1000, too many rows will cause the speed of writing data in append mode to slow down, it is recommended to increase the value of the 'Save every how many data' option in the task save dialog to improve the collection speed, or replace it with CSV file or MySQL database to store data. Reading data, please wait...")
ws = wb.active
if num_rows > 1000:
print("读取数据完成,正在追加数据...")
print("Reading data completed, appending data...")
else:
# 创建新的工作簿和工作表
wb = Workbook()