mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-12 03:27:08 +08:00
Version 0.6.3
This commit is contained in:
parent
5180f47b70
commit
c56e87120d
@ -9,6 +9,7 @@ import threading
|
||||
# import undetected_chromedriver as uc
|
||||
from utils import detect_optimizable, download_image, extract_text_from_html, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, \
|
||||
on_press_creator, on_release_creator, readCode, rename_downloaded_file, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json
|
||||
from constants import WriteMode, DataWriteMode, GraphOption
|
||||
from myChrome import MyChrome
|
||||
from threading import Thread, Event
|
||||
from PIL import Image
|
||||
@ -31,7 +32,6 @@ from selenium.webdriver.common.action_chains import ActionChains
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.chrome.service import Service
|
||||
from pynput.keyboard import Key, Listener
|
||||
from datetime import datetime
|
||||
import io # 遇到错误退出时应执行的代码
|
||||
import json
|
||||
@ -76,10 +76,7 @@ class BrowserThread(Thread):
|
||||
def __init__(self, browser_t, id, service, version, event, saveName, config, option):
|
||||
Thread.__init__(self)
|
||||
self.logs = io.StringIO()
|
||||
try:
|
||||
self.log = bool(service["recordLog"])
|
||||
except:
|
||||
self.log = True
|
||||
self.log = bool(service.get("recordLog", True))
|
||||
self.browser = browser_t
|
||||
self.option = option
|
||||
self.config = config
|
||||
@ -87,22 +84,13 @@ class BrowserThread(Thread):
|
||||
self.totalSteps = 0
|
||||
self.id = id
|
||||
self.event = event
|
||||
try:
|
||||
self.saveName = service["saveName"] # 保存文件的名字
|
||||
except:
|
||||
now = datetime.now()
|
||||
# 将时间格式化为精确到秒的字符串
|
||||
self.saveName = now.strftime("%Y_%m_%d_%H_%M_%S")
|
||||
now = datetime.now()
|
||||
self.saveName = service.get("saveName", now.strftime("%Y_%m_%d_%H_%M_%S")) # 保存文件的名字
|
||||
self.OUTPUT = ""
|
||||
self.SAVED = False
|
||||
self.BREAK = False
|
||||
self.CONTINUE = False
|
||||
try:
|
||||
maximizeWindow = service["maximizeWindow"]
|
||||
except:
|
||||
maximizeWindow = 0
|
||||
if maximizeWindow == 1:
|
||||
self.browser.maximize_window()
|
||||
self.browser.maximize_window() if service.get("maximizeWindow") == 1 else ...
|
||||
# 名称设定
|
||||
if saveName != "": # 命令行覆盖保存名称
|
||||
self.saveName = saveName # 保存文件的名字
|
||||
@ -123,13 +111,13 @@ class BrowserThread(Thread):
|
||||
self.getDataStep = 0
|
||||
self.startSteps = 0
|
||||
try:
|
||||
startFromExit = service["startFromExit"] # 从上次退出的步骤开始
|
||||
if startFromExit == 1:
|
||||
if service.get("startFromExit", 0) == 1:
|
||||
with open("Data/Task_" + str(self.id) + "/" + self.saveName + '_steps.txt', 'r',
|
||||
encoding='utf-8-sig') as file_obj:
|
||||
self.startSteps = int(file_obj.read()) # 读取已执行步数
|
||||
except:
|
||||
pass
|
||||
except Exception as e:
|
||||
self.print_and_log(f"读取steps.txt失败,原因:{str(e)}")
|
||||
|
||||
if self.startSteps != 0:
|
||||
self.print_and_log("此模式下,任务ID", self.id, "将从上次退出的步骤开始执行,之前已采集条数为",
|
||||
self.startSteps, "条。")
|
||||
@ -137,7 +125,7 @@ class BrowserThread(Thread):
|
||||
"will start from the last step, before we already collected", self.startSteps, " items.")
|
||||
else:
|
||||
self.print_and_log("此模式下,任务ID", self.id,
|
||||
"将从头F开始执行,如果需要从上次退出的步骤开始执行,请在保存任务时设置是否从上次保存位置开始执行为“是”。")
|
||||
"将从头开始执行,如果需要从上次退出的步骤开始执行,请在保存任务时设置是否从上次保存位置开始执行为“是”。")
|
||||
self.print_and_log("In this mode, task ID", self.id,
|
||||
"will start from the beginning, if you want to start from the last step, please set the option 'start from the last step' to 'yes' when saving the task.")
|
||||
stealth_path = driver_path[:driver_path.find(
|
||||
@ -145,13 +133,12 @@ class BrowserThread(Thread):
|
||||
with open(stealth_path, 'r') as f:
|
||||
js = f.read()
|
||||
self.print_and_log("Loading stealth.min.js")
|
||||
self.browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
|
||||
'source': js}) # TMALL 反扒
|
||||
self.browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {'source': js}) # TMALL 反扒
|
||||
self.browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
|
||||
"source": """
|
||||
Object.defineProperty(navigator, 'webdriver', {
|
||||
get: () => undefined
|
||||
})
|
||||
"source": """
|
||||
Object.defineProperty(navigator, 'webdriver', {
|
||||
get: () => undefined
|
||||
})
|
||||
"""
|
||||
})
|
||||
WebDriverWait(self.browser, 10)
|
||||
@ -164,75 +151,65 @@ class BrowserThread(Thread):
|
||||
self.monitor_thread.start()
|
||||
# self.browser.get('about:blank')
|
||||
self.procedure = service["graph"] # 程序执行流程
|
||||
try:
|
||||
self.maxViewLength = service["maxViewLength"] # 最大显示长度
|
||||
except:
|
||||
self.maxViewLength = 15
|
||||
try:
|
||||
self.outputFormat = service["outputFormat"] # 输出格式
|
||||
except:
|
||||
self.outputFormat = "csv"
|
||||
try:
|
||||
self.task_version = service["version"] # 任务版本
|
||||
if service["version"] >= "0.3.1": # 0.3.1及以上版本以上的EasySpider兼容从0.3.1版本开始的所有版本
|
||||
pass
|
||||
else: # 0.3.1以下版本的EasySpider不兼容0.3.1及以上版本的EasySpider
|
||||
if service["version"] != version:
|
||||
self.print_and_log("版本不一致,请使用" +
|
||||
service["version"] + "版本的EasySpider运行该任务!")
|
||||
self.print_and_log("Version not match, please use EasySpider " +
|
||||
service["version"] + " to run this task!")
|
||||
self.browser.quit()
|
||||
sys.exit()
|
||||
except: # 0.2.0版本没有version字段,所以直接退出
|
||||
self.maxViewLength = service.get("maxViewLength", 15) # 最大显示长度
|
||||
self.outputFormat = service.get("outputFormat", "csv") # 输出格式
|
||||
self.save_threshold = service.get("saveThreshold", 10) # 保存最低阈值
|
||||
self.dataWriteMode = service.get("dataWriteMode", DataWriteMode.Append.value) # 数据写入模式,1为追加,2为覆盖,3为重命名文件
|
||||
self.task_version = service.get("version", "") # 任务版本
|
||||
|
||||
if not self.task_version:
|
||||
self.print_and_log("版本不一致,请使用v0.2.0版本的EasySpider运行该任务!")
|
||||
self.print_and_log(
|
||||
"Version not match, please use EasySpider v0.2.0 to run this task!")
|
||||
self.print_and_log("Version not match, please use EasySpider v0.2.0 to run this task!")
|
||||
self.browser.quit()
|
||||
sys.exit()
|
||||
try:
|
||||
self.save_threshold = service["saveThreshold"] # 保存最低阈值
|
||||
except:
|
||||
self.save_threshold = 10
|
||||
try:
|
||||
self.links = list(
|
||||
filter(isnotnull, service["links"].split("\n"))) # 要执行的link的列表
|
||||
except:
|
||||
|
||||
if self.task_version >= "0.3.1": # 0.3.1及以上版本以上的EasySpider兼容从0.3.1版本开始的所有版本
|
||||
pass
|
||||
elif self.task_version != version: # 0.3.1以下版本的EasySpider不兼容0.3.1及以上版本的EasySpider
|
||||
self.print_and_log(f"版本不一致,请使用{self.task_version}版本的EasySpider运行该任务!")
|
||||
self.print_and_log(f"Version not match, please use EasySpider {self.task_version} to run this task!")
|
||||
self.browser.quit()
|
||||
sys.exit()
|
||||
|
||||
service_links = service.get("links")
|
||||
if service_links:
|
||||
self.links = list(filter(isnotnull, service_links.split("\n"))) # 要执行的link的列表
|
||||
else:
|
||||
self.links = list(filter(isnotnull, service["url"])) # 要执行的link
|
||||
|
||||
self.OUTPUT = [] # 采集的数据
|
||||
try:
|
||||
self.dataWriteMode = service["dataWriteMode"] # 数据写入模式,1为追加,2为覆盖,3为重命名文件
|
||||
except:
|
||||
self.dataWriteMode = 1
|
||||
if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx" or self.outputFormat == "json":
|
||||
if self.outputFormat in ["csv", "txt", "xlsx", "json"]:
|
||||
if os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
|
||||
if self.dataWriteMode == 2:
|
||||
if self.dataWriteMode == DataWriteMode.Cover.value:
|
||||
os.remove("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat)
|
||||
elif self.dataWriteMode == 3:
|
||||
elif self.dataWriteMode == DataWriteMode.Rename.value:
|
||||
i = 2
|
||||
while os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '_' + str(i) + '.' + self.outputFormat):
|
||||
i = i + 1
|
||||
self.saveName = self.saveName + '_' + str(i)
|
||||
self.print_and_log("文件已存在,已重命名为", self.saveName)
|
||||
self.writeMode = 1 # 写入模式,0为新建,1为追加
|
||||
if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx":
|
||||
if not os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
|
||||
self.writeMode = WriteMode.Create.value # 写入模式,0为新建,1为追加
|
||||
if self.outputFormat in ['csv', 'txt', 'xlsx']:
|
||||
if not os.path.exists(f"Data/Task_{str(self.id)}/{self.saveName}.{self.outputFormat}"):
|
||||
self.OUTPUT.append([]) # 添加表头
|
||||
self.writeMode = 0
|
||||
self.writeMode = WriteMode.Create.value
|
||||
elif self.outputFormat == "json":
|
||||
self.writeMode = 3 # JSON模式无需判断是否存在文件
|
||||
self.writeMode = WriteMode.Json.value # JSON模式无需判断是否存在文件
|
||||
elif self.outputFormat == "mysql":
|
||||
self.mysql = myMySQL(config["mysql_config_path"])
|
||||
self.mysql.create_table(self.saveName, service["outputParameters"], remove_if_exists=self.dataWriteMode == 2)
|
||||
self.writeMode = 2
|
||||
if self.writeMode == 0:
|
||||
self.mysql.create_table(self.saveName, service["outputParameters"],
|
||||
remove_if_exists=self.dataWriteMode == DataWriteMode.Cover.value)
|
||||
self.writeMode = WriteMode.MySQL.value # MySQL模式
|
||||
|
||||
if self.writeMode == WriteMode.Create.value:
|
||||
self.print_and_log("新建模式|Create Mode")
|
||||
elif self.writeMode == 1:
|
||||
elif self.writeMode == WriteMode.Append.value:
|
||||
self.print_and_log("追加模式|Append Mode")
|
||||
elif self.writeMode == 2:
|
||||
elif self.writeMode == WriteMode.MySQL.value:
|
||||
self.print_and_log("MySQL模式|MySQL Mode")
|
||||
elif self.writeMode == 3:
|
||||
elif self.writeMode == WriteMode.Json.value:
|
||||
self.print_and_log("JSON模式|JSON Mode")
|
||||
|
||||
self.containJudge = service["containJudge"] # 是否含有判断语句
|
||||
self.outputParameters = {}
|
||||
self.service = service
|
||||
@ -245,191 +222,140 @@ class BrowserThread(Thread):
|
||||
if param["name"] not in self.outputParameters.keys():
|
||||
self.outputParameters[param["name"]] = ""
|
||||
self.dataNotFoundKeys[param["name"]] = False
|
||||
try:
|
||||
self.outputParametersTypes.append(param["type"])
|
||||
except:
|
||||
self.outputParametersTypes.append("text")
|
||||
try:
|
||||
self.outputParametersRecord.append(
|
||||
bool(param["recordASField"]))
|
||||
except:
|
||||
self.outputParametersRecord.append(True)
|
||||
self.outputParametersTypes.append(param.get("type", "text"))
|
||||
self.outputParametersRecord.append(bool(param.get("recordASField", True)))
|
||||
# 文件叠加的时候不添加表头
|
||||
if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "xlsx":
|
||||
if self.writeMode == 0:
|
||||
self.OUTPUT[0].append(param["name"])
|
||||
if self.outputFormat in ["csv", "txt", "xlsx"] and self.writeMode == WriteMode.Create.value:
|
||||
self.OUTPUT[0].append(param["name"])
|
||||
self.urlId = 0 # 全局记录变量
|
||||
self.preprocess() # 预处理,优化提取数据流程
|
||||
try:
|
||||
self.inputExcel = service["inputExcel"] # 输入Excel
|
||||
except:
|
||||
self.inputExcel = ""
|
||||
self.inputExcel = service.get("inputExcel", "") # 输入Excel
|
||||
self.readFromExcel() # 读取Excel获得参数值
|
||||
|
||||
# 检测如果没有复杂的操作,优化提取数据流程
|
||||
def preprocess(self):
|
||||
for node in self.procedure:
|
||||
try:
|
||||
iframe = node["parameters"]["iframe"]
|
||||
except:
|
||||
node["parameters"]["iframe"] = False
|
||||
for index_node, node in enumerate(self.procedure):
|
||||
parameters: dict = node["parameters"]
|
||||
iframe = parameters.get('iframe')
|
||||
option = node["option"]
|
||||
|
||||
try:
|
||||
node["parameters"]["xpath"] = lowercase_tags_in_xpath(
|
||||
node["parameters"]["xpath"])
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
node["parameters"]["waitElementIframeIndex"] = int(
|
||||
node["parameters"]["waitElementIframeIndex"])
|
||||
except:
|
||||
node["parameters"]["waitElement"] = ""
|
||||
node["parameters"]["waitElementTime"] = 10
|
||||
node["parameters"]["waitElementIframeIndex"] = 0
|
||||
if node["option"] == 1: # 打开网页操作
|
||||
try:
|
||||
cookies = node["parameters"]["cookies"]
|
||||
except:
|
||||
node["parameters"]["cookies"] = ""
|
||||
elif node["option"] == 2: # 点击操作
|
||||
try:
|
||||
alertHandleType = node["parameters"]["alertHandleType"]
|
||||
except:
|
||||
node["parameters"]["alertHandleType"] = 0
|
||||
if node["parameters"]["useLoop"]:
|
||||
parameters["iframe"] = False if not iframe else parameters.get('iframe', False)
|
||||
if parameters.get("xpath"):
|
||||
parameters["xpath"] = lowercase_tags_in_xpath(parameters["xpath"])
|
||||
|
||||
if parameters.get("waitElementIframeIndex"):
|
||||
parameters["waitElementIframeIndex"] = int(parameters["waitElementIframeIndex"])
|
||||
else:
|
||||
parameters["waitElement"] = ""
|
||||
parameters["waitElementTime"] = 10
|
||||
parameters["waitElementIframeIndex"] = 0
|
||||
|
||||
if option == GraphOption.Get.value: # 打开网页操作
|
||||
parameters["cookies"] = parameters.get("cookies", "")
|
||||
elif option == GraphOption.Click.value: # 点击操作
|
||||
parameters["alertHandleType"] = parameters.get("alertHandleType", 0)
|
||||
if parameters.get("useLoop"):
|
||||
if self.task_version <= "0.3.5":
|
||||
# 0.3.5及以下版本的EasySpider下的循环点击不支持相对XPath
|
||||
node["parameters"]["xpath"] = ""
|
||||
self.print_and_log("您的任务版本号为" + self.task_version +
|
||||
",循环点击不支持相对XPath写法,已自动切换为纯循环的XPath")
|
||||
elif node["option"] == 3: # 提取数据操作
|
||||
node["parameters"]["recordASField"] = 0
|
||||
try:
|
||||
params = node["parameters"]["params"]
|
||||
except:
|
||||
node["parameters"]["params"] = node["parameters"]["paras"] # 兼容0.5.0及以下版本的EasySpider
|
||||
params = node["parameters"]["params"]
|
||||
try:
|
||||
clear = node["parameters"]["clear"]
|
||||
except:
|
||||
node["parameters"]["clear"] = 0
|
||||
try:
|
||||
newLine = node["parameters"]["newLine"]
|
||||
except:
|
||||
node["parameters"]["newLine"] = 1
|
||||
parameters["xpath"] = ""
|
||||
self.print_and_log(f"您的任务版本号为{self.task_version},循环点击不支持相对XPath写法,已自动切换为纯循环的XPath")
|
||||
elif option == GraphOption.Extract.value: # 提取数据操作
|
||||
parameters["recordASField"] = 0
|
||||
parameters["params"] = parameters.get("params", parameters.get("paras")) # 兼容0.5.0及以下版本的EasySpider
|
||||
parameters["clear"] = parameters.get("clear", 0)
|
||||
parameters["newLine"] = parameters.get("newLine", 1)
|
||||
|
||||
params = parameters["params"]
|
||||
for param in params:
|
||||
try:
|
||||
iframe = param["iframe"]
|
||||
except:
|
||||
param["iframe"] = False
|
||||
try:
|
||||
param["iframe"] = param.get("iframe", False)
|
||||
|
||||
if param.get("relativeXPath"):
|
||||
param["relativeXPath"] = lowercase_tags_in_xpath(param["relativeXPath"])
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
node["parameters"]["recordASField"] = param["recordASField"]
|
||||
except:
|
||||
node["parameters"]["recordASField"] = 1
|
||||
try:
|
||||
splitLine = int(param["splitLine"])
|
||||
except:
|
||||
param["splitLine"] = 0
|
||||
if param["contentType"] == 8:
|
||||
self.print_and_log(
|
||||
"默认的ddddocr识别功能如果觉得不好用,可以自行修改源码get_content函数->contentType == 8的位置换成自己想要的OCR模型然后自己编译运行;或者可以先设置采集内容类型为“元素截图”把图片保存下来,然后用自定义操作调用自己写的程序,程序的功能是读取这个最新生成的图片,然后用好用的模型,如PaddleOCR把图片识别出来,然后把返回值返回给程序作为参数输出。")
|
||||
self.print_and_log(
|
||||
"If you think the default ddddocr function is not good enough, you can modify the source code get_content function -> contentType == 8 position to your own OCR model and then compile and run it; or you can first set the content type of the crawler to \"Element Screenshot\" to save the picture, and then call your own program with custom operations. The function of the program is to read the latest generated picture, then use a good model, such as PaddleOCR to recognize the picture, and then return the return value as a parameter output to the program.")
|
||||
|
||||
parameters["recordASField"] = param.get("recordASField", 1)
|
||||
|
||||
param["splitLine"] = 0 if not param.get("splitLine") else param.get("splitLine")
|
||||
|
||||
if param.get("contentType") == 8:
|
||||
self.print_and_log("默认的ddddocr识别功能如果觉得不好用,可以自行修改源码get_content函数->contentType =="
|
||||
"8的位置换成自己想要的OCR模型然后自己编译运行;或者可以先设置采集内容类型为“元素截图”把图片"
|
||||
"保存下来,然后用自定义操作调用自己写的程序,程序的功能是读取这个最新生成的图片,然后用好用"
|
||||
"的模型,如PaddleOCR把图片识别出来,然后把返回值返回给程序作为参数输出。")
|
||||
self.print_and_log("If you think the default ddddocr function is not good enough, you can "
|
||||
"modify the source code get_content function -> contentType == 8 position "
|
||||
"to your own OCR model and then compile and run it; or you can first set "
|
||||
"the content type of the crawler to \"Element Screenshot\" to save the "
|
||||
"picture, and then call your own program with custom operations. The "
|
||||
"function of the program is to read the latest generated picture, then use "
|
||||
"a good model, such as PaddleOCR to recognize the picture, and then return "
|
||||
"the return value as a parameter output to the program.")
|
||||
param["optimizable"] = detect_optimizable(param)
|
||||
elif node["option"] == 4: # 输入文字
|
||||
try:
|
||||
index = node["parameters"]["index"] # 索引值
|
||||
except:
|
||||
node["parameters"]["index"] = 0
|
||||
elif node["option"] == 5: # 自定义操作
|
||||
try:
|
||||
clear = node["parameters"]["clear"]
|
||||
except:
|
||||
node["parameters"]["clear"] = 0
|
||||
try:
|
||||
newLine = node["parameters"]["newLine"]
|
||||
except:
|
||||
node["parameters"]["newLine"] = 1
|
||||
elif node["option"] == 7: # 移动到元素
|
||||
if node["parameters"]["useLoop"]:
|
||||
if self.task_version <= "0.3.5":
|
||||
# 0.3.5及以下版本的EasySpider下的循环点击不支持相对XPath
|
||||
node["parameters"]["xpath"] = ""
|
||||
self.print_and_log("您的任务版本号为" + self.task_version +
|
||||
",循环点击不支持相对XPath写法,已自动切换为纯循环的XPath")
|
||||
elif node["option"] == 8: # 循环操作
|
||||
try:
|
||||
exitElement = node["parameters"]["exitElement"]
|
||||
if exitElement == "":
|
||||
node["parameters"]["exitElement"] = "//body"
|
||||
except:
|
||||
node["parameters"]["exitElement"] = "//body"
|
||||
node["parameters"]["quickExtractable"] = False # 是否可以快速提取
|
||||
try:
|
||||
skipCount = node["parameters"]["skipCount"]
|
||||
except:
|
||||
node["parameters"]["skipCount"] = 0
|
||||
elif option == GraphOption.Input.value: # 输入文字
|
||||
parameters['index'] = parameters.get('index', 0)
|
||||
elif option == GraphOption.Custom.value: # 自定义操作
|
||||
parameters['clear'] = parameters.get('clear', 0)
|
||||
parameters['newLine'] = parameters.get('newLine', 1)
|
||||
elif option == GraphOption.Move.value: # 移动到元素
|
||||
if parameters.get('useLoop'):
|
||||
if self.task_version <= "0.3.5": # 0.3.5及以下版本的EasySpider下的循环点击不支持相对XPath
|
||||
parameters["xpath"] = ""
|
||||
self.print_and_log(f"您的任务版本号为{self.task_version},循环点击不支持相对XPath写法,已自动切换为纯循环的XPath")
|
||||
elif option == GraphOption.Loop.value: # 循环操作
|
||||
parameters['exitElement'] = "//body" if not parameters.get('exitElement') or parameters.get('exitElement') == "" else parameters.get('exitElement')
|
||||
parameters["quickExtractable"] = False # 是否可以快速提取
|
||||
parameters['skipCount'] = parameters.get('skipCount', 0)
|
||||
|
||||
# 如果(不)固定元素列表循环中只有一个提取数据操作,且提取数据操作的提取内容为元素截图,那么可以快速提取
|
||||
if len(node["sequence"]) == 1 and self.procedure[node["sequence"][0]]["option"] == 3 and (int(node["parameters"]["loopType"]) == 1 or int(node["parameters"]["loopType"]) == 2):
|
||||
try:
|
||||
params = self.procedure[node["sequence"][0]]["parameters"]["params"]
|
||||
except:
|
||||
params = self.procedure[node["sequence"][0]]["parameters"]["paras"] # 兼容0.5.0及以下版本的EasySpider
|
||||
try:
|
||||
waitElement = self.procedure[node["sequence"][0]]["parameters"]["waitElement"]
|
||||
except:
|
||||
waitElement = ""
|
||||
if node["parameters"]["iframe"]:
|
||||
node["parameters"]["quickExtractable"] = False # 如果是iframe,那么不可以快速提取
|
||||
if len(node["sequence"]) == 1 and self.procedure[node["sequence"][0]]["option"] == 3 \
|
||||
and (int(node["parameters"]["loopType"]) == 1 or int(node["parameters"]["loopType"]) == 2):
|
||||
params = self.procedure[node["sequence"][0]].get("parameters").get("params")
|
||||
if not params:
|
||||
params = self.procedure[node["sequence"][0]]["parameters"]["paras"] # 兼容0.5.0及以下版本的EasySpider
|
||||
|
||||
waitElement = self.procedure[node["sequence"][0]]["parameters"].get("waitElement", "")
|
||||
|
||||
if parameters["iframe"]:
|
||||
parameters["quickExtractable"] = False # 如果是iframe,那么不可以快速提取
|
||||
else:
|
||||
node["parameters"]["quickExtractable"] = True # 先假设可以快速提取
|
||||
if node["parameters"]["skipCount"] > 0:
|
||||
node["parameters"]["quickExtractable"] = False # 如果有跳过的元素,那么不可以快速提取
|
||||
parameters["quickExtractable"] = True # 先假设可以快速提取
|
||||
|
||||
if parameters["skipCount"] > 0:
|
||||
parameters["quickExtractable"] = False # 如果有跳过的元素,那么不可以快速提取
|
||||
|
||||
for param in params:
|
||||
optimizable = detect_optimizable(param, ignoreWaitElement=False, waitElement=waitElement)
|
||||
try:
|
||||
iframe = param["iframe"]
|
||||
except:
|
||||
param["iframe"] = False
|
||||
if param["iframe"] and not param["relative"]: # 如果是iframe,那么不可以快速提取
|
||||
param['iframe'] = param.get('iframe', False)
|
||||
if param["iframe"] and not param["relative"]: # 如果是iframe,那么不可以快速提取
|
||||
optimizable = False
|
||||
if not optimizable: # 如果有一个不满足优化条件,那么就不能快速提取
|
||||
node["parameters"]["quickExtractable"] = False
|
||||
if not optimizable: # 如果有一个不满足优化条件,那么就不能快速提取
|
||||
parameters["quickExtractable"] = False
|
||||
break
|
||||
if node["parameters"]["quickExtractable"]:
|
||||
self.print_and_log("循环操作<" + node["title"] + ">可以快速提取数据")
|
||||
self.print_and_log("Loop operation <" + node["title"] + "> can extract data quickly")
|
||||
try:
|
||||
node["parameters"]["clear"] = self.procedure[node["sequence"][0]]["parameters"]["clear"]
|
||||
except:
|
||||
node["parameters"]["clear"] = 0
|
||||
try:
|
||||
node["parameters"]["newLine"] = self.procedure[node["sequence"][0]]["parameters"]["newLine"]
|
||||
except:
|
||||
node["parameters"]["newLine"] = 1
|
||||
if int(node["parameters"]["loopType"]) == 1: # 不固定元素列表
|
||||
|
||||
if parameters["quickExtractable"]:
|
||||
self.print_and_log(f"循环操作<{node['title']}>可以快速提取数据")
|
||||
self.print_and_log(f"Loop operation <{node['title']}> can extract data quickly")
|
||||
parameters["clear"] = self.procedure[node["sequence"][0]]["parameters"].get("clear", 0)
|
||||
parameters["newLine"] = self.procedure[node["sequence"][0]]["parameters"].get("newLine", 1)
|
||||
|
||||
if int(node["parameters"]["loopType"]) == 1: # 不固定元素列表
|
||||
node["parameters"]["baseXPath"] = node["parameters"]["xpath"]
|
||||
elif int(node["parameters"]["loopType"]) == 2: # 固定元素列表
|
||||
elif int(node["parameters"]["loopType"]) == 2: # 固定元素列表
|
||||
node["parameters"]["baseXPath"] = node["parameters"]["pathList"]
|
||||
node["parameters"]["quickParams"] = []
|
||||
for param in params:
|
||||
content_type = ""
|
||||
if param["relativeXPath"].find("/@href") >= 0 or param["relativeXPath"].find("/text()") >= 0 or param["relativeXPath"].find(
|
||||
"::text()") >= 0:
|
||||
if param["relativeXPath"].find("/@href") >= 0 or param["relativeXPath"].find("/text()") >= 0 \
|
||||
or param["relativeXPath"].find("::text()") >= 0:
|
||||
content_type = ""
|
||||
elif param["nodeType"] == 2:
|
||||
content_type = "//@href"
|
||||
elif param["nodeType"] == 4: # 图片链接
|
||||
elif param["nodeType"] == 4: # 图片链接
|
||||
content_type = "//@src"
|
||||
elif param["contentType"] == 1:
|
||||
content_type = "/text()"
|
||||
elif param["contentType"] == 0:
|
||||
content_type = "//text()"
|
||||
if param["relative"]: # 如果是相对XPath
|
||||
if param["relative"]: # 如果是相对XPath
|
||||
xpath = "." + param["relativeXPath"] + content_type
|
||||
else:
|
||||
xpath = param["relativeXPath"] + content_type
|
||||
@ -443,6 +369,7 @@ class BrowserThread(Thread):
|
||||
"nodeType": param["nodeType"],
|
||||
"default": param["default"],
|
||||
})
|
||||
self.procedure[index_node]["parameters"] = parameters
|
||||
self.print_and_log("预处理完成|Preprocess completed")
|
||||
|
||||
def readFromExcel(self):
|
||||
@ -559,7 +486,10 @@ class BrowserThread(Thread):
|
||||
self.print_and_log(f"任务执行完毕,将在{quitWaitTime}秒后自动退出浏览器并清理临时用户目录,等待时间可在保存任务对话框中设置。")
|
||||
self.print_and_log(f"The task is completed, the browser will exit automatically and the temporary user directory will be cleaned up after {quitWaitTime} seconds, the waiting time can be set in the save task dialog.")
|
||||
time.sleep(quitWaitTime)
|
||||
self.browser.quit()
|
||||
try:
|
||||
self.browser.quit()
|
||||
except:
|
||||
pass
|
||||
self.print_and_log("正在清理临时用户目录……|Cleaning up temporary user directory...")
|
||||
try:
|
||||
shutil.rmtree(self.option["tmp_user_data_folder"])
|
||||
@ -775,18 +705,20 @@ class BrowserThread(Thread):
|
||||
self.browser.set_script_timeout(max_wait_time)
|
||||
try:
|
||||
output = self.browser.execute_script(code)
|
||||
except:
|
||||
except Exception as e:
|
||||
output = ""
|
||||
self.recordLog("JavaScript execution failed")
|
||||
self.print_and_log("执行下面的代码时出错:" + code, ",错误为:", str(e))
|
||||
self.print_and_log("Error executing the following code:" + code, ", error is:", str(e))
|
||||
elif int(codeMode) == 2:
|
||||
self.recordLog("Execute JavaScript for element:" + code)
|
||||
self.recordLog("对元素执行JavaScript:" + code)
|
||||
self.browser.set_script_timeout(max_wait_time)
|
||||
try:
|
||||
output = self.browser.execute_script(code, element)
|
||||
except:
|
||||
except Exception as e:
|
||||
output = ""
|
||||
self.recordLog("JavaScript execution failed")
|
||||
self.print_and_log("执行下面的代码时出错:" + code, ",错误为:", str(e))
|
||||
self.print_and_log("Error executing the following code:" + code, ", error is:", str(e))
|
||||
elif int(codeMode) == 5:
|
||||
try:
|
||||
code = readCode(code)
|
||||
@ -796,9 +728,9 @@ class BrowserThread(Thread):
|
||||
self.recordLog("执行下面的代码:" + code)
|
||||
self.recordLog("Execute the following code:" + code)
|
||||
except Exception as e:
|
||||
self.print_and_log("执行下面的代码时出错:" + code, ",错误为:", e)
|
||||
self.print_and_log("执行下面的代码时出错:" + code, ",错误为:", str(e))
|
||||
self.print_and_log("Error executing the following code:" +
|
||||
code, ", error is:", e)
|
||||
code, ", error is:", str(e))
|
||||
elif int(codeMode) == 6:
|
||||
try:
|
||||
code = readCode(code)
|
||||
@ -1216,6 +1148,14 @@ class BrowserThread(Thread):
|
||||
self.history["handle"] = thisHandle
|
||||
thisHistoryURL = self.browser.current_url
|
||||
# 快速提取处理
|
||||
# start = time.time()
|
||||
try:
|
||||
tree = html.fromstring(self.browser.page_source)
|
||||
except Exception as e:
|
||||
self.print_and_log("解析页面时出错,将切换普通提取模式|Error parsing page, will switch to normal extraction mode")
|
||||
node["parameters"]["quickExtractable"] = False
|
||||
# end = time.time()
|
||||
# print("解析页面秒数:", end - start)
|
||||
if node["parameters"]["quickExtractable"]:
|
||||
self.browser.switch_to.default_content() # 切换到主页面
|
||||
tree = html.fromstring(self.browser.page_source)
|
||||
@ -2252,7 +2192,8 @@ if __name__ == '__main__':
|
||||
"server_address": "http://localhost:8074",
|
||||
"keyboard": True, # 是否监听键盘输入
|
||||
"pause_key": "p", # 暂停键
|
||||
"version": "0.6.2",
|
||||
"version": "0.6.3",
|
||||
"docker_driver": "",
|
||||
}
|
||||
c = Config(config)
|
||||
print(c)
|
||||
@ -2389,9 +2330,13 @@ if __name__ == '__main__':
|
||||
print("id: ", id)
|
||||
if c.read_type == "remote":
|
||||
print("remote")
|
||||
content = requests.get(
|
||||
try:
|
||||
content = requests.get(
|
||||
c.server_address + "/queryExecutionInstance?id=" + str(id))
|
||||
service = json.loads(content.text) # 加载服务信息
|
||||
service = json.loads(content.text) # 加载服务信息
|
||||
except:
|
||||
print("Cannot connect to the server, please make sure that the EasySpider Main Program is running, or you can change the --read_type parameter to 'local' to read the task information from the local task file without keeping the EasySpider Main Program running.")
|
||||
print("无法连接到服务器,请确保EasySpider主程序正在运行,或者您可以将--read_type参数更改为'local',以实现从本地任务文件中读取任务信息而无需保持EasySpider主程序运行。")
|
||||
else:
|
||||
print("local")
|
||||
local_folder = os.path.join(os.getcwd(), "execution_instances")
|
||||
@ -2442,8 +2387,17 @@ if __name__ == '__main__':
|
||||
except:
|
||||
browser = "chrome"
|
||||
if browser == "chrome":
|
||||
selenium_service = Service(executable_path=driver_path)
|
||||
browser_t = MyChrome(service=selenium_service, options=options)
|
||||
if c.docker_driver == "":
|
||||
print("Using local driver")
|
||||
selenium_service = Service(executable_path=driver_path)
|
||||
browser_t = MyChrome(service=selenium_service, options=options, mode='local_driver')
|
||||
else:
|
||||
print("Using remote driver")
|
||||
# Use docker driver, default address is http://localhost:4444/wd/hub
|
||||
# Headless mode
|
||||
# options.add_argument("--headless")
|
||||
# print("Headless mode")
|
||||
browser_t = MyChrome(command_executor=c.docker_driver, options=options, mode='remote_driver')
|
||||
elif browser == "edge":
|
||||
from selenium.webdriver.edge.service import Service as EdgeService
|
||||
from selenium.webdriver.edge.options import Options as EdgeOptions
|
||||
@ -2504,6 +2458,7 @@ if __name__ == '__main__':
|
||||
# print("Passing the Cloudflare verification mode is sometimes unstable. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
|
||||
# 使用监听器监听键盘输入
|
||||
try:
|
||||
from pynput.keyboard import Key, Listener
|
||||
if c.keyboard:
|
||||
with Listener(on_press=on_press_creator(press_time, event),
|
||||
on_release=on_release_creator(event, press_time)) as listener:
|
||||
|
@ -19,11 +19,16 @@ desired_capabilities["pageLoadStrategy"] = "none"
|
||||
|
||||
|
||||
|
||||
class MyChrome(webdriver.Chrome):
|
||||
class MyChrome(webdriver.Chrome, webdriver.Remote):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
def __init__(self, mode='local_driver', *args, **kwargs):
|
||||
self.iframe_env = False # 现在的环境是root还是iframe
|
||||
super().__init__(*args, **kwargs) # 调用父类的 __init__
|
||||
self.mode = mode
|
||||
if mode == "local_driver":
|
||||
webdriver.Chrome.__init__(self, *args, **kwargs)
|
||||
elif mode == "remote_driver":
|
||||
webdriver.Remote.__init__(self, *args, **kwargs)
|
||||
# super().__init__(*args, **kwargs) # 调用父类的 __init__
|
||||
|
||||
# def find_element(self, by=By.ID, value=None, iframe=False):
|
||||
# # 在这里改变查找元素的行为
|
||||
|
@ -64,7 +64,7 @@ def compress_folder_to_7z_split(folder_path, output_file):
|
||||
except:
|
||||
subprocess.call(["7zz", "a", "-v95m", output_file, folder_path])
|
||||
|
||||
easyspider_version = "0.6.2"
|
||||
easyspider_version = "0.6.3"
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
|
Binary file not shown.
Binary file not shown.
@ -30,7 +30,7 @@ def update_file_version(file_path, new_version, key="当前版本/Current Versio
|
||||
file.write(line)
|
||||
|
||||
|
||||
version = "0.6.2"
|
||||
version = "0.6.3"
|
||||
|
||||
# py html js
|
||||
|
||||
@ -47,7 +47,8 @@ if __name__ == "__main__":
|
||||
|
||||
# index.html
|
||||
file_path = "./src/index.html"
|
||||
update_file_version(file_path, version, key="当前版本/Current Version: <b>v")
|
||||
update_file_version(file_path, version, key="软件当前版本:<b>v")
|
||||
update_file_version(file_path, version, key="Current Version: <b>v")
|
||||
|
||||
# package.json
|
||||
file_path = "./package.json"
|
||||
|
@ -1 +1 @@
|
||||
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"sys_version":"x64","mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"}
|
||||
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"lang":"en","copyright":1,"sys_arch":"x64","mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Document\\Projects\\EasySpider\\ElectronJS\\user_data"}
|
1
ElectronJS/config1.json
Normal file
1
ElectronJS/config1.json
Normal file
@ -0,0 +1 @@
|
||||
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"sys_version":"x64","mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data","lang":"zh"}
|
@ -150,8 +150,8 @@ function createWindow() {
|
||||
server_address +
|
||||
"/index.html?user_data_folder=" +
|
||||
config.user_data_folder +
|
||||
"&copyright=" +
|
||||
config.copyright,
|
||||
"&copyright=" + config.copyright +
|
||||
"&lang=" + config.lang,
|
||||
{extraHeaders: "pragma: no-cache\n"}
|
||||
);
|
||||
// 隐藏菜单栏
|
||||
@ -162,9 +162,8 @@ function createWindow() {
|
||||
app.quit();
|
||||
}
|
||||
});
|
||||
//调试模式
|
||||
// mainWindow.webContents.openDevTools();
|
||||
// Open the DevTools.
|
||||
// mainWindow.webContents.openDevTools()
|
||||
}
|
||||
|
||||
async function findElementRecursive(driver, by, value, frames) {
|
||||
@ -1558,6 +1557,17 @@ app.whenReady().then(() => {
|
||||
path.join(task_server.getDir(), "config.json"),
|
||||
JSON.stringify(config)
|
||||
);
|
||||
//重新读取配置文件
|
||||
config = JSON.parse(fs.readFileSync(path.join(task_server.getDir(), "config.json")));
|
||||
});
|
||||
ipcMain.on("change-lang", function (event, arg) {
|
||||
config.lang = arg;
|
||||
fs.writeFileSync(
|
||||
path.join(task_server.getDir(), "config.json"),
|
||||
JSON.stringify(config)
|
||||
);
|
||||
//重新读取配置文件
|
||||
config = JSON.parse(fs.readFileSync(path.join(task_server.getDir(), "config.json")));
|
||||
});
|
||||
createWindow();
|
||||
|
||||
|
57
ElectronJS/package-lock.json
generated
57
ElectronJS/package-lock.json
generated
@ -17,8 +17,8 @@
|
||||
"multer": "^1.4.5-lts.1",
|
||||
"node-abi": "^3.52.0",
|
||||
"node-window-manager": "^2.2.4",
|
||||
"selenium-webdriver": "^4.16.0",
|
||||
"ws": "^8.17.1",
|
||||
"selenium-webdriver": "^4.27.0",
|
||||
"ws": "^8.18.0",
|
||||
"xlsx": "^0.18.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
@ -30,6 +30,11 @@
|
||||
"electron": "^27.1.3"
|
||||
}
|
||||
},
|
||||
"node_modules/@bazel/runfiles": {
|
||||
"version": "6.3.1",
|
||||
"resolved": "https://registry.npmjs.org/@bazel/runfiles/-/runfiles-6.3.1.tgz",
|
||||
"integrity": "sha512-1uLNT5NZsUVIGS4syuHwTzZ8HycMPyr6POA3FCE4GbMtc4rhoJk8aZKtNIRthJYfL+iioppi+rTfH3olMPr9nA=="
|
||||
},
|
||||
"node_modules/@electron-forge/cli": {
|
||||
"version": "6.2.1",
|
||||
"dev": true,
|
||||
@ -1203,6 +1208,7 @@
|
||||
},
|
||||
"node_modules/balanced-match": {
|
||||
"version": "1.0.2",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/base64-js": {
|
||||
@ -1307,6 +1313,7 @@
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.11",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
@ -1668,6 +1675,7 @@
|
||||
},
|
||||
"node_modules/concat-map": {
|
||||
"version": "0.0.1",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/concat-stream": {
|
||||
@ -2738,6 +2746,7 @@
|
||||
},
|
||||
"node_modules/fs.realpath": {
|
||||
"version": "1.0.0",
|
||||
"dev": true,
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/function-bind": {
|
||||
@ -2887,6 +2896,7 @@
|
||||
},
|
||||
"node_modules/glob": {
|
||||
"version": "7.2.3",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"fs.realpath": "^1.0.0",
|
||||
@ -3236,6 +3246,7 @@
|
||||
},
|
||||
"node_modules/inflight": {
|
||||
"version": "1.0.6",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"once": "^1.3.0",
|
||||
@ -3799,6 +3810,7 @@
|
||||
},
|
||||
"node_modules/minimatch": {
|
||||
"version": "3.1.2",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^1.1.7"
|
||||
@ -4366,6 +4378,7 @@
|
||||
},
|
||||
"node_modules/path-is-absolute": {
|
||||
"version": "1.0.1",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
@ -4816,6 +4829,7 @@
|
||||
},
|
||||
"node_modules/rimraf": {
|
||||
"version": "3.0.2",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"glob": "^7.1.3"
|
||||
@ -4883,16 +4897,27 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/selenium-webdriver": {
|
||||
"version": "4.16.0",
|
||||
"resolved": "https://registry.npmjs.org/selenium-webdriver/-/selenium-webdriver-4.16.0.tgz",
|
||||
"integrity": "sha512-IbqpRpfGE7JDGgXHJeWuCqT/tUqnLvZ14csSwt+S8o4nJo3RtQoE9VR4jB47tP/A8ArkYsh/THuMY6kyRP6kuA==",
|
||||
"version": "4.27.0",
|
||||
"resolved": "https://registry.npmjs.org/selenium-webdriver/-/selenium-webdriver-4.27.0.tgz",
|
||||
"integrity": "sha512-LkTJrNz5socxpPnWPODQ2bQ65eYx9JK+DQMYNihpTjMCqHwgWGYQnQTCAAche2W3ZP87alA+1zYPvgS8tHNzMQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/SeleniumHQ"
|
||||
},
|
||||
{
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/selenium"
|
||||
}
|
||||
],
|
||||
"dependencies": {
|
||||
"@bazel/runfiles": "^6.3.1",
|
||||
"jszip": "^3.10.1",
|
||||
"tmp": "^0.2.1",
|
||||
"ws": ">=8.14.2"
|
||||
"tmp": "^0.2.3",
|
||||
"ws": "^8.18.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 14.20.0"
|
||||
"node": ">= 14.21.0"
|
||||
}
|
||||
},
|
||||
"node_modules/semver": {
|
||||
@ -5431,13 +5456,11 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/tmp": {
|
||||
"version": "0.2.1",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"rimraf": "^3.0.0"
|
||||
},
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.3.tgz",
|
||||
"integrity": "sha512-nZD7m9iCPC5g0pYmcaxogYKggSfLsdxl8of3Q/oIbqCqLLIO9IAF0GWjX1z9NZRHPiXv8Wex4yDCaZsgEw0Y8w==",
|
||||
"engines": {
|
||||
"node": ">=8.17.0"
|
||||
"node": ">=14.14"
|
||||
}
|
||||
},
|
||||
"node_modules/tmp-promise": {
|
||||
@ -5711,9 +5734,9 @@
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/ws": {
|
||||
"version": "8.17.1",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.17.1.tgz",
|
||||
"integrity": "sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==",
|
||||
"version": "8.18.0",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz",
|
||||
"integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
|
@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "easy-spider",
|
||||
"productName": "EasySpider",
|
||||
"version": "0.6.2",
|
||||
"version": "0.6.3",
|
||||
"icon": "./favicon",
|
||||
"description": "NoCode Visual Web Crawler",
|
||||
"main": "main.js",
|
||||
@ -39,8 +39,8 @@
|
||||
"multer": "^1.4.5-lts.1",
|
||||
"node-abi": "^3.52.0",
|
||||
"node-window-manager": "^2.2.4",
|
||||
"selenium-webdriver": "^4.16.0",
|
||||
"ws": "^8.17.1",
|
||||
"selenium-webdriver": "^4.27.0",
|
||||
"ws": "^8.18.0",
|
||||
"xlsx": "^0.18.5"
|
||||
},
|
||||
"config": {
|
||||
@ -67,7 +67,7 @@
|
||||
],
|
||||
"packagerConfig": {
|
||||
"icon": "./favicon",
|
||||
"appVersion": "0.6.2",
|
||||
"appVersion": "0.6.3",
|
||||
"name": "EasySpider",
|
||||
"executableName": "EasySpider",
|
||||
"appCopyright": "Naibo Wang (naibowang@foxmail.com)",
|
||||
|
@ -66,6 +66,7 @@ if (!fs.existsSync(path.join(getDir(), "config.json"))) {
|
||||
webserver_port: 8074,
|
||||
user_data_folder: "./user_data",
|
||||
debug: false,
|
||||
lang: "-",
|
||||
copyright: 0,
|
||||
sys_arch: require("os").arch(),
|
||||
mysql_config_path: "./mysql_config.json",
|
||||
@ -459,6 +460,10 @@ exports.start = function (port = 8074) {
|
||||
"utf8"
|
||||
);
|
||||
config_file = JSON.parse(config_file);
|
||||
let lang = config_file["lang"];
|
||||
if(lang == undefined){
|
||||
lang = "-";
|
||||
}
|
||||
res.write(JSON.stringify(config_file));
|
||||
res.end();
|
||||
} else if (pathName == "/setUserDataFolder") {
|
||||
|
@ -32,7 +32,7 @@
|
||||
<body>
|
||||
<div id="app">
|
||||
|
||||
<div style="padding: 10px; text-align: center;vertical-align: middle;" v-if="init">
|
||||
<div style="padding: 10px; text-align: center;vertical-align: middle;" v-if="lang=='-'">
|
||||
<h5 style="margin-top: 20px">选择语言/Select Language</h5>
|
||||
|
||||
<p><a @click="changeLang('zh')" class="btn btn-outline-primary btn-lg"
|
||||
@ -40,9 +40,6 @@
|
||||
|
||||
<p><a @click="changeLang('en')" class="btn btn-outline-primary btn-lg"
|
||||
style="margin-top: 15px; width: 300px;height:60px;padding-top:12px;">English</a></p>
|
||||
<p style="font-size: 17px">当前版本/Current Version: <b>v0.6.2</b></p>
|
||||
<p style="font-size: 17px"><a href="https://github.com/NaiboWang/EasySpider/releases"
|
||||
target="_blank">Github</a>最新版本/Newest Version:<b>{{newest_version}}</b></p>
|
||||
<!-- <p>如发现新版本更新,可从以下Github仓库下载最新版本使用/If a new version is found, you can download the latest version from the following Github repository:</p>-->
|
||||
<!-- <p></p>-->
|
||||
<div class="img-container">
|
||||
@ -92,6 +89,9 @@ For individual users, EasySpider is a completely free and ad-free open-source so
|
||||
<a href="https://www.easyspider.cn/index_english.html" target="_blank"
|
||||
style="text-align: center; font-size: 18px">Browse official website to watch tutorials</a>
|
||||
</p>
|
||||
<p style="font-size: 17px">Current Version: <b>v0.6.3</b></p>
|
||||
<p style="font-size: 17px"><a href="https://github.com/NaiboWang/EasySpider/releases"
|
||||
target="_blank">Newest</a> Version: <b>{{newest_version}}</b></p>
|
||||
<div class="img-container">
|
||||
<!-- <h5>Producer</h5>-->
|
||||
<a href="https://www.zju.edu.cn" alt="Zhejiang University" target="_blank"><img
|
||||
@ -191,6 +191,9 @@ For individual users, EasySpider is a completely free and ad-free open-source so
|
||||
<a href="https://www.easyspider.cn?lang=zh" target="_blank"
|
||||
style="text-align: center; font-size: 18px">点此访问官网查看文档/视频教程</a>
|
||||
</p>
|
||||
<p style="font-size: 17px">软件当前版本:<b>v0.6.3</b></p>
|
||||
<p style="font-size: 17px"><a href="https://github.com/NaiboWang/EasySpider/releases"
|
||||
target="_blank">官网</a>最新版本:<b>{{newest_version}}</b></p>
|
||||
<div class="img-container">
|
||||
<!-- <h5>出品方</h5>-->
|
||||
<a href="https://www.zju.edu.cn" alt="浙江大学" target="_blank"><img src="img/zju.png"></a>
|
||||
|
@ -22,7 +22,7 @@ let app = Vue.createApp({
|
||||
data() {
|
||||
return {
|
||||
init: true,
|
||||
lang: 'zh',
|
||||
lang: '-',
|
||||
user_data_folder: getUrlParam("user_data_folder"),
|
||||
copyright: 0,
|
||||
step: 0,
|
||||
@ -34,6 +34,10 @@ let app = Vue.createApp({
|
||||
if(this.copyright == 0){
|
||||
this.step = -1;
|
||||
}
|
||||
this.lang = getUrlParam("lang");
|
||||
if (this.lang == 'undefined' || this.lang == '') {
|
||||
this.lang = '-';
|
||||
}
|
||||
// 发送GET请求获取GitHub的Release API响应
|
||||
const request = new XMLHttpRequest();
|
||||
request.open('GET', `https://api.github.com/repos/NaiboWang/EasySpider/releases/latest`);
|
||||
@ -52,8 +56,9 @@ let app = Vue.createApp({
|
||||
},
|
||||
methods: {
|
||||
changeLang(lang = 'zh') {
|
||||
this.init = false;
|
||||
// this.init = false;
|
||||
this.lang = lang;
|
||||
window.electronAPI.changeLang(lang);
|
||||
},
|
||||
acceptAgreement() {
|
||||
this.step = 0;
|
||||
|
@ -11,4 +11,5 @@ contextBridge.exposeInMainWorld('electronAPI', {
|
||||
startDesign: (lang="en", user_data_folder = '', mobile=false) => ipcRenderer.send('start-design', lang, user_data_folder, mobile),
|
||||
startInvoke: (lang="en") => ipcRenderer.send('start-invoke', lang),
|
||||
acceptAgreement: () => ipcRenderer.send('accept-agreement'),
|
||||
changeLang: (lang="en") => ipcRenderer.send('change-lang', lang)
|
||||
})
|
@ -491,7 +491,7 @@ if (mobile == "true") {
|
||||
}
|
||||
|
||||
let serviceInfo = {
|
||||
"version": "0.6.2"
|
||||
"version": "0.6.3"
|
||||
};
|
||||
|
||||
function saveService(type) {
|
||||
@ -625,7 +625,7 @@ function saveService(type) {
|
||||
"links": links,
|
||||
"create_time": $("#create_time").val(),
|
||||
"update_time": formatDateTime(new Date()),
|
||||
"version": "0.6.2",
|
||||
"version": "0.6.3",
|
||||
"saveThreshold": saveThreshold,
|
||||
// "cloudflare": cloudflare,
|
||||
"quitWaitTime": parseInt($("#quitWaitTime").val()),
|
||||
|
2
ElectronJS/stealth.min.js
vendored
2
ElectronJS/stealth.min.js
vendored
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/324.json
Normal file
1
ElectronJS/tasks/324.json
Normal file
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/325.json
Normal file
1
ElectronJS/tasks/325.json
Normal file
@ -0,0 +1 @@
|
||||
{"id":325,"name":"百度一下,你就知道","url":"https://www.baidu.com","links":"https://www.baidu.com","create_time":"2024-12-30 22:37:29","update_time":"2024-12-30 22:37:43","version":"0.6.3","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"csv","saveName":"current_time","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"https://www.baidu.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.baidu.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.baidu.com"}],"outputParameters":[{"id":0,"name":"参数1_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"0暖心2024 总书记的贴心话"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://www.baidu.com/s?wd=%E6%9A%96%E5%BF%832024+%E6%80%BB%E4%B9%A6%E8%AE%B0%E7%9A%84%E8%B4%B4%E5%BF%83%E8%AF%9D&sa=fyb_n_homepage&rsv_dl=fyb_n_homepage&from=super&cl=3&tn=baidutop10&fr=top1000&rsv_idx=2&hisfilter=1"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.baidu.com","links":"https://www.baidu.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环采集数据","sequence":[3],"isInLoop":fa
lse,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[1]/div[5]/div[1]/div[1]/div[3]/ul[1]/li/a[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"skipCount":0,"allXPaths":["/html/body/div[1]/div[1]/div[5]/div[1]/div[1]/div[3]/ul[1]/li[1]/a[1]","//a[contains(., '0暖心2024 总')]","//a[@class='title-content c-link c-font-medium c-line-clamp1']","/html/body/div[last()-4]/div[last()-3]/div[last()-3]/div/div/div/ul/li[last()-9]/a"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"params":[{"nodeType":1,"contentType":8,"relative":true,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"0暖心2024 
总书记的贴心话"}],"unique_index":"8rtq2is658sm5b58osr","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"splitLine":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数2_链接地址","desc":"","relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://www.baidu.com/s?wd=%E6%9A%96%E5%BF%832024+%E6%80%BB%E4%B9%A6%E8%AE%B0%E7%9A%84%E8%B4%B4%E5%BF%83%E8%AF%9D&sa=fyb_n_homepage&rsv_dl=fyb_n_homepage&from=super&cl=3&tn=baidutop10&fr=top1000&rsv_idx=2&hisfilter=1"}],"unique_index":"8rtq2is658sm5b58osr","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"splitLine":0}]}}]}
|
@ -152,7 +152,7 @@ if __name__ == "__main__":
|
||||
for folder in os.listdir("./chrome_win64"):
|
||||
if folder[0].isdigit() and os.path.isdir("./chrome_win64/"+folder):
|
||||
shutil.rmtree("./chrome_win64/"+folder+"/Installer") # 删除Installer文件夹
|
||||
copy_file("./execute_win64.bat", "./chrome_win64/execute.bat")
|
||||
copy_file("./execute_win64.bat", "./chrome_win64/execute_win64.bat")
|
||||
copy_file("./stealth.min.js", "./chrome_win64/stealth.min.js")
|
||||
try:
|
||||
copy_file(
|
||||
@ -179,7 +179,7 @@ if __name__ == "__main__":
|
||||
for folder in os.listdir("./chrome_win32"):
|
||||
if folder[0].isdigit() and os.path.isdir("./chrome_win32/"+folder):
|
||||
shutil.rmtree("./chrome_win32/"+folder+"/Installer") # 删除Installer文件夹
|
||||
copy_file("./execute_win32.bat", "./chrome_win32/execute.bat")
|
||||
copy_file("./execute_win32.bat", "./chrome_win32/execute_win32.bat")
|
||||
copy_file("./stealth.min.js", "./chrome_win32/stealth.min.js")
|
||||
try:
|
||||
copy_file(
|
||||
@ -203,7 +203,7 @@ if __name__ == "__main__":
|
||||
if os.path.exists("./chrome_linux64"):
|
||||
shutil.rmtree("./chrome_linux64")
|
||||
copy_folder(linux_chrome_path, "./chrome_linux64")
|
||||
copy_file("./execute_linux64.sh", "./chrome_linux64/execute.sh")
|
||||
copy_file("./execute_linux64.sh", "./chrome_linux64/execute_linux64.sh")
|
||||
copy_file("./stealth.min.js", "./chrome_linux64/stealth.min.js")
|
||||
try:
|
||||
copy_file(
|
||||
@ -218,7 +218,7 @@ if __name__ == "__main__":
|
||||
finally:
|
||||
# Change Linux file permissions
|
||||
os.chmod("./chrome_linux64/chromedriver_linux64", 0o755)
|
||||
os.chmod("./chrome_linux64/execute.sh", 0o755)
|
||||
os.chmod("./chrome_linux64/execute_linux64.sh", 0o755)
|
||||
shutil.rmtree("./chromedrivers")
|
||||
elif sys.platform == "darwin" and platform.architecture()[0] == "64bit":
|
||||
processor = get_processor_info()
|
||||
|
2
ExecuteStage/.vscode/launch.json
vendored
2
ExecuteStage/.vscode/launch.json
vendored
@ -12,7 +12,7 @@
|
||||
"justMyCode": false,
|
||||
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
|
||||
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
||||
"args": ["--ids", "[35]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
|
||||
"args": ["--ids", "[89]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
|
||||
"--read_type", "remote",
|
||||
]
|
||||
// "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
|
||||
|
@ -2192,7 +2192,7 @@ if __name__ == '__main__':
|
||||
"server_address": "http://localhost:8074",
|
||||
"keyboard": True, # 是否监听键盘输入
|
||||
"pause_key": "p", # 暂停键
|
||||
"version": "0.6.2",
|
||||
"version": "0.6.3",
|
||||
"docker_driver": "",
|
||||
}
|
||||
c = Config(config)
|
||||
@ -2330,9 +2330,13 @@ if __name__ == '__main__':
|
||||
print("id: ", id)
|
||||
if c.read_type == "remote":
|
||||
print("remote")
|
||||
content = requests.get(
|
||||
try:
|
||||
content = requests.get(
|
||||
c.server_address + "/queryExecutionInstance?id=" + str(id))
|
||||
service = json.loads(content.text) # 加载服务信息
|
||||
service = json.loads(content.text) # 加载服务信息
|
||||
except:
|
||||
print("Cannot connect to the server, please make sure that the EasySpider Main Program is running, or you can change the --read_type parameter to 'local' to read the task information from the local task file without keeping the EasySpider Main Program running.")
|
||||
print("无法连接到服务器,请确保EasySpider主程序正在运行,或者您可以将--read_type参数更改为'local',以实现从本地任务文件中读取任务信息而无需保持EasySpider主程序运行。")
|
||||
else:
|
||||
print("local")
|
||||
local_folder = os.path.join(os.getcwd(), "execution_instances")
|
||||
|
@ -1,14 +1,14 @@
|
||||
commandline_config==2.2.3
|
||||
requests==2.32.0
|
||||
selenium==4.16.0
|
||||
requests==2.32.3
|
||||
selenium==4.27.1
|
||||
pyinstaller==5.13.2
|
||||
Pillow==10.2.0
|
||||
xlsxwriter==3.1.9
|
||||
xlsxwriter==3.2.0
|
||||
openpyxl==3.1.2
|
||||
pymysql==1.1.1
|
||||
lxml==4.9.2
|
||||
ddddocr==1.4.10
|
||||
lxml==5.3.0
|
||||
ddddocr==1.5.6
|
||||
pynput==1.7.6
|
||||
beautifulsoup4==4.12.2
|
||||
undetected-chromedriver==3.4.7
|
||||
pandas==2.1.4
|
||||
pandas==2.2.3
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "EasySpider",
|
||||
"version": "0.6.2",
|
||||
"version": "0.6.3",
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"build": "rollup -c",
|
||||
|
@ -1,5 +1,6 @@
|
||||
import config from './config.json';
|
||||
|
||||
|
||||
export var global = {
|
||||
nodeList: [], //已被选中的节点列表
|
||||
readyList: [], //预备选中的list
|
||||
|
@ -1,9 +1,28 @@
|
||||
import $ from "jquery";
|
||||
import Vue from "vue";
|
||||
import {global, getOS, readXPath, addEl, clearEl, clearReady, handleElement, clearParameters, generateParameters, generateMultiParameters, handleDescendents, generateValTable, findRelated, pushToReadyList, readyToList, combineXpath, relatedTest} from "./global.js";
|
||||
import {
|
||||
global,
|
||||
getOS,
|
||||
readXPath,
|
||||
addEl,
|
||||
clearEl,
|
||||
clearReady,
|
||||
handleElement,
|
||||
clearParameters,
|
||||
generateParameters,
|
||||
generateMultiParameters,
|
||||
handleDescendents,
|
||||
generateValTable,
|
||||
findRelated,
|
||||
pushToReadyList,
|
||||
readyToList,
|
||||
combineXpath,
|
||||
relatedTest,
|
||||
LANG
|
||||
} from "./global.js";
|
||||
import ToolKit from "./toolkit.vue";
|
||||
import iframe from "./iframe.vue";
|
||||
|
||||
import {createNotification} from './trail.js';
|
||||
|
||||
//表现逻辑层的处理
|
||||
|
||||
@ -316,11 +335,16 @@ function generateToolkit() {
|
||||
//Vue元素
|
||||
generateToolkit();
|
||||
|
||||
function closeToolkit() {
|
||||
toolkit.style.display = "none"; // 隐藏元素
|
||||
createNotification(LANG("EasySpider操作控制台已隐藏,可点击浏览器右上角扩展程序区域的EasySpider图标重新打开。", "EasySpider Toolkit is hidden. Click the EasySpider icon in the extension list (upper right corner) of the browser to reopen."));
|
||||
}
|
||||
|
||||
let closeButton = document.getElementById("closeButton");
|
||||
closeButton.addEventListener("click", function() {
|
||||
toolkit.style.display = "none"; // 隐藏元素
|
||||
closeToolkit();
|
||||
});
|
||||
let closeButtonLeft = document.getElementById("closeButtonLeft");
|
||||
closeButtonLeft.addEventListener("click", function() {
|
||||
toolkit.style.display = "none"; // 隐藏元素
|
||||
closeToolkit();
|
||||
});
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "EasySpider",
|
||||
"version": "0.6.2",
|
||||
"version": "0.6.3",
|
||||
"description": "EasySpider's chrome extension",
|
||||
"author": "Naibo Wang",
|
||||
"manifest_version": 3,
|
||||
@ -11,6 +11,7 @@
|
||||
"38": "assets/icon-38.png",
|
||||
"128": "assets/icon-128.png"
|
||||
},
|
||||
"default_popup": "popup.html",
|
||||
"default_title": "EasySpider"
|
||||
},
|
||||
"icons": {
|
||||
@ -53,6 +54,7 @@
|
||||
"storage",
|
||||
"tabs",
|
||||
"scripting",
|
||||
"activeTab",
|
||||
"notifications"
|
||||
]
|
||||
}
|
@ -1,11 +1,19 @@
|
||||
<!doctype html>
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Popup 示例</title>
|
||||
<link rel="stylesheet" type="text/css" href="popup.css">
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>EasySpider Control Panel</title>
|
||||
<link rel="stylesheet" href="style/bootstrap.min.css">
|
||||
</head>
|
||||
<body>
|
||||
<!-- <h2>EasySpider Extension</h2>-->
|
||||
EasySpider Extension, please do not disable me.
|
||||
<body class="p-4">
|
||||
<div class="text-center">
|
||||
<!-- <h3>操作</h3>-->
|
||||
<p id="title">可执行操作</p>
|
||||
<button id="show-toolkit" class="btn btn-primary" style="width: 200px">显示EasySpider操作台</button>
|
||||
<p></p>
|
||||
<button id="close-toolkit" class="btn btn-danger" style="width: 200px">关闭EasySpider操作台</button>
|
||||
</div>
|
||||
<script src="popup.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
</html>
|
||||
|
@ -1,3 +1,106 @@
|
||||
document.getElementById('clickme').addEventListener('click', () => {
|
||||
alert('Hello, World!');
|
||||
});
|
||||
import config from './content-scripts/config.json';
|
||||
|
||||
// Localize the popup: pick the label set from the configured language
// (Chinese for 'zh', English for anything else) and write each label into
// its element, in the order title -> show button -> close button.
{
    const labels = config.language == 'zh'
        ? [
            ['title', '可执行操作'],
            ['show-toolkit', '显示EasySpider操作台'],
            ['close-toolkit', '关闭EasySpider操作台'],
        ]
        : [
            ['title', 'Executable Operations'],
            ['show-toolkit', 'Show EasySpider Toolkit'],
            ['close-toolkit', 'Close EasySpider Toolkit'],
        ];
    for (const [elementId, text] of labels) {
        document.getElementById(elementId).innerText = text;
    }
}
|
||||
|
||||
// "Show toolkit" button: inject showToolkit() into the active tab of the
// current window, then close the popup.
document.getElementById('show-toolkit').addEventListener('click', async () => {
    try {
        // Look up the tab the user is currently viewing.
        const [tab] = await chrome.tabs.query({ active: true, currentWindow: true });
        // executeScript returns a promise; await it so that an injection
        // failure is caught below instead of becoming an unhandled rejection,
        // and so the popup only closes once the injection has been performed.
        await chrome.scripting.executeScript({
            target: { tabId: tab.id },
            func: showToolkit
        });
        window.close();
    } catch (error) {
        console.error('Error showing toolkit:', error);
    }
});
|
||||
|
||||
// "Close toolkit" button: inject closeToolkit() into the active tab of the
// current window, then close the popup.
document.getElementById('close-toolkit').addEventListener('click', async () => {
    try {
        // Look up the tab the user is currently viewing.
        const [tab] = await chrome.tabs.query({ active: true, currentWindow: true });
        // executeScript returns a promise; await it so that an injection
        // failure is caught below instead of becoming an unhandled rejection,
        // and so the popup only closes once the injection has been performed.
        await chrome.scripting.executeScript({
            target: { tabId: tab.id },
            func: closeToolkit
        });
        window.close();
    } catch (error) {
        console.error('Error closing toolkit:', error);
    }
});
|
||||
|
||||
/**
 * Show the EasySpider toolkit.
 *
 * Makes every `#wrapperToolkit` element whose computed display is "none"
 * visible again (display: block), first in the top-level document and then,
 * recursively, in every iframe whose document can be accessed.
 *
 * NOTE: this function is handed to chrome.scripting.executeScript as `func`,
 * so it should rely only on page globals and on helpers declared inside its
 * own body.
 */
function showToolkit() {
    // Reveal all hidden toolkit wrappers that live directly in `root`.
    const revealIn = (root) => {
        for (const panel of root.querySelectorAll('#wrapperToolkit')) {
            if (getComputedStyle(panel).display === 'none') {
                panel.style.display = 'block';
                console.log('显示EasySpider操作台');
            }
        }
    };

    // Depth-first walk over the iframes of `root`; a frame whose document
    // cannot be read only produces a warning and is skipped.
    const walkFrames = (root) => {
        for (const frame of root.querySelectorAll('iframe')) {
            try {
                const innerDoc = frame.contentDocument || frame.contentWindow.document;
                if (innerDoc) {
                    revealIn(innerDoc);
                    walkFrames(innerDoc);
                }
            } catch (err) {
                console.warn('无法访问 iframe:', err);
            }
        }
    };

    // Main document first, then nested iframes.
    revealIn(document);
    walkFrames(document);
}
|
||||
|
||||
/**
 * Close the EasySpider toolkit.
 *
 * Hides every `#wrapperToolkit` element that is currently rendered, first in
 * the top-level document and then, recursively, in every iframe whose
 * document can be accessed.
 *
 * NOTE: this function is handed to chrome.scripting.executeScript as `func`,
 * so it should rely only on page globals and on helpers declared inside its
 * own body.
 */
function closeToolkit() {
    // Hide all visible toolkit wrappers that live directly in `documentRoot`.
    const hideContainers = (documentRoot) => {
        const containers = documentRoot.querySelectorAll('#wrapperToolkit');
        containers.forEach(container => {
            // Hide the panel whenever it is visible, whatever its display
            // value is; testing for exactly "block" would leave a panel
            // rendered with any other display value (e.g. flex) on screen.
            if (getComputedStyle(container).display !== 'none') {
                container.style.display = 'none';
                console.log('关闭EasySpider操作台');
            }
        });
    };

    // Recurse into iframes; a frame whose document cannot be read only
    // produces a warning and is skipped.
    const processIframes = (documentRoot) => {
        const iframes = documentRoot.querySelectorAll('iframe');
        iframes.forEach(iframe => {
            try {
                const iframeDoc = iframe.contentDocument || iframe.contentWindow.document;
                if (iframeDoc) {
                    // Hide #wrapperToolkit inside this iframe, then go deeper.
                    hideContainers(iframeDoc);
                    processIframes(iframeDoc);
                }
            } catch (err) {
                console.warn('无法访问 iframe:', err);
            }
        });
    };

    // Main document first, then nested iframes.
    hideContainers(document);
    processIframes(document);
}
|
||||
|
6
Extension/manifest_v3/src/style/bootstrap.min.css
vendored
Normal file
6
Extension/manifest_v3/src/style/bootstrap.min.css
vendored
Normal file
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user