mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-19 23:14:54 +08:00
Add Payment QR Code
This commit is contained in:
parent
d89665efea
commit
ef0acf838b
2
.temp_to_pub/.gitignore
vendored
2
.temp_to_pub/.gitignore
vendored
@ -1,6 +1,6 @@
|
||||
EasySpider_MacOS_all_arch/easyspider_executestage
|
||||
EasySpider_Linux64_x64/user_data
|
||||
EasySpider_windows_x86/user_data
|
||||
EasySpider_windows_x32/user_data
|
||||
EasySpider
|
||||
EasySpider.app/
|
||||
EasySpider_windows_x64/user_data
|
||||
|
@ -4,7 +4,7 @@ Welcome to promote this software to other friends.
|
||||
|
||||
This version is for Windows 10 x64 and above.
|
||||
|
||||
The Windows version supports **Windows 10 and above**. If you want to use EasySpider on windows 7, please download the Windows x86 version of EasySpider.
|
||||
The Windows version supports **Windows 10 and above**. If you want to use EasySpider on Windows 7, please download the Windows x32 version of EasySpider.
|
||||
|
||||
Video Tutorial: https://youtube.com/playlist?list=PL0kEFEkWrT7mt9MUlEBV2DTo1QsaanUTp
|
||||
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
支持Windows 10 x64及以上版本。
|
||||
|
||||
Windows 7此请下载Windows的32位版本使用。
|
||||
Windows 7任意版本,包括x64和x32版本,以及Windows 10 x32版本请下载Windows的32位版本使用。
|
||||
|
||||
视频教程:https://www.bilibili.com/video/BV1th411A7ey/
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,165 +0,0 @@
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.common.exceptions import NoSuchElementException
|
||||
from selenium.common.exceptions import TimeoutException
|
||||
from selenium.common.exceptions import StaleElementReferenceException, InvalidSelectorException
|
||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||
from selenium.webdriver.support.ui import Select
|
||||
from selenium.webdriver import ActionChains
|
||||
from selenium.webdriver.common.by import By
|
||||
import undetected_chromedriver as uc
|
||||
# `desired_capabilities` is bound to the DesiredCapabilities.CHROME object
# itself (no copy is made), so the update below modifies that shared mapping.
desired_capabilities = DesiredCapabilities.CHROME
# Set the "pageLoadStrategy" capability to "none".
desired_capabilities.update({"pageLoadStrategy": "none"})
|
||||
|
||||
|
||||
|
||||
class MyChrome(webdriver.Chrome):
    """Chrome driver whose element lookups can optionally search inside iframes.

    ``iframe_env`` records whether a previous lookup left the driver switched
    into an iframe, so that the next lookup first returns to the top-level
    document.
    """

    def __init__(self, *args, **kwargs):
        # True while the driver is switched into an iframe, False at the root.
        self.iframe_env = False
        super().__init__(*args, **kwargs)

    def find_element(self, by=By.ID, value=None, iframe=False):
        """Locate one element, optionally trying each iframe of the page in turn.

        Args:
            by: Locator strategy forwarded to the parent driver.
            value: Locator value forwarded to the parent driver.
            iframe: When truthy, search inside the ``iframe`` elements of the
                top-level document instead of the document itself.

        Returns:
            The matching element.  When it was found inside an iframe, the
            driver is left switched into that iframe.

        Raises:
            NoSuchElementException: ``iframe`` is truthy and no iframe holds a
                match, or the iframes could not be listed.
        """
        # Always start the lookup from the top-level document.
        if self.iframe_env:
            super().switch_to.default_content()
            self.iframe_env = False
        if not iframe:
            return super().find_element(by=by, value=value)

        try:
            frames = super().find_elements(By.CSS_SELECTOR, "iframe")
        except Exception as e:
            print(e)
            # Without this the loop below would fail with a NameError.
            frames = []
        for frame in frames:
            super().switch_to.default_content()
            super().switch_to.frame(frame)
            self.iframe_env = True
            try:
                # Deliberately no switch back on success: the caller works
                # with the element inside this iframe.
                return super().find_element(by=by, value=value)
            except Exception:
                print("No such element found in the iframe")
        raise NoSuchElementException

    def find_elements(self, by=By.ID, value=None, iframe=False):
        """Locate all matching elements, optionally inside the first iframe with a match.

        Args:
            by: Locator strategy forwarded to the parent driver.
            value: Locator value forwarded to the parent driver.
            iframe: When truthy, search inside the ``iframe`` elements of the
                top-level document instead of the document itself.

        Returns:
            The list of elements from the top-level document, or, when
            ``iframe`` is truthy, the non-empty list found in the first iframe
            that has a match (the driver stays switched into that iframe).

        Raises:
            NoSuchElementException: ``iframe`` is truthy and no iframe yields a
                non-empty result.
        """
        # Always start the lookup from the top-level document.
        if self.iframe_env:
            super().switch_to.default_content()
            self.iframe_env = False
        if not iframe:
            return super().find_elements(by=by, value=value)

        frames = super().find_elements(By.CSS_SELECTOR, "iframe")
        for frame in frames:
            try:
                super().switch_to.default_content()
                super().switch_to.frame(frame)
                self.iframe_env = True
                elements = super().find_elements(by=by, value=value)
                if elements:
                    return elements
            except Exception:
                print("No such element found in the iframe")
        raise NoSuchElementException
|
||||
|
||||
|
||||
class MyUCChrome(uc.Chrome):
    """undetected_chromedriver Chrome whose lookups can optionally search iframes.

    ``iframe_env`` records whether a previous lookup left the driver switched
    into an iframe, so that the next lookup first returns to the top-level
    document.
    """

    def __init__(self, *args, **kwargs):
        # True while the driver is switched into an iframe, False at the root.
        self.iframe_env = False
        super().__init__(*args, **kwargs)

    def find_element(self, by=By.ID, value=None, iframe=False):
        """Locate one element, optionally trying each iframe of the page in turn.

        Args:
            by: Locator strategy forwarded to the parent driver.
            value: Locator value forwarded to the parent driver.
            iframe: When truthy, search inside the ``iframe`` elements of the
                top-level document instead of the document itself.

        Returns:
            The matching element.  When it was found inside an iframe, the
            driver is left switched into that iframe.

        Raises:
            NoSuchElementException: ``iframe`` is truthy and no iframe holds a
                match, or the iframes could not be listed.
        """
        # Always start the lookup from the top-level document.
        if self.iframe_env:
            super().switch_to.default_content()
            self.iframe_env = False
        if not iframe:
            return super().find_element(by=by, value=value)

        try:
            frames = super().find_elements(By.CSS_SELECTOR, "iframe")
        except Exception as e:
            print(e)
            # Without this the loop below would fail with a NameError.
            frames = []
        for frame in frames:
            super().switch_to.default_content()
            super().switch_to.frame(frame)
            self.iframe_env = True
            try:
                # Deliberately no switch back on success: the caller works
                # with the element inside this iframe.
                return super().find_element(by=by, value=value)
            except Exception:
                print("No such element found in the iframe")
        raise NoSuchElementException

    def find_elements(self, by=By.ID, value=None, iframe=False):
        """Locate all matching elements, optionally inside the first iframe with a match.

        Args:
            by: Locator strategy forwarded to the parent driver.
            value: Locator value forwarded to the parent driver.
            iframe: When truthy, search inside the ``iframe`` elements of the
                top-level document instead of the document itself.

        Returns:
            The list of elements from the top-level document, or, when
            ``iframe`` is truthy, the non-empty list found in the first iframe
            that has a match (the driver stays switched into that iframe).

        Raises:
            NoSuchElementException: ``iframe`` is truthy and no iframe yields a
                non-empty result.
        """
        # Always start the lookup from the top-level document.
        if self.iframe_env:
            super().switch_to.default_content()
            self.iframe_env = False
        if not iframe:
            return super().find_elements(by=by, value=value)

        frames = super().find_elements(By.CSS_SELECTOR, "iframe")
        for frame in frames:
            try:
                super().switch_to.default_content()
                super().switch_to.frame(frame)
                self.iframe_env = True
                elements = super().find_elements(by=by, value=value)
                if elements:
                    return elements
            except Exception:
                print("No such element found in the iframe")
        raise NoSuchElementException
|
||||
|
@ -1,348 +0,0 @@
|
||||
# 控制流程的暂停和继续
|
||||
|
||||
import csv
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
import keyboard
|
||||
from openpyxl import Workbook, load_workbook
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
import pymysql
|
||||
from lxml import etree
|
||||
|
||||
def is_valid_url(url):
    """Return True when ``url`` parses with both a scheme and a netloc.

    A ``ValueError`` raised by ``urlparse`` is treated as "not valid".
    """
    try:
        parts = urlparse(url)
    except ValueError:
        return False
    return bool(parts.scheme and parts.netloc)
|
||||
|
||||
def lowercase_tags_in_xpath(xpath):
    """Lower-case upper-case letter runs that end a path step in ``xpath``.

    A run of ``A-Z`` is converted only when it is immediately followed by
    ``[``, ``]``, ``/`` or the end of the string; a run followed by anything
    else (for example the ``H`` in ``H1``) is left untouched.
    """
    def _to_lower(match):
        return match.group(0).lower()

    return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", _to_lower, xpath)
|
||||
|
||||
|
||||
def on_press_creator(press_time, event):
    """Build a key-press callback that toggles ``event`` on a long press of 'p'.

    Args:
        press_time: Mutable mapping with the keys ``"is_pressed"`` (whether a
            press of 'p' is currently being timed) and ``"duration"`` (the
            ``time.time()`` value at which timing started).
        event: Object offering ``is_set()``, ``set()`` and ``clear()``, such
            as a ``threading.Event``.

    Returns:
        A one-argument callback ``on_press(key)``.  ``key`` is expected to
        expose a ``char`` attribute; keys without one are ignored.
    """
    def on_press(key):
        try:
            if key.char != 'p':
                return
            if not press_time["is_pressed"]:
                # First press event for 'p': start timing.
                press_time["duration"] = time.time()
                press_time["is_pressed"] = True
                return
            # 'p' is already being timed: toggle once it was held for over 2 s.
            held_for = time.time() - press_time["duration"]
            if held_for > 2:
                if not event.is_set():
                    print("任务执行中,长按p键暂停执行。")
                    print("Task is running, long press 'p' to pause.")
                    # Set the event so the waiting thread can continue.
                    event.set()
                else:
                    # Clear the event so the worker thread pauses.
                    print("任务已暂停,长按p键继续执行...")
                    print("Task paused, long press 'p' to continue...")
                    event.clear()
                # Restart timing so one long hold toggles only once.
                press_time["duration"] = time.time()
                press_time["is_pressed"] = False
        except Exception:
            # Best effort: e.g. keys without a ``char`` attribute are ignored
            # instead of raising out of the callback.
            pass
    return on_press
|
||||
|
||||
def on_release_creator(event, press_time):
    """Build a key-release callback that marks the timed press as finished.

    Note that the parameter order is ``(event, press_time)``.

    Args:
        event: Not used by the returned callback.
        press_time: Mutable mapping whose ``"is_pressed"`` entry is reset to
            ``False`` whenever any key is released.

    Returns:
        A one-argument callback ``on_release(key)``; ``key`` is not inspected.
    """
    def on_release(key):
        try:
            press_time["is_pressed"] = False
        except Exception:
            # Best effort: a failing state update must not raise out of the
            # callback.
            pass
    return on_release
|
||||
|
||||
|
||||
def check_pause(key, event):
    """Poll the keyboard once per second and toggle ``event`` while ``key`` is down.

    This function loops forever and never returns.

    Args:
        key: Key identifier passed to ``keyboard.is_pressed``.
        event: Object offering ``is_set()``, ``set()`` and ``clear()``, such
            as a ``threading.Event``.
    """
    while True:
        if keyboard.is_pressed(key):
            if not event.is_set():
                print("任务执行中,长按p键暂停执行。")
                print("Task is running, long press 'p' to pause.")
                # Set the event so the waiting thread can continue.
                event.set()
            else:
                # Clear the event so the worker thread pauses.
                print("任务已暂停,长按p键继续执行...")
                print("Task paused, press 'p' to continue...")
                event.clear()
        time.sleep(1)  # check once per second
|
||||
|
||||
|
||||
def download_image(url, save_directory, timeout=30):
    """Download ``url`` into ``save_directory`` under a unique file name.

    The saved name is ``<name>_<uuid4>_<name>``, where ``<name>`` is the last
    path segment of the URL without its query string.  Progress and failures
    are reported with ``print``; nothing is returned.

    Args:
        url: Address of the image.
        save_directory: Existing directory the file is written to.
        timeout: Seconds passed to ``requests.get`` as its timeout, so that an
            unresponsive server cannot block the caller indefinitely.
    """
    def report_failure():
        print("下载图片失败,请检查此图片链接是否有效:", url)
        print("Failed to download image, please check if this image link is valid:", url)

    # Browser-like request headers.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    if not is_valid_url(url):
        report_failure()
        return

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported like a bad status code.
        report_failure()
        return
    if response.status_code != requests.codes.ok:
        report_failure()
        return

    # Last path segment, query string removed.
    file_name = url.split('/')[-1].split("?")[0]
    # The uuid makes the name unique; repeating the name keeps the original
    # extension at the end.
    new_file_name = file_name + '_' + str(uuid.uuid4()) + '_' + file_name
    save_path = os.path.join(save_directory, new_file_name)
    with open(save_path, 'wb') as file:
        file.write(response.content)

    print("图片已成功下载到:", save_path)
    print("The image has been successfully downloaded to:", save_path)
|
||||
|
||||
|
||||
def get_output_code(output):
    """Convert textual command output into an integer code.

    Args:
        output: Text to interpret; expected to be a ``str``.

    Returns:
        ``1`` when ``output`` contains the substring ``"rue"`` (so both
        "true" and "True" match), otherwise ``int(output)``.  ``0`` is
        returned when ``output`` has no usable ``find`` method or cannot be
        converted to an integer.
    """
    try:
        if output.find("rue") != -1:
            return 1
        return int(output)
    except Exception:
        # Anything that is not text or not a number counts as 0.
        return 0
|
||||
|
||||
# Emptiness check for a field.
# NOTE: despite the name, the result is True when the field is NOT empty.
def isnull(s):
    """Return True when ``s`` contains at least one item, False when it is empty."""
    return len(s) > 0
|
||||
|
||||
def new_line(outputParameters, maxViewLength, record):
    """Collect the values of ``outputParameters`` as a row and preview it.

    Args:
        outputParameters: Mapping whose values, in iteration order, form the row.
        maxViewLength: Maximum number of leading items of each value to print.
        record: Sequence of flags, one per value; only values whose flag is
            truthy are printed.

    Returns:
        A list with every value (printed or not).
    """
    row = list(outputParameters.values())
    for position, value in enumerate(row):
        if record[position]:
            print(value[:maxViewLength], " ", end="")
    print("")
    return row
|
||||
|
||||
def write_to_csv(file_name, data, record):
    """Append the recorded columns of every row in ``data`` to a CSV file.

    Args:
        file_name: Path of the CSV file; opened in append mode as UTF-8 with BOM.
        data: Iterable of rows, each an indexable sequence of cells.
        record: Sequence of flags; the cell at index ``i`` is written only
            when ``record[i]`` is truthy.
    """
    with open(file_name, 'a', encoding='utf-8-sig', newline="") as f:
        writer = csv.writer(f)
        for row in data:
            kept = [cell for index, cell in enumerate(row) if record[index]]
            writer.writerow(kept)
|
||||
|
||||
|
||||
def write_to_excel(file_name, data, types, record):
    """Append rows to an Excel workbook, creating the workbook when needed.

    When ``file_name`` does not exist yet, a new workbook is created and the
    first row of ``data`` is written unconverted (it is treated as the first
    line of a fresh sheet).  Every other row has its cells converted according
    to ``types`` before being written.  Rows of ``data`` are converted in
    place.

    Args:
        file_name: Path of the workbook to extend or create.
        data: Iterable of rows, each a mutable sequence of cells.
        types: Sequence of type names, one per cell index.  ``"int"`` and
            ``"bigInt"`` cells become ``int`` (``0`` on failure), ``"double"``
            cells become ``float`` (``0.0`` on failure); other names leave the
            cell unchanged.
        record: Sequence of flags; the cell at index ``i`` is written only
            when ``record[i]`` is truthy.
    """
    if os.path.exists(file_name):
        # Extend the existing workbook.
        wb = load_workbook(file_name)
        first = False
    else:
        # Start a new workbook.
        wb = Workbook()
        first = True
    ws = wb.active

    for line in data:
        if first:
            # Very first row of a new workbook: written without conversion.
            first = False
        else:
            for i, cell in enumerate(line):
                kind = types[i]
                if kind in ("int", "bigInt"):
                    try:
                        line[i] = int(cell)
                    except (TypeError, ValueError, OverflowError):
                        line[i] = 0
                elif kind == "double":
                    try:
                        line[i] = float(cell)
                    except (TypeError, ValueError, OverflowError):
                        line[i] = 0.0
        ws.append([cell for i, cell in enumerate(line) if record[i]])

    wb.save(file_name)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class Time:
    """Stopwatch that reports elapsed wall-clock time in milliseconds."""

    def __init__(self, type1=""):
        # Label printed by end().
        self.type = type1
        # Start time in whole milliseconds.
        self.t = int(round(time.time() * 1000))

    def end(self):
        """Print the milliseconds elapsed since this object was created."""
        finished = int(round(time.time() * 1000))
        elapsed = finished - self.t
        print("Time used for", self.type, ":", elapsed, "ms")
|
||||
|
||||
|
||||
class myMySQL:
    """Helper that creates a table and inserts collected rows through pymysql."""

    # Field type names mapped to the MySQL column type used in CREATE TABLE.
    _COLUMN_TYPES = {
        'int': 'INT',
        'double': 'DOUBLE',
        'text': 'TEXT',
        'mediumText': 'MEDIUMTEXT',
        'longText': 'LONGTEXT',
        'datetime': 'DATETIME',
        'date': 'DATE',
        'time': 'TIME',
        'varchar': 'VARCHAR(255)',
        'bigInt': 'BIGINT',
    }

    # Temporal type names mapped to (strptime format, fallback text).
    _TEMPORAL_FORMATS = {
        "datetime": ('%Y-%m-%d %H:%M:%S', "1970-01-01 00:00:00"),
        "date": ('%Y-%m-%d', "1970-01-01"),
        "time": ('%H:%M:%S', "00:00:00"),
    }

    def __init__(self, config_file="mysql_config.json"):
        """Read the JSON configuration and try to open the connection.

        Failures are reported with ``print`` and do not raise; in that case
        ``self.conn`` is ``None``.

        Args:
            config_file: Path of a JSON file providing the keys ``host``,
                ``port``, ``user``, ``password`` and ``database``.
        """
        self.conn = None
        settings = None
        # Read the configuration file.
        try:
            with open(config_file, 'r') as f:
                config = json.load(f)
            settings = {
                "host": config["host"],
                "port": config["port"],
                "user": config["user"],
                "passwd": config["password"],
                "db": config["database"],
            }
        except Exception:
            print("读取配置文件失败,请检查配置文件:"+config_file+"是否存在。")
            print("Failed to read configuration file, please check if the configuration file: "+config_file+" exists.")

        if settings is not None:
            try:
                self.conn = pymysql.connect(**settings)
            except Exception:
                self.conn = None
        if self.conn is None:
            print("连接数据库失败,请检查配置文件是否正确。")
            print("Failed to connect to the database, please check if the configuration file is correct.")
        else:
            print("成功连接到数据库。")
            print("Successfully connected to the database.")

    def create_table(self, table_name, parameters):
        """Create ``table_name`` unless it exists and remember its field list.

        Sets ``self.table_name`` and ``self.field_sql`` (the parenthesised,
        comma-separated field names later used by ``write_to_mysql``).

        Args:
            table_name: Name of the table.
            parameters: Iterable of mappings with the keys ``recordASField``,
                ``name`` and ``type``.  Only entries with a truthy
                ``recordASField`` become columns.
        """
        self.table_name = table_name
        columns = ["_id INT AUTO_INCREMENT PRIMARY KEY"]
        field_names = []
        for item in parameters:
            if not item["recordASField"]:
                continue
            name = item['name']
            column_type = self._COLUMN_TYPES.get(item['type'])
            # NOTE(review): a field with an unrecognised type is listed in
            # field_sql but gets no column in the CREATE TABLE statement, as
            # in the original code - confirm whether that is intended.
            if column_type is not None:
                columns.append(f"{name} {column_type}")
            field_names.append(name)
        # NOTE(review): identifiers cannot be bound as query parameters, so
        # the table and field names are inserted verbatim - they must come
        # from a trusted source.
        sql = "CREATE TABLE " + table_name + " (" + ", ".join(columns) + ")"
        self.field_sql = "(" + ", ".join(field_names) + ")"

        cursor = self.conn.cursor()
        try:
            # Check whether the table already exists.
            cursor.execute("SHOW TABLES LIKE %s", (table_name,))
            if cursor.fetchone():
                print("数据表" + table_name + "已存在。")
                print("The data table " + table_name + " already exists.")
            else:
                cursor.execute(sql)
        finally:
            cursor.close()

    @staticmethod
    def _coerce(value, type_name):
        """Convert one cell to the Python value matching ``type_name``."""
        if type_name in ("int", "bigInt"):
            try:
                return int(value)
            except (TypeError, ValueError, OverflowError):
                return 0
        if type_name == "double":
            try:
                return float(value)
            except (TypeError, ValueError, OverflowError):
                return 0.0
        temporal = myMySQL._TEMPORAL_FORMATS.get(type_name)
        if temporal is None:
            # Text-like and unknown types are stored unchanged.
            return value
        fmt, fallback = temporal
        try:
            return datetime.datetime.strptime(value, fmt)
        except (TypeError, ValueError):
            return datetime.datetime.strptime(fallback, fmt)

    def write_to_mysql(self, OUTPUT, record, types):
        """Insert every row of ``OUTPUT`` into the table set by ``create_table``.

        Cells are converted in place according to ``types``.  A failing insert
        is reported with ``print`` and the remaining rows are still processed;
        the transaction is committed once after the last row.

        Args:
            OUTPUT: Iterable of rows, each a mutable sequence of cells.
            record: Sequence of flags; the cell at index ``i`` is inserted
                only when ``record[i]`` is truthy.
            types: Sequence of type names, one per cell index.
        """
        cursor = self.conn.cursor()
        try:
            for line in OUTPUT:
                for i, cell in enumerate(line):
                    line[i] = self._coerce(cell, types[i])
                to_write = [cell for i, cell in enumerate(line) if record[i]]
                # One placeholder per value; the values are bound by the driver.
                placeholders = ", ".join(["%s"] * len(to_write))
                sql = "INSERT INTO " + self.table_name + " " + self.field_sql + " VALUES (" + placeholders + ")"
                try:
                    cursor.execute(sql, to_write)
                except Exception as e:
                    print("Error:", e)
                    print("Error SQL:", sql, to_write)
                    print("插入数据库错误,请查看以上的错误提示,然后检查数据的类型是否正确,是否文本过长(超过一万的文本类型要设置为大文本)。")
                    print("Inserting database error, please check the above error, and then check whether the data type is correct, whether the text is too long (text type over 10,000 should be set to large text).")
                    print("重新执行任务时,请删除数据库中的数据表" + self.table_name + ",然后再次运行程序。")
                    print("When re-executing the task, please delete the data table " + self.table_name + " in the database, and then run the program again.")
            # Commit the inserted rows.
            self.conn.commit()
        finally:
            cursor.close()

    def close(self):
        """Close the database connection and report it."""
        self.conn.close()
        print("成功关闭数据库。")
        print("Successfully closed the database.")
|
@ -73,22 +73,22 @@ if __name__ == "__main__":
|
||||
compress_folder_to_7z("./EasySpider_windows_x64", file_name)
|
||||
print(f"Compress {file_name} successfully!")
|
||||
elif sys.platform == "win32" and platform.architecture()[0] == "32bit":
|
||||
file_name = f"EasySpider_{easyspider_version}_windows_x86.7z"
|
||||
if os.path.exists("./EasySpider_windows_x86/user_data"):
|
||||
shutil.rmtree("./EasySpider_windows_x86/user_data")
|
||||
if os.path.exists("./EasySpider_windows_x86/Data"):
|
||||
shutil.rmtree("./EasySpider_windows_x86/Data")
|
||||
if os.path.exists("./EasySpider_windows_x86/execution_instances"):
|
||||
shutil.rmtree("./EasySpider_windows_x86/execution_instances")
|
||||
if os.path.exists("./EasySpider_windows_x86/config.json"):
|
||||
os.remove("./EasySpider_windows_x86/config.json")
|
||||
if os.path.exists("./EasySpider_windows_x86/mysql_config.json"):
|
||||
os.remove("./EasySpider_windows_x86/mysql_config.json")
|
||||
os.mkdir("./EasySpider_windows_x86/Data")
|
||||
os.mkdir("./EasySpider_windows_x86/execution_instances")
|
||||
compress_folder_to_7z_split("./EasySpider_windows_x86", file_name)
|
||||
file_name = f"EasySpider_{easyspider_version}_windows_x32.7z"
|
||||
if os.path.exists("./EasySpider_windows_x32/user_data"):
|
||||
shutil.rmtree("./EasySpider_windows_x32/user_data")
|
||||
if os.path.exists("./EasySpider_windows_x32/Data"):
|
||||
shutil.rmtree("./EasySpider_windows_x32/Data")
|
||||
if os.path.exists("./EasySpider_windows_x32/execution_instances"):
|
||||
shutil.rmtree("./EasySpider_windows_x32/execution_instances")
|
||||
if os.path.exists("./EasySpider_windows_x32/config.json"):
|
||||
os.remove("./EasySpider_windows_x32/config.json")
|
||||
if os.path.exists("./EasySpider_windows_x32/mysql_config.json"):
|
||||
os.remove("./EasySpider_windows_x32/mysql_config.json")
|
||||
os.mkdir("./EasySpider_windows_x32/Data")
|
||||
os.mkdir("./EasySpider_windows_x32/execution_instances")
|
||||
compress_folder_to_7z_split("./EasySpider_windows_x32", file_name)
|
||||
print(f"Compress {file_name} Split successfully!")
|
||||
compress_folder_to_7z("./EasySpider_windows_x86", file_name)
|
||||
compress_folder_to_7z("./EasySpider_windows_x32", file_name)
|
||||
print(f"Compress {file_name} successfully!")
|
||||
elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
|
||||
file_name = f"EasySpider_{easyspider_version}_Linux_x64.tar.xz"
|
||||
|
@ -47,7 +47,7 @@ If you already have Chrome installed on your local machine, you can directly exe
|
||||
下载一个Chrome:[https://www.google.com/chrome/](https://www.google.com/chrome/),然后找到Chrome安装后的文件夹,如`C:\Program Files\Google\Chrome\Application`,把这个文件夹拷贝到此`ElectronJS`文件夹内,并按照以下格式更名:
|
||||
|
||||
```
|
||||
chrome_win32/ # for windows x86
|
||||
chrome_win32/ # for windows x32
|
||||
chrome_win64/ # for windows x64
|
||||
chrome_linux64/ # for linux x64
|
||||
chrome_mac64/ # for mac x64
|
||||
@ -56,7 +56,7 @@ chrome_mac64/ # for mac x64
|
||||
然后,从下面的页面下载和**自己安装的Chrome版本一致**的Chromedriver:[https://chromedriver.chromium.org/downloads](https://chromedriver.chromium.org/downloads),把chromedriver放入刚刚的chrome文件夹内,并更名为下面的格式:
|
||||
|
||||
```
|
||||
chromedriver_win32.exe # for windows x86
|
||||
chromedriver_win32.exe # for windows x32
|
||||
chromedriver_win64.exe # for windows x64
|
||||
chromedriver_linux64 # for linux x64
|
||||
chromedriver_mac64 # for mac x64
|
||||
@ -70,7 +70,7 @@ chromedriver_mac64 # for mac x64
|
||||
Download Chrome from the Internet: https://www.google.com/chrome/, then put it into this folder, named according to the following format:
|
||||
|
||||
```
|
||||
chrome_win32/ # for windows x86
|
||||
chrome_win32/ # for windows x32
|
||||
chrome_win64/ # for windows x64
|
||||
chrome_linux64/ # for linux x64
|
||||
chrome_mac64/ # for mac x64
|
||||
@ -79,7 +79,7 @@ chrome_mac64/ # for mac x64
|
||||
Then, download the corresponding chromedriver from this page: https://chromedriver.chromium.org/downloads. Note that the **chromedriver version must match your Chrome version!!!** Put it into the corresponding Chrome folder, named according to the following format:
|
||||
|
||||
```
|
||||
chromedriver_win32.exe # for windows x86
|
||||
chromedriver_win32.exe # for windows x32
|
||||
chromedriver_win64.exe # for windows x64
|
||||
chromedriver_linux64 # for linux x64
|
||||
chromedriver_mac64 # for mac x64
|
||||
|
@ -6,20 +6,20 @@ rmdir /s /q out\EasySpider\resources\app\.idea
|
||||
rmdir /s /q out\EasySpider\resources\app\tasks
|
||||
rmdir /s /q out\EasySpider\resources\app\execution_instances
|
||||
rmdir /s /q out\EasySpider\resources\app\user_data
|
||||
rmdir /s /q ..\.temp_to_pub\EasySpider_windows_x86\EasySpider
|
||||
rmdir /s /q ..\.temp_to_pub\EasySpider_windows_x32\EasySpider
|
||||
del out\EasySpider\resources\app\vs_BuildTools.exe
|
||||
move out\EasySpider ..\.temp_to_pub\EasySpider_windows_x86\EasySpider
|
||||
rmdir /s /q ..\.temp_to_pub\EasySpider_windows_x86\Code
|
||||
mkdir ..\.temp_to_pub\EasySpider_windows_x86\Code
|
||||
copy ..\ExecuteStage\easyspider_executestage.py ..\.temp_to_pub\EasySpider_windows_x86\Code
|
||||
copy ..\ExecuteStage\myChrome.py ..\.temp_to_pub\EasySpider_windows_x86\Code
|
||||
copy ..\ExecuteStage\utils.py ..\.temp_to_pub\EasySpider_windows_x86\Code
|
||||
copy ..\ExecuteStage\requirements.txt ..\.temp_to_pub\EasySpider_windows_x86\Code
|
||||
xcopy ..\ExecuteStage\undetected_chromedriver_ES ..\.temp_to_pub\EasySpider_windows_x86\Code\undetected_chromedriver_ES /E /I /Y
|
||||
rmdir /s /q ..\.temp_to_pub\EasySpider_windows_x86\user_data
|
||||
rmdir /s /q ..\.temp_to_pub\EasySpider_windows_x86\execution_instances
|
||||
mkdir ..\.temp_to_pub\EasySpider_windows_x86\execution_instances
|
||||
rmdir /s /q ..\.temp_to_pub\EasySpider_windows_x86\Data
|
||||
mkdir ..\.temp_to_pub\EasySpider_windows_x86\Data
|
||||
move out\EasySpider ..\.temp_to_pub\EasySpider_windows_x32\EasySpider
|
||||
rmdir /s /q ..\.temp_to_pub\EasySpider_windows_x32\Code
|
||||
mkdir ..\.temp_to_pub\EasySpider_windows_x32\Code
|
||||
copy ..\ExecuteStage\easyspider_executestage.py ..\.temp_to_pub\EasySpider_windows_x32\Code
|
||||
copy ..\ExecuteStage\myChrome.py ..\.temp_to_pub\EasySpider_windows_x32\Code
|
||||
copy ..\ExecuteStage\utils.py ..\.temp_to_pub\EasySpider_windows_x32\Code
|
||||
copy ..\ExecuteStage\requirements.txt ..\.temp_to_pub\EasySpider_windows_x32\Code
|
||||
xcopy ..\ExecuteStage\undetected_chromedriver_ES ..\.temp_to_pub\EasySpider_windows_x32\Code\undetected_chromedriver_ES /E /I /Y
|
||||
rmdir /s /q ..\.temp_to_pub\EasySpider_windows_x32\user_data
|
||||
rmdir /s /q ..\.temp_to_pub\EasySpider_windows_x32\execution_instances
|
||||
mkdir ..\.temp_to_pub\EasySpider_windows_x32\execution_instances
|
||||
rmdir /s /q ..\.temp_to_pub\EasySpider_windows_x32\Data
|
||||
mkdir ..\.temp_to_pub\EasySpider_windows_x32\Data
|
||||
del EasySpider_en.crx
|
||||
del EasySpider_zh.crx
|
14
Readme.md
14
Readme.md
@ -38,6 +38,20 @@ A visual code-free/no-code web crawler/spider, just select the content you want
|
||||
|
||||
Refer to the [Releases Page](https://github.com/NaiboWang/EasySpider/releases) to download the latest version of EasySpider.
|
||||
|
||||
## 支持作者/Support Author
|
||||
|
||||
易采集EasySpider是一款完全免费无广告的开源软件,软件开发和维护全靠作者用爱发电,因此您可以选择支持作者让作者有更多的热情和精力维护此软件,或者您使用了此软件进行了盈利,欢迎您通过下面的方式支持作者:
|
||||
|
||||
1. 支付宝账号:naibowang@foxmail.com,也可以扫描下方二维码。
|
||||
2. 微信收款:扫描下方二维码。
|
||||
3. PayPal账号:naibowang,也可以扫描下方二维码。
|
||||
|
||||
If you like this software or use it to make a profit, you can support the author via PayPal: naibowang
|
||||
|
||||

|
||||
|
||||
|
||||
|
||||
## 文档/Documentation
|
||||
|
||||
请点此进入[教程文档](https://github.com/NaiboWang/EasySpider/wiki),如有英文可暂时翻译一下,或看作者的[硕士毕业论文](Docs/%E9%9D%A2%E5%90%91WEB%E5%BA%94%E7%94%A8%E7%9A%84%E6%99%BA%E8%83%BD%E5%8C%96%E6%9C%8D%E5%8A%A1%E5%B0%81%E8%A3%85%E7%B3%BB%E7%BB%9F%E8%AE%BE%E8%AE%A1%E4%B8%8E%E5%AE%9E%E7%8E%B0.pdf)(主要看第三章和第五章)。
|
||||
|
BIN
media/QRCODES.png
Normal file
BIN
media/QRCODES.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 220 KiB |
Loading…
x
Reference in New Issue
Block a user