Fit for MacOS Arm version

This commit is contained in:
Naibo_Mac_M2 2023-09-12 21:07:17 +08:00
parent eb5715066b
commit b65cafdcf2
11 changed files with 1004 additions and 432 deletions

View File

@ -12,6 +12,8 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import Select from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
import sys
desired_capabilities = DesiredCapabilities.CHROME desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none" desired_capabilities["pageLoadStrategy"] = "none"
@ -89,9 +91,13 @@ class MyChrome(webdriver.Chrome):
else: else:
return super().find_elements(by=by, value=value) return super().find_elements(by=by, value=value)
import sys # MacOS不支持直接打包带Cloudflare的功能如果要自己编译运行可以把这个if去掉然后配置好浏览器和driver路径
if sys.platform != "darwin": # MacOS不支持Cloudflare if sys.platform != "darwin":
ES = True
if ES: # 用自己写的ES版本
import undetected_chromedriver_ES as uc import undetected_chromedriver_ES as uc
else:
import undetected_chromedriver as uc
class MyUCChrome(uc.Chrome): class MyUCChrome(uc.Chrome):

View File

@ -15,6 +15,7 @@ from urllib.parse import urlparse
import pymysql import pymysql
from lxml import etree from lxml import etree
def is_valid_url(url): def is_valid_url(url):
try: try:
result = urlparse(url) result = urlparse(url)
@ -22,6 +23,7 @@ def is_valid_url(url):
except ValueError: except ValueError:
return False return False
def lowercase_tags_in_xpath(xpath): def lowercase_tags_in_xpath(xpath):
return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath) return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath)
@ -53,6 +55,7 @@ def on_press_creator(press_time, event):
pass pass
return on_press return on_press
def on_release_creator(event, press_time): def on_release_creator(event, press_time):
def on_release(key): def on_release(key):
try: try:
@ -92,12 +95,13 @@ def on_release_creator(event, press_time):
# time.sleep(1) # 每秒检查一次 # time.sleep(1) # 每秒检查一次
def download_image(url, save_directory): def download_image(browser, url, save_directory):
# 定义浏览器头信息 # 定义浏览器头信息
headers = { headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
} }
if is_valid_url(url): if is_valid_url(url):
try:
# 发送 GET 请求获取图片数据 # 发送 GET 请求获取图片数据
response = requests.get(url, headers=headers) response = requests.get(url, headers=headers)
@ -117,15 +121,19 @@ def download_image(url, save_directory):
with open(save_path, 'wb') as file: with open(save_path, 'wb') as file:
file.write(response.content) file.write(response.content)
print("图片已成功下载到:", save_path) browser.print_and_log("图片已成功下载到:", save_path)
print("The image has been successfully downloaded to:", save_path) browser.print_and_log(
"The image has been successfully downloaded to:", save_path)
else: else:
print("下载图片失败,请检查此图片链接是否有效:", url) browser.print_and_log("下载图片失败,请检查此图片链接是否有效:", url)
print( browser.print_and_log(
"Failed to download image, please check if this image link is valid:", url) "Failed to download image, please check if this image link is valid:", url)
except Exception as e:
browser.print_and_log("下载图片失败|Error downloading image: ", e)
else: else:
print("下载图片失败,请检查此图片链接是否有效:", url) browser.print_and_log("下载图片失败,请检查此图片链接是否有效:", url)
print("Failed to download image, please check if this image link is valid:", url) browser.print_and_log(
"Failed to download image, please check if this image link is valid:", url)
def get_output_code(output): def get_output_code(output):
@ -141,9 +149,10 @@ def get_output_code(output):
# 判断字段是否为空 # 判断字段是否为空
def isnotnull(s):
    """Return True when ``s`` contains at least one item, False when it is empty."""
    return len(s) > 0
def new_line(outputParameters, maxViewLength, record): def new_line(outputParameters, maxViewLength, record):
line = [] line = []
i = 0 i = 0
@ -155,6 +164,7 @@ def new_line(outputParameters, maxViewLength, record):
print("") print("")
return line return line
def write_to_csv(file_name, data, record): def write_to_csv(file_name, data, record):
with open(file_name, 'a', encoding='utf-8-sig', newline="") as f: with open(file_name, 'a', encoding='utf-8-sig', newline="") as f:
f_csv = csv.writer(f) f_csv = csv.writer(f)
@ -167,6 +177,61 @@ def write_to_csv(file_name, data, record):
f.close() f.close()
def eval_repl(matchobj):
    """Evaluate the first capture group of ``matchobj`` and return it as text.

    The captured text is echoed to stdout, then evaluated as a Python
    expression against this module's globals, with ``matchobj`` as the only
    local name visible to the expression.

    NOTE(review): ``eval`` executes arbitrary code; only pass matches taken
    from trusted input.
    """
    expression = matchobj.group(1)
    print(expression)
    # Expose exactly the local namespace the expression is meant to see.
    visible_locals = {"matchobj": matchobj}
    return str(eval(expression, globals(), visible_locals))
def replace_field_values(orginal_text, outputParameters, browser=None):
    """Expand ``Field["name"]`` placeholders and ``EVAL("...")`` expressions.

    Args:
        orginal_text: Text that may contain ``Field["name"]`` placeholders
            and ``EVAL("expression")`` snippets.
        outputParameters: Mapping of field name to its current value. Names
            that are missing expand to an empty string; values are converted
            with ``str()``.
        browser: Object made available to EVAL expressions under the name
            ``browser``. Any ``self.`` in the text is rewritten to
            ``browser.`` before evaluation.

    Returns:
        The expanded text, or ``orginal_text`` unchanged if any substitution
        or evaluation raises.
    """
    field_pattern = r'Field\["([^"]+)"\]'
    eval_pattern = r'EVAL\("(.*?)"\)'

    def expand_field(match):
        # re.sub needs a str replacement, but field values may be numbers.
        return str(outputParameters.get(match.group(1), ''))

    def expand_eval(match):
        # eval() only sees the names we hand it, so ``browser`` has to be
        # passed explicitly or every ``browser.xxx`` expression fails.
        # SECURITY: eval runs arbitrary code; the text must come from a
        # trusted task definition.
        namespace = {"browser": browser, "match": match}
        return str(eval(match.group(1), globals(), namespace))

    try:
        replaced_text = re.sub(field_pattern, expand_field, orginal_text)
        if "EVAL" in replaced_text:  # only evaluate when an EVAL marker is present
            replaced_text = replaced_text.replace("self.", "browser.")
            replaced_text = re.sub(eval_pattern, expand_eval, replaced_text)
    except Exception:
        # Best effort: fall back to the untouched input on any failure.
        replaced_text = orginal_text
    return replaced_text
def write_to_json(file_name, data, types, record, keys):
    """Append rows to a JSON file that holds a list of objects.

    Args:
        file_name: Path of the JSON file; created if it cannot be read.
        data: Iterable of rows (mutable sequences). Numeric columns are
            coerced in place, so the caller's rows are modified.
        types: Per-column type names. ``"int"``/``"bigInt"`` columns are
            converted with ``int`` (0 on failure) and ``"double"`` columns
            with ``float`` (0.0 on failure); other types are left as is.
        record: Per-column flags; only columns with a truthy flag are written.
        keys: Per-column names used as the JSON object keys.
    """
    keys = list(keys)
    data_to_write = []
    for line in data:
        to_write = {}
        for i, value in enumerate(line):
            column_type = types[i]
            if column_type in ("int", "bigInt"):
                try:
                    line[i] = int(value)
                except (TypeError, ValueError, OverflowError):
                    line[i] = 0
            elif column_type == "double":
                try:
                    line[i] = float(value)
                except (TypeError, ValueError, OverflowError):
                    line[i] = 0.0
            if record[i]:
                to_write[keys[i]] = line[i]
        data_to_write.append(to_write)

    try:
        # Load what is already stored so the new rows are appended to it.
        with open(file_name, 'r', encoding='utf-8') as f:
            json_data = json.load(f)
    except (OSError, ValueError):
        # Missing, unreadable or malformed file: start from an empty list.
        json_data = []
    if not isinstance(json_data, list):
        # Keep a non-list document as the first element instead of crashing.
        json_data = [json_data]
    json_data.extend(data_to_write)

    with open(file_name, 'w', encoding='utf-8') as f:
        json.dump(json_data, f, ensure_ascii=False)
def write_to_excel(file_name, data, types, record): def write_to_excel(file_name, data, types, record):
first = False first = False
if os.path.exists(file_name): if os.path.exists(file_name):
@ -203,9 +268,6 @@ def write_to_excel(file_name, data, types, record):
wb.save(file_name) wb.save(file_name)
class Time: class Time:
def __init__(self, type1=""): def __init__(self, type1=""):
self.t = int(round(time.time() * 1000)) self.t = int(round(time.time() * 1000))
@ -223,27 +285,33 @@ class myMySQL:
if sys.platform == "darwin": if sys.platform == "darwin":
if config_file.find("./") >= 0: if config_file.find("./") >= 0:
config_file = config_file.replace("./", "") config_file = config_file.replace("./", "")
config_file = os.path.expanduser("~/Library/Application Support/EasySpider/" + config_file) config_file = os.path.expanduser(
"~/Library/Application Support/EasySpider/" + config_file)
print("MySQL config file path: ", config_file) print("MySQL config file path: ", config_file)
with open(config_file, 'r') as f: with open(config_file, 'r') as f:
config = json.load(f) config = json.load(f)
host = config["host"] self.host = config["host"]
port = config["port"] self.port = config["port"]
user = config["username"] self.user = config["username"]
passwd = config["password"] self.passwd = config["password"]
db = config["database"] self.db = config["database"]
except Exception as e: except Exception as e:
print("读取配置文件失败,请检查配置文件:"+config_file+"是否存在。") print("读取配置文件失败,请检查配置文件:"+config_file+"是否存在,或配置信息是否有误。")
print("Failed to read configuration file, please check if the configuration file: "+config_file+" exists.") print("Failed to read configuration file, please check if the configuration file: " +
config_file+" exists, or if the configuration information is incorrect.")
print(e) print(e)
self.connect()
def connect(self):
    """Open a pymysql connection from the stored settings.

    Reads ``self.host``, ``self.port``, ``self.user``, ``self.passwd`` and
    ``self.db`` and stores the resulting connection on ``self.conn``. On
    failure the error is reported on stdout and ``self.conn`` is left
    unchanged; no exception is propagated.
    """
    try:
        self.conn = pymysql.connect(
            host=self.host, port=self.port, user=self.user, passwd=self.passwd, db=self.db)
        print("成功连接到数据库。")
        print("Successfully connected to the database.")
    except Exception as e:
        print("连接数据库失败,请检查配置文件是否正确。")
        print(
            "Failed to connect to the database, please check if the configuration file is correct.")
        # Show the underlying reason so the user can tell a bad password
        # from an unreachable host or a missing setting.
        print("Error:", e)
def create_table(self, table_name, parameters): def create_table(self, table_name, parameters):
self.table_name = table_name self.table_name = table_name
@ -253,7 +321,8 @@ class myMySQL:
cursor.execute("SHOW TABLES LIKE '%s'" % table_name) cursor.execute("SHOW TABLES LIKE '%s'" % table_name)
result = cursor.fetchone() result = cursor.fetchone()
sql = "CREATE TABLE " + table_name + " (_id INT AUTO_INCREMENT PRIMARY KEY, " sql = "CREATE TABLE " + table_name + \
" (_id INT AUTO_INCREMENT PRIMARY KEY, "
for item in parameters: for item in parameters:
if item["recordASField"]: if item["recordASField"]:
name = item['name'] name = item['name']
@ -309,25 +378,32 @@ class myMySQL:
line[i] = 0.0 line[i] = 0.0
elif types[i] == "datetime": elif types[i] == "datetime":
try: try:
line[i] = datetime.datetime.strptime(line[i], '%Y-%m-%d %H:%M:%S') line[i] = datetime.datetime.strptime(
line[i], '%Y-%m-%d %H:%M:%S')
except: except:
line[i] = datetime.datetime.strptime("1970-01-01 00:00:00", '%Y-%m-%d %H:%M:%S') line[i] = datetime.datetime.strptime(
"1970-01-01 00:00:00", '%Y-%m-%d %H:%M:%S')
elif types[i] == "date": elif types[i] == "date":
try: try:
line[i] = datetime.datetime.strptime(line[i], '%Y-%m-%d') line[i] = datetime.datetime.strptime(
line[i], '%Y-%m-%d')
except: except:
line[i] = datetime.datetime.strptime("1970-01-01", '%Y-%m-%d') line[i] = datetime.datetime.strptime(
"1970-01-01", '%Y-%m-%d')
elif types[i] == "time": elif types[i] == "time":
try: try:
line[i] = datetime.datetime.strptime(line[i], '%H:%M:%S') line[i] = datetime.datetime.strptime(
line[i], '%H:%M:%S')
except: except:
line[i] = datetime.datetime.strptime("00:00:00", '%H:%M:%S') line[i] = datetime.datetime.strptime(
"00:00:00", '%H:%M:%S')
to_write = [] to_write = []
for i in range(len(line)): for i in range(len(line)):
if record[i]: if record[i]:
to_write.append(line[i]) to_write.append(line[i])
# 构造插入数据的 SQL 语句 # 构造插入数据的 SQL 语句
sql = f"INSERT INTO "+ self.table_name +" "+self.field_sql+" VALUES (" sql = f"INSERT INTO " + self.table_name + \
" "+self.field_sql+" VALUES ("
for item in to_write: for item in to_write:
sql += "%s, " sql += "%s, "
# 移除最后的逗号并添加闭合的括号 # 移除最后的逗号并添加闭合的括号
@ -335,13 +411,21 @@ class myMySQL:
# 执行 SQL 语句 # 执行 SQL 语句
try: try:
cursor.execute(sql, to_write) cursor.execute(sql, to_write)
except pymysql.OperationalError as e:
print("Error:", e)
print("Try to reconnect to the database...")
self.connect()
cursor = self.conn.cursor() # 重新创建游标对象
cursor.execute(sql, to_write) # 重新执行SQL语句
# self.write_to_mysql(OUTPUT, record, types)
except Exception as e: except Exception as e:
print("Error:", e) print("Error:", e)
print("Error SQL:", sql, to_write) print("Error SQL:", sql, to_write)
print("插入数据库错误,请查看以上的错误提示,然后检查数据的类型是否正确,是否文本过长(超过一万的文本类型要设置为大文本)。") print("插入数据库错误,请查看以上的错误提示,然后检查数据的类型是否正确,是否文本过长(超过一万的文本类型要设置为大文本)。")
print("Inserting database error, please check the above error, and then check whether the data type is correct, whether the text is too long (text type over 10,000 should be set to large text).") print("Inserting database error, please check the above error, and then check whether the data type is correct, whether the text is too long (text type over 10,000 should be set to large text).")
print("重新执行任务时,请删除数据库中的数据表" + self.table_name + ",然后再次运行程序。") print("重新执行任务时,请删除数据库中的数据表" + self.table_name + ",然后再次运行程序。")
print("When re-executing the task, please delete the data table " + self.table_name + " in the database, and then run the program again.") print("When re-executing the task, please delete the data table " +
self.table_name + " in the database, and then run the program again.")
# 提交到数据库执行 # 提交到数据库执行
self.conn.commit() self.conn.commit()

View File

@ -2,4 +2,14 @@ Due to the complex security settings of MacOS, the issue of being unable to open
https://github.com/NaiboWang/EasySpider/wiki/MacOS-Guide https://github.com/NaiboWang/EasySpider/wiki/MacOS-Guide
For the Arm version, if it shows "the package is damaged", you need to use the following command to modify the package attributes:
xattr -cr Your EasySpider.app file path
For example:
xattr -cr /Users/username/Downloads/EasySpider_MacOS_all_arch/EasySpider.app
Then try to open it again.
File access permissions must be granted, microphone permissions are not necessary at all, and the author is not sure why microphone permissions are being requested, so they can be declined. File access permissions must be granted, microphone permissions are not necessary at all, and the author is not sure why microphone permissions are being requested, so they can be declined.

View File

@ -1,3 +1,13 @@
由于MacOS复杂的安全性设置初次打开软件会显示未验证开发者从而不允许打开的问题请参考以下视频来查看MacOS版本如何打开软件和执行任务https://www.bilibili.com/video/BV1WL411h71r 由于MacOS复杂的安全性设置初次打开软件会显示未验证开发者从而不允许打开的问题请参考以下视频来查看MacOS版本如何打开软件和执行任务https://www.bilibili.com/video/BV1WL411h71r
对于Arm版本如果显示“包已损坏”则需要使用下面的命令修改包属性
xattr -cr 你的EasySpider.app文件路径
如:
xattr -cr /Users/用户名/Downloads/EasySpider_MacOS_all_arch/EasySpider.app
然后再次尝试打开。
文件访问权限必须给,麦克风权限完全用不到,作者也不清楚为什么会需要麦克风,因此可以拒绝。 文件访问权限必须给,麦克风权限完全用不到,作者也不清楚为什么会需要麦克风,因此可以拒绝。

Binary file not shown.

Binary file not shown.

View File

@ -7,8 +7,8 @@ cd ../../ElectronJS
rm -rf out rm -rf out
rm -r ../.temp_to_pub/EasySpider_MacOS_all_arch/EasySpider.app rm -r ../.temp_to_pub/EasySpider_MacOS_all_arch/EasySpider.app
npm run make npm run make
unzip out/make/zip/darwin/x64/EasySpider-darwin-x64* -d ../.temp_to_pub/EasySpider_MacOS_all_arch/ unzip out/make/zip/darwin/*64/EasySpider-darwin* -d ../.temp_to_pub/EasySpider_MacOS_all_arch/
# mv out/EasySpider-darwin-x64/EasySpider.app ../.temp_to_pub/EasySpider_MacOS_all_arch/ # mv out/EasySpider-darwin-*64/EasySpider.app ../.temp_to_pub/EasySpider_MacOS_all_arch/
rm ../.temp_to_pub/EasySpider_MacOS_all_arch/EasySpider.app/Contents/Resources/app/VS_BuildTools.exe rm ../.temp_to_pub/EasySpider_MacOS_all_arch/EasySpider.app/Contents/Resources/app/VS_BuildTools.exe
rm -r ../.temp_to_pub/EasySpider_MacOS_all_arch/EasySpider.app/Contents/Resources/app/chrome_win64 rm -r ../.temp_to_pub/EasySpider_MacOS_all_arch/EasySpider.app/Contents/Resources/app/chrome_win64
rm -r ../.temp_to_pub/EasySpider_MacOS_all_arch/EasySpider.app/Contents/Resources/app/chromedrivers rm -r ../.temp_to_pub/EasySpider_MacOS_all_arch/EasySpider.app/Contents/Resources/app/chromedrivers

View File

@ -1761,8 +1761,8 @@ class BrowserThread(Thread):
if __name__ == '__main__': if __name__ == '__main__':
# from multiprocessing import freeze_support from multiprocessing import freeze_support
# freeze_support() # 防止无限死循环多开 freeze_support() # 防止无限死循环多开
# 如果需要调试程序,请在命令行参数中加入--keyboard 0 来禁用键盘监听以提升调试速度 # 如果需要调试程序,请在命令行参数中加入--keyboard 0 来禁用键盘监听以提升调试速度
# If you need to debug the program, please add --keyboard 0 in the command line parameters to disable keyboard listening to improve debugging speed # If you need to debug the program, please add --keyboard 0 in the command line parameters to disable keyboard listening to improve debugging speed

View File

@ -1,7 +1,7 @@
commandline_config==2.2.3 commandline_config==2.2.3
requests==2.31.0 requests==2.31.0
selenium==4.5.0 selenium==4.5.0
pyinstaller==5.9.0 pyinstaller==5.13.2
Pillow==9.4.0 Pillow==9.4.0
openpyxl==3.1.2 openpyxl==3.1.2
pymysql==1.1.0 pymysql==1.1.0

View File

@ -1 +1 @@
{"language":"en"} {"language":"zh"}