MacOS version v0.3.5

This commit is contained in:
Naibo_Mac 2023-07-10 08:58:48 +08:00
parent 35b9494d42
commit 8c5267d66c
20 changed files with 282 additions and 177 deletions

View File

@ -5,6 +5,7 @@ EasySpider
EasySpider.app/ EasySpider.app/
EasySpider_windows_x64/user_data EasySpider_windows_x64/user_data
*.tmp *.tmp
*.tar.gz
*.7z* *.7z*
config.json config.json
mysql_config.json mysql_config.json

View File

@ -15,6 +15,8 @@ import time
import requests import requests
from urllib.parse import urljoin from urllib.parse import urljoin
from lxml import etree from lxml import etree
# import undetected_chromedriver as uc
from pynput.keyboard import Key, Listener
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.action_chains import ActionChains
@ -29,7 +31,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import Select from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
import undetected_chromedriver as uc
import random import random
# import pandas as pd # import pandas as pd
from openpyxl import load_workbook, Workbook from openpyxl import load_workbook, Workbook
@ -41,8 +42,10 @@ import pytesseract
from PIL import Image from PIL import Image
# import uuid # import uuid
from threading import Thread, Event from threading import Thread, Event
from myChrome import MyChrome, MyUCChrome from myChrome import MyChrome
from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press, on_release_creator, write_to_csv, write_to_excel if sys.platform != "darwin":
from myChrome import MyUCChrome
from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel
desired_capabilities = DesiredCapabilities.CHROME desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none" desired_capabilities["pageLoadStrategy"] = "none"
@ -1326,6 +1329,8 @@ class BrowserThread(Thread):
if __name__ == '__main__': if __name__ == '__main__':
# from multiprocessing import freeze_support
# freeze_support() # 防止无限死循环多开
config = { config = {
"id": [0], "id": [0],
"saved_file_name": "", "saved_file_name": "",
@ -1358,6 +1363,9 @@ if __name__ == '__main__':
# option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome" # option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
# driver_path = os.getcwd()+ "/chromedriver_mac64" # driver_path = os.getcwd()+ "/chromedriver_mac64"
print(driver_path) print(driver_path)
if c.config_folder == "":
c.config_folder = os.path.expanduser("~/Library/Application Support/EasySpider/")
# print("Config folder for MacOS:", c.config_folder)
elif os.path.exists(os.getcwd()+"/EasySpider/resources"): # 打包后的路径 elif os.path.exists(os.getcwd()+"/EasySpider/resources"): # 打包后的路径
print("Finding chromedriver in EasySpider", print("Finding chromedriver in EasySpider",
os.getcwd()+"/EasySpider") os.getcwd()+"/EasySpider")
@ -1367,16 +1375,19 @@ if __name__ == '__main__':
driver_path = os.path.join( driver_path = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe") os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe")
option.add_extension("EasySpider/resources/app/XPathHelper.crx") option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
elif sys.platform == "win32" and platform.architecture()[0] == "64bit": elif sys.platform == "win32" and platform.architecture()[0] == "64bit":
options.binary_location = os.path.join( options.binary_location = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe") os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
driver_path = os.path.join( driver_path = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe") os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe")
option.add_extension("EasySpider/resources/app/XPathHelper.crx") option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
elif sys.platform == "linux" and platform.architecture()[0] == "64bit": elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome" options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64" driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64"
option.add_extension("EasySpider/resources/app/XPathHelper.crx") option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
else: else:
print("Unsupported platform") print("Unsupported platform")
sys.exit() sys.exit()
@ -1419,6 +1430,7 @@ if __name__ == '__main__':
try: try:
with open(c.config_folder + c.config_file_name, "r", encoding='utf-8') as f: with open(c.config_folder + c.config_file_name, "r", encoding='utf-8') as f:
config = json.load(f) config = json.load(f)
print("Config file path: " + c.config_folder + c.config_file_name)
absolute_user_data_folder = config["absolute_user_data_folder"] absolute_user_data_folder = config["absolute_user_data_folder"]
print("\nAbsolute_user_data_folder:", print("\nAbsolute_user_data_folder:",
absolute_user_data_folder, "\n") absolute_user_data_folder, "\n")
@ -1428,6 +1440,9 @@ if __name__ == '__main__':
option.add_argument( option.add_argument(
f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒 f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒
option.add_argument("--profile-directory=Default") option.add_argument("--profile-directory=Default")
options.add_argument(
f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒
options.add_argument("--profile-directory=Default")
if c.headless: if c.headless:
print("Headless mode") print("Headless mode")
@ -1444,7 +1459,7 @@ if __name__ == '__main__':
threads = [] threads = []
for i in c.id: for i in c.id:
print(options) # print(options)
print("id: ", i) print("id: ", i)
if c.read_type == "remote": if c.read_type == "remote":
print("remote") print("remote")
@ -1492,10 +1507,15 @@ if __name__ == '__main__':
browser_t = MyChrome( browser_t = MyChrome(
options=options, chrome_options=option, executable_path=driver_path) options=options, chrome_options=option, executable_path=driver_path)
elif cloudflare == 1: elif cloudflare == 1:
if sys.platform != "darwin":
browser_t = MyUCChrome( browser_t = MyUCChrome(
options=options, chrome_options=option, executable_path=driver_path) options=options, chrome_options=option, driver_executable_path=driver_path)
print("Pass Cloudflare Mode") print("Pass Cloudflare Mode")
print("过Cloudflare验证模式") print("过Cloudflare验证模式")
else:
print("Not support Cloudflare Mode on MacOS")
print("MacOS不支持Cloudflare验证模式")
sys.exit()
event = Event() event = Event()
event.set() event.set()
thread = BrowserThread(browser_t, i, service, thread = BrowserThread(browser_t, i, service,
@ -1505,26 +1525,33 @@ if __name__ == '__main__':
thread.start() thread.start()
# Set the pause operation # Set the pause operation
# if sys.platform != "linux": # if sys.platform != "linux":
# time.sleep(3)
# print("\n\n----------------------------------")
# print("正在运行任务长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码如果想恢复任务的执行请再次长按p键。")
# print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
# print("----------------------------------\n\n")
# Thread(target=check_pause, args=("p", event)).start() # Thread(target=check_pause, args=("p", event)).start()
# else: # else:
time.sleep(3) time.sleep(3)
press_time = {"duration": 0, "is_pressed": False}
print("\n\n----------------------------------") print("\n\n----------------------------------")
print("正在运行任务按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码如果想恢复任务的执行请再次按p键。") print("正在运行任务,按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码如果想恢复任务的执行请再次按p键。")
print("Running task, press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please press 'p' again.") print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
print("----------------------------------\n\n") print("----------------------------------\n\n")
# 使用监听器监听键盘输入 # 使用监听器监听键盘输入
try: try:
from pynput.keyboard import Key, Listener with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:
with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener:
listener.join() listener.join()
except: except:
print("您的操作系统不支持暂停功能。") pass
print("Your operating system does not support the pause function.") # print("您的操作系统不支持暂停功能。")
# print("Your operating system does not support the pause function.")
# print("线程长度:", len(threads) )
for thread in threads: for thread in threads:
print()
thread.join() thread.join()
for thread in threads: for thread in threads:

View File

@ -12,7 +12,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import Select from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
import undetected_chromedriver as uc
desired_capabilities = DesiredCapabilities.CHROME desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none" desired_capabilities["pageLoadStrategy"] = "none"
@ -90,8 +89,11 @@ class MyChrome(webdriver.Chrome):
else: else:
return super().find_elements(by=by, value=value) return super().find_elements(by=by, value=value)
import sys
if sys.platform != "darwin": # MacOS不支持Cloudflare
import undetected_chromedriver_ES as uc
class MyUCChrome(uc.Chrome): class MyUCChrome(uc.Chrome):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
self.iframe_env = False # 现在的环境是root还是iframe self.iframe_env = False # 现在的环境是root还是iframe

View File

@ -4,6 +4,7 @@ import csv
import datetime import datetime
import json import json
import os import os
import sys
import re import re
import time import time
import uuid import uuid
@ -24,26 +25,56 @@ def is_valid_url(url):
def lowercase_tags_in_xpath(xpath): def lowercase_tags_in_xpath(xpath):
return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath) return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath)
def on_release_creator(event):
def on_release(key): def on_press_creator(press_time, event):
def on_press(key):
try: try:
if key.char == 'p': # 当按下esc键时退出监听 if key.char == 'p':
if press_time["is_pressed"] == False: # 没按下p键时记录按下p键的时间
press_time["duration"] = time.time()
press_time["is_pressed"] = True
else: # 按下p键时判断按下p键的时间是否超过2.5秒
duration = time.time() - press_time["duration"]
if duration > 2:
if event._flag == False: if event._flag == False:
print("任务执行中按p键暂停执行。") print("任务执行中,按p键暂停执行。")
print("Task is running, press 'p' to pause.") print("Task is running, long press 'p' to pause.")
# 设置Event的值为True使得线程b可以继续执行 # 设置Event的值为True使得线程b可以继续执行
event.set() event.set()
else: else:
# 设置Event的值为False使得线程b暂停执行 # 设置Event的值为False使得线程b暂停执行
print("任务已暂停,按p键继续执行...") print("任务已暂停,按p键继续执行...")
print("Task paused, press 'p' to continue...") print("Task paused, long press 'p' to continue...")
event.clear() event.clear()
press_time["duration"] = time.time()
press_time["is_pressed"] = False
# print("按下p键时间", press_time["duration"])
except:
pass
return on_press
def on_release_creator(event, press_time):
def on_release(key):
try:
# duration = time.time() - press_time["duration"]
# # print("松开p键时间", time.time(), "Duration: ", duration)
# if duration > 2.5 and key.char == 'p':
# if event._flag == False:
# print("任务执行中按p键暂停执行。")
# print("Task is running, press 'p' to pause.")
# # 设置Event的值为True使得线程b可以继续执行
# event.set()
# else:
# # 设置Event的值为False使得线程b暂停执行
# print("任务已暂停按p键继续执行...")
# print("Task paused, press 'p' to continue...")
# event.clear()
# press_time["duration"] = time.time()
press_time["is_pressed"] = False
except: except:
pass pass
return on_release return on_release
def on_press(key):
pass
# def check_pause(key, event): # def check_pause(key, event):
# while True: # while True:
@ -189,16 +220,22 @@ class myMySQL:
def __init__(self, config_file="mysql_config.json"): def __init__(self, config_file="mysql_config.json"):
# 读取配置文件 # 读取配置文件
try: try:
if sys.platform == "darwin":
if config_file.find("./") >= 0:
config_file = config_file.replace("./", "")
config_file = os.path.expanduser("~/Library/Application Support/EasySpider/" + config_file)
print("MySQL config file path: ", config_file)
with open(config_file, 'r') as f: with open(config_file, 'r') as f:
config = json.load(f) config = json.load(f)
host = config["host"] host = config["host"]
port = config["port"] port = config["port"]
user = config["user"] user = config["username"]
passwd = config["password"] passwd = config["password"]
db = config["database"] db = config["database"]
except: except Exception as e:
print("读取配置文件失败,请检查配置文件:"+config_file+"是否存在。") print("读取配置文件失败,请检查配置文件:"+config_file+"是否存在。")
print("Failed to read configuration file, please check if the configuration file: "+config_file+" exists.") print("Failed to read configuration file, please check if the configuration file: "+config_file+" exists.")
print(e)
try: try:
self.conn = pymysql.connect( self.conn = pymysql.connect(
host=host, port=port, user=user, passwd=passwd, db=db) host=host, port=port, user=user, passwd=passwd, db=db)

2
.temp_to_pub/compress.cmd Normal file → Executable file
View File

@ -1 +1 @@
python compress.py python3 compress.py

View File

@ -45,7 +45,10 @@ def compress_folder_to_7z_split(folder_path, output_file):
try: try:
subprocess.call(["7z", "a", "-v95m", output_file, folder_path]) subprocess.call(["7z", "a", "-v95m", output_file, folder_path])
except: except:
try:
subprocess.call(["7za", "a", "-v95m", output_file, folder_path]) subprocess.call(["7za", "a", "-v95m", output_file, folder_path])
except:
subprocess.call(["7zz", "a", "-v95m", output_file, folder_path])
easyspider_version = "0.3.5" easyspider_version = "0.3.5"
@ -104,5 +107,11 @@ if __name__ == "__main__":
subprocess.call(["tar", "-Jcvf", file_name, "./EasySpider_Linux_x64"]) subprocess.call(["tar", "-Jcvf", file_name, "./EasySpider_Linux_x64"])
print(f"Compress {file_name} successfully!") print(f"Compress {file_name} successfully!")
elif sys.platform == "darwin" and platform.architecture()[0] == "64bit": elif sys.platform == "darwin" and platform.architecture()[0] == "64bit":
pass file_name = f"EasySpider_{easyspider_version}_MacOS_all_arch.tar.gz"
if os.path.exists("./EasySpider_MacOS_all_arch/Data"):
shutil.rmtree("./EasySpider_MacOS_all_arch/Data")
os.mkdir("./EasySpider_MacOS_all_arch/Data")
subprocess.call(["tar", "-zcvf", file_name, "./EasySpider_MacOS_all_arch"])
subprocess.call(["7zz", "a", "-v95m", file_name.replace(".tar.gz", ".7z"), file_name, "请继续解压EasySpider_MacOS_all_arch.tar.gz使用.txt"])
print(f"Compress {file_name} successfully!")

View File

@ -0,0 +1 @@
请继续解压.tar.gz文件以使用易采集。

Binary file not shown.

Binary file not shown.

View File

@ -1 +1 @@
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"} {"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data1","debug":false,"mysql_config_path":"/Users/naibowang/Documents/EasySpider/ElectronJS/mysql_config.json","absolute_user_data_folder":"/Users/naibowang/Documents/EasySpider/ElectronJS/user_data1"}

View File

@ -324,7 +324,9 @@ async function beginInvoke(msg, ws) {
config.absolute_user_data_folder = user_data_folder_path; config.absolute_user_data_folder = user_data_folder_path;
fs.writeFileSync(path.join(task_server.getDir(), "config.json"), JSON.stringify(config)); fs.writeFileSync(path.join(task_server.getDir(), "config.json"), JSON.stringify(config));
} }
if(msg.message.mysql_config_path != "-1"){
config.mysql_config_path = msg.message.mysql_config_path; config.mysql_config_path = msg.message.mysql_config_path;
}
fs.writeFileSync(path.join(task_server.getDir(), "config.json"), JSON.stringify(config)); fs.writeFileSync(path.join(task_server.getDir(), "config.json"), JSON.stringify(config));
// child('Chrome/easyspider_executestage.exe', parameters, function(err,stdout, stderr) { // child('Chrome/easyspider_executestage.exe', parameters, function(err,stdout, stderr) {
// console.log(stdout); // console.log(stdout);

View File

@ -23,4 +23,4 @@ cp ../ExecuteStage/easyspider_executestage.py ../.temp_to_pub/EasySpider_MacOS_a
cp ../ExecuteStage/myChrome.py ../.temp_to_pub/EasySpider_MacOS_all_arch/Code cp ../ExecuteStage/myChrome.py ../.temp_to_pub/EasySpider_MacOS_all_arch/Code
cp ../ExecuteStage/utils.py ../.temp_to_pub/EasySpider_MacOS_all_arch/Code cp ../ExecuteStage/utils.py ../.temp_to_pub/EasySpider_MacOS_all_arch/Code
cp ../ExecuteStage/requirements.txt ../.temp_to_pub/EasySpider_MacOS_all_arch/Code cp ../ExecuteStage/requirements.txt ../.temp_to_pub/EasySpider_MacOS_all_arch/Code
cp -Rf ../undetected_chromedriver_ES ../.temp_to_pub/EasySpider_MacOS_all_arch/Code cp -Rf ../ExecuteStage/undetected_chromedriver_ES ../.temp_to_pub/EasySpider_MacOS_all_arch/Code

View File

@ -563,7 +563,7 @@
<label>Is it an extreme anti-scraping website like Cloudflare?</label> <label>Is it an extreme anti-scraping website like Cloudflare?</label>
<select id="cloudflare" name="cloudflare" class="form-control"> <select id="cloudflare" name="cloudflare" class="form-control">
<option value=0>No</option> <option value=0>No</option>
<option value=1>Yes</option> <option value=1>Yes (Not support on MacOS, unless compile by yourself)</option>
</select> </select>
<label>Browser Emulation Type:</label> <label>Browser Emulation Type:</label>
<select id="environment" name="environment" class="form-control"> <select id="environment" name="environment" class="form-control">

View File

@ -563,7 +563,7 @@
<label>是否为Cloudflare等极端反爬网站<a href="https://www.bilibili.com/video/BV1Ph4y1E7R9/" target="_blank">查看Cloudflare设计和执行教程</a></label> <label>是否为Cloudflare等极端反爬网站<a href="https://www.bilibili.com/video/BV1Ph4y1E7R9/" target="_blank">查看Cloudflare设计和执行教程</a></label>
<select id="cloudflare" name="cloudflare" class="form-control"> <select id="cloudflare" name="cloudflare" class="form-control">
<option value = 0></option> <option value = 0></option>
<option value = 1></option> <option value = 1>MacOS不支持直接运行但可以自行编译</option>
</select> </select>
<label>浏览器模拟类型:</label> <label>浏览器模拟类型:</label>
<select id="environment" name="environment" class="form-control"> <select id="environment" name="environment" class="form-control">

View File

@ -209,7 +209,7 @@
<input type="text" class="form-control" v-model="user_data_folder"></input> <input type="text" class="form-control" v-model="user_data_folder"></input>
</div> </div>
<div class="form-group" style="margin-top: 10px" v-if="task.outputFormat=='mysql'"> <div class="form-group" style="margin-top: 10px" v-if="task.outputFormat=='mysql'">
<label>{{"MySQL configuration file Path:~MySQL配置文件路径" | lang}}</label> <label>{{"MySQL configuration file Path, relative to this folder:~MySQL配置文件路径,路径相对此文件夹" | lang}} {{config_folder}}</label>
<input type="text" class="form-control" v-model="mysql_config_path"></input> <input type="text" class="form-control" v-model="mysql_config_path"></input>
</div> </div>
</form> </form>
@ -485,13 +485,23 @@
ws.onopen = function () { ws.onopen = function () {
// Web Socket 已连接上,使用 send() 方法发送数据 // Web Socket 已连接上,使用 send() 方法发送数据
console.log("Connected"); console.log("Connected");
message = { let message = {
type: 0, //消息类型0代表链接操作 type: 0, //消息类型0代表链接操作
message: { message: {
id: 1, //socket id id: 1, //socket id
} }
}; };
this.send(JSON.stringify(message)); this.send(JSON.stringify(message));
message = { //显示flowchart
type: 5, //消息类型,调用执行程序
message: {
"id": -1,
"user_data_folder": "",
"mysql_config_path": "-1",
"execute_type": 1,
}
};
this.send(JSON.stringify(message));
}; };
ws.onmessage = function(message){ ws.onmessage = function(message){
message = JSON.parse(message.data); message = JSON.parse(message.data);

File diff suppressed because one or more lines are too long

View File

@ -15,7 +15,7 @@ import time
import requests import requests
from urllib.parse import urljoin from urllib.parse import urljoin
from lxml import etree from lxml import etree
import undetected_chromedriver as uc # import undetected_chromedriver as uc
from pynput.keyboard import Key, Listener from pynput.keyboard import Key, Listener
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.keys import Keys
@ -42,7 +42,9 @@ import pytesseract
from PIL import Image from PIL import Image
# import uuid # import uuid
from threading import Thread, Event from threading import Thread, Event
from myChrome import MyChrome, MyUCChrome from myChrome import MyChrome
if sys.platform != "darwin":
from myChrome import MyUCChrome
from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel
desired_capabilities = DesiredCapabilities.CHROME desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none" desired_capabilities["pageLoadStrategy"] = "none"
@ -1327,8 +1329,8 @@ class BrowserThread(Thread):
if __name__ == '__main__': if __name__ == '__main__':
from multiprocessing import freeze_support # from multiprocessing import freeze_support
freeze_support() # 防止无限死循环多开 # freeze_support() # 防止无限死循环多开
config = { config = {
"id": [0], "id": [0],
"saved_file_name": "", "saved_file_name": "",
@ -1361,6 +1363,9 @@ if __name__ == '__main__':
# option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome" # option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
# driver_path = os.getcwd()+ "/chromedriver_mac64" # driver_path = os.getcwd()+ "/chromedriver_mac64"
print(driver_path) print(driver_path)
if c.config_folder == "":
c.config_folder = os.path.expanduser("~/Library/Application Support/EasySpider/")
# print("Config folder for MacOS:", c.config_folder)
elif os.path.exists(os.getcwd()+"/EasySpider/resources"): # 打包后的路径 elif os.path.exists(os.getcwd()+"/EasySpider/resources"): # 打包后的路径
print("Finding chromedriver in EasySpider", print("Finding chromedriver in EasySpider",
os.getcwd()+"/EasySpider") os.getcwd()+"/EasySpider")
@ -1425,6 +1430,7 @@ if __name__ == '__main__':
try: try:
with open(c.config_folder + c.config_file_name, "r", encoding='utf-8') as f: with open(c.config_folder + c.config_file_name, "r", encoding='utf-8') as f:
config = json.load(f) config = json.load(f)
print("Config file path: " + c.config_folder + c.config_file_name)
absolute_user_data_folder = config["absolute_user_data_folder"] absolute_user_data_folder = config["absolute_user_data_folder"]
print("\nAbsolute_user_data_folder:", print("\nAbsolute_user_data_folder:",
absolute_user_data_folder, "\n") absolute_user_data_folder, "\n")
@ -1501,13 +1507,15 @@ if __name__ == '__main__':
browser_t = MyChrome( browser_t = MyChrome(
options=options, chrome_options=option, executable_path=driver_path) options=options, chrome_options=option, executable_path=driver_path)
elif cloudflare == 1: elif cloudflare == 1:
if sys.platform == "linux": if sys.platform != "darwin":
import ssl
ssl._create_default_https_context = ssl._create_unverified_context # 忽略证书验证
browser_t = MyUCChrome( browser_t = MyUCChrome(
options=options, chrome_options=option, driver_executable_path=driver_path) options=options, chrome_options=option, driver_executable_path=driver_path)
print("Pass Cloudflare Mode") print("Pass Cloudflare Mode")
print("过Cloudflare验证模式") print("过Cloudflare验证模式")
else:
print("Not support Cloudflare Mode on MacOS")
print("MacOS不支持Cloudflare验证模式")
sys.exit()
event = Event() event = Event()
event.set() event.set()
thread = BrowserThread(browser_t, i, service, thread = BrowserThread(browser_t, i, service,

View File

@ -12,7 +12,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import Select from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
import undetected_chromedriver_ES as uc
desired_capabilities = DesiredCapabilities.CHROME desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none" desired_capabilities["pageLoadStrategy"] = "none"
@ -90,8 +89,11 @@ class MyChrome(webdriver.Chrome):
else: else:
return super().find_elements(by=by, value=value) return super().find_elements(by=by, value=value)
import sys
if sys.platform != "darwin": # MacOS不支持Cloudflare
import undetected_chromedriver_ES as uc
class MyUCChrome(uc.Chrome): class MyUCChrome(uc.Chrome):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
self.iframe_env = False # 现在的环境是root还是iframe self.iframe_env = False # 现在的环境是root还是iframe

View File

@ -4,6 +4,7 @@ import csv
import datetime import datetime
import json import json
import os import os
import sys
import re import re
import time import time
import uuid import uuid
@ -219,6 +220,11 @@ class myMySQL:
def __init__(self, config_file="mysql_config.json"): def __init__(self, config_file="mysql_config.json"):
# 读取配置文件 # 读取配置文件
try: try:
if sys.platform == "darwin":
if config_file.find("./") >= 0:
config_file = config_file.replace("./", "")
config_file = os.path.expanduser("~/Library/Application Support/EasySpider/" + config_file)
print("MySQL config file path: ", config_file)
with open(config_file, 'r') as f: with open(config_file, 'r') as f:
config = json.load(f) config = json.load(f)
host = config["host"] host = config["host"]