Compatible with Windows

This commit is contained in:
naibo 2023-07-10 11:31:11 +08:00
parent 23dd4e4dd7
commit c31bc94dd0
6 changed files with 132 additions and 94 deletions

View File

@ -15,7 +15,7 @@ import time
import requests import requests
from urllib.parse import urljoin from urllib.parse import urljoin
from lxml import etree from lxml import etree
import undetected_chromedriver as uc # import undetected_chromedriver as uc
from pynput.keyboard import Key, Listener from pynput.keyboard import Key, Listener
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys from selenium.webdriver.common.keys import Keys
@ -42,8 +42,10 @@ import pytesseract
from PIL import Image from PIL import Image
# import uuid # import uuid
from threading import Thread, Event from threading import Thread, Event
from myChrome import MyChrome, MyUCChrome from myChrome import MyChrome
from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel if sys.platform != "darwin":
from myChrome import MyUCChrome
from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel
desired_capabilities = DesiredCapabilities.CHROME desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none" desired_capabilities["pageLoadStrategy"] = "none"
@ -279,7 +281,10 @@ class BrowserThread(Thread):
except: except:
self.Log('Time out after set seconds when scrolling. ') self.Log('Time out after set seconds when scrolling. ')
self.recordLog('Time out after set seconds when scrolling') self.recordLog('Time out after set seconds when scrolling')
try:
self.browser.execute_script('window.stop()') self.browser.execute_script('window.stop()')
except:
pass
if scrollType != 0 and para["scrollCount"] > 0: # 控制屏幕向下滚动 if scrollType != 0 and para["scrollCount"] > 0: # 控制屏幕向下滚动
for i in range(para["scrollCount"]): for i in range(para["scrollCount"]):
self.Log("Wait for set second after screen scrolling") self.Log("Wait for set second after screen scrolling")
@ -677,7 +682,10 @@ class BrowserThread(Thread):
# 切换历史记录等待: # 切换历史记录等待:
self.Log("Change history back time or:", self.Log("Change history back time or:",
node["parameters"]["historyWait"]) node["parameters"]["historyWait"])
try:
self.browser.execute_script('window.stop()') self.browser.execute_script('window.stop()')
except:
pass
if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件 if int(node["parameters"]["breakMode"]) > 0: # 如果设置了退出循环的脚本条件
output = self.execute_code(int( output = self.execute_code(int(
node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"]) node["parameters"]["breakMode"]) - 1, node["parameters"]["breakCode"], node["parameters"]["breakCodeWaitTime"], iframe=node["parameters"]["iframe"])
@ -722,7 +730,10 @@ class BrowserThread(Thread):
# time.sleep(2) # time.sleep(2)
self.Log("Change history back time or:", self.Log("Change history back time or:",
node["parameters"]["historyWait"]) node["parameters"]["historyWait"])
try:
self.browser.execute_script('window.stop()') self.browser.execute_script('window.stop()')
except:
pass
except NoSuchElementException: except NoSuchElementException:
print("Loop element not found: ", path) print("Loop element not found: ", path)
print("找不到循环元素: ", path) print("找不到循环元素: ", path)
@ -995,7 +1006,10 @@ class BrowserThread(Thread):
self.history["index"] = self.browser.execute_script( self.history["index"] = self.browser.execute_script(
"return history.length") "return history.length")
except TimeoutException: except TimeoutException:
try:
self.browser.execute_script('window.stop()') self.browser.execute_script('window.stop()')
except:
pass
self.history["index"] = self.browser.execute_script( self.history["index"] = self.browser.execute_script(
"return history.length") "return history.length")
else: else:
@ -1003,7 +1017,10 @@ class BrowserThread(Thread):
self.history["index"] = self.browser.execute_script( self.history["index"] = self.browser.execute_script(
"return history.length") "return history.length")
except TimeoutException: except TimeoutException:
try:
self.browser.execute_script('window.stop()') self.browser.execute_script('window.stop()')
except:
pass
self.history["index"] = self.browser.execute_script( self.history["index"] = self.browser.execute_script(
"return history.length") "return history.length")
# 如果打开了新窗口,切换到新窗口 # 如果打开了新窗口,切换到新窗口
@ -1275,7 +1292,10 @@ class BrowserThread(Thread):
self.Log('Time out after set seconds when getting data') self.Log('Time out after set seconds when getting data')
self.recordLog( self.recordLog(
'Time out after set seconds when getting data') 'Time out after set seconds when getting data')
try:
self.browser.execute_script('window.stop()') self.browser.execute_script('window.stop()')
except:
pass
if p["relative"]: # 是否相对xpath if p["relative"]: # 是否相对xpath
if p["relativeXPath"] == "": # 相对xpath有时候就是元素本身不需要二次查找 if p["relativeXPath"] == "": # 相对xpath有时候就是元素本身不需要二次查找
element = loopElement element = loopElement
@ -1327,8 +1347,8 @@ class BrowserThread(Thread):
if __name__ == '__main__': if __name__ == '__main__':
from multiprocessing import freeze_support # from multiprocessing import freeze_support
freeze_support() # 防止无限死循环多开 # freeze_support() # 防止无限死循环多开
config = { config = {
"id": [0], "id": [0],
"saved_file_name": "", "saved_file_name": "",
@ -1361,6 +1381,9 @@ if __name__ == '__main__':
# option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome" # option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
# driver_path = os.getcwd()+ "/chromedriver_mac64" # driver_path = os.getcwd()+ "/chromedriver_mac64"
print(driver_path) print(driver_path)
if c.config_folder == "":
c.config_folder = os.path.expanduser("~/Library/Application Support/EasySpider/")
# print("Config folder for MacOS:", c.config_folder)
elif os.path.exists(os.getcwd()+"/EasySpider/resources"): # 打包后的路径 elif os.path.exists(os.getcwd()+"/EasySpider/resources"): # 打包后的路径
print("Finding chromedriver in EasySpider", print("Finding chromedriver in EasySpider",
os.getcwd()+"/EasySpider") os.getcwd()+"/EasySpider")
@ -1425,6 +1448,7 @@ if __name__ == '__main__':
try: try:
with open(c.config_folder + c.config_file_name, "r", encoding='utf-8') as f: with open(c.config_folder + c.config_file_name, "r", encoding='utf-8') as f:
config = json.load(f) config = json.load(f)
print("Config file path: " + c.config_folder + c.config_file_name)
absolute_user_data_folder = config["absolute_user_data_folder"] absolute_user_data_folder = config["absolute_user_data_folder"]
print("\nAbsolute_user_data_folder:", print("\nAbsolute_user_data_folder:",
absolute_user_data_folder, "\n") absolute_user_data_folder, "\n")
@ -1501,10 +1525,15 @@ if __name__ == '__main__':
browser_t = MyChrome( browser_t = MyChrome(
options=options, chrome_options=option, executable_path=driver_path) options=options, chrome_options=option, executable_path=driver_path)
elif cloudflare == 1: elif cloudflare == 1:
if sys.platform != "darwin":
browser_t = MyUCChrome( browser_t = MyUCChrome(
options=options, chrome_options=option, driver_executable_path=driver_path) options=options, chrome_options=option, driver_executable_path=driver_path)
print("Pass Cloudflare Mode") print("Pass Cloudflare Mode")
print("过Cloudflare验证模式") print("过Cloudflare验证模式")
else:
print("Not support Cloudflare Mode on MacOS")
print("MacOS不支持Cloudflare验证模式")
sys.exit()
event = Event() event = Event()
event.set() event.set()
thread = BrowserThread(browser_t, i, service, thread = BrowserThread(browser_t, i, service,

View File

@ -12,7 +12,6 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import Select from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
import undetected_chromedriver_ES as uc
desired_capabilities = DesiredCapabilities.CHROME desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none" desired_capabilities["pageLoadStrategy"] = "none"
@ -90,6 +89,9 @@ class MyChrome(webdriver.Chrome):
else: else:
return super().find_elements(by=by, value=value) return super().find_elements(by=by, value=value)
import sys
if sys.platform != "darwin": # MacOS不支持Cloudflare
import undetected_chromedriver_ES as uc
class MyUCChrome(uc.Chrome): class MyUCChrome(uc.Chrome):

View File

@ -4,10 +4,11 @@ import csv
import datetime import datetime
import json import json
import os import os
import sys
import re import re
import time import time
import uuid import uuid
import keyboard # import keyboard
from openpyxl import Workbook, load_workbook from openpyxl import Workbook, load_workbook
import requests import requests
from urllib.parse import urlparse from urllib.parse import urlparse
@ -75,20 +76,20 @@ def on_release_creator(event, press_time):
return on_release return on_release
def check_pause(key, event): # def check_pause(key, event):
while True: # while True:
if keyboard.is_pressed(key): # 按下p键暂停程序 # if keyboard.is_pressed(key): # 按下p键暂停程序
if event._flag == False: # if event._flag == False:
print("任务执行中长按p键暂停执行。") # print("任务执行中长按p键暂停执行。")
print("Task is running, long press 'p' to pause.") # print("Task is running, long press 'p' to pause.")
# 设置Event的值为True使得线程b可以继续执行 # # 设置Event的值为True使得线程b可以继续执行
event.set() # event.set()
else: # else:
# 设置Event的值为False使得线程b暂停执行 # # 设置Event的值为False使得线程b暂停执行
print("任务已暂停长按p键继续执行...") # print("任务已暂停长按p键继续执行...")
print("Task paused, press 'p' to continue...") # print("Task paused, press 'p' to continue...")
event.clear() # event.clear()
time.sleep(1) # 每秒检查一次 # time.sleep(1) # 每秒检查一次
def download_image(url, save_directory): def download_image(url, save_directory):
@ -219,16 +220,22 @@ class myMySQL:
def __init__(self, config_file="mysql_config.json"): def __init__(self, config_file="mysql_config.json"):
# 读取配置文件 # 读取配置文件
try: try:
if sys.platform == "darwin":
if config_file.find("./") >= 0:
config_file = config_file.replace("./", "")
config_file = os.path.expanduser("~/Library/Application Support/EasySpider/" + config_file)
print("MySQL config file path: ", config_file)
with open(config_file, 'r') as f: with open(config_file, 'r') as f:
config = json.load(f) config = json.load(f)
host = config["host"] host = config["host"]
port = config["port"] port = config["port"]
user = config["user"] user = config["username"]
passwd = config["password"] passwd = config["password"]
db = config["database"] db = config["database"]
except: except Exception as e:
print("读取配置文件失败,请检查配置文件:"+config_file+"是否存在。") print("读取配置文件失败,请检查配置文件:"+config_file+"是否存在。")
print("Failed to read configuration file, please check if the configuration file: "+config_file+" exists.") print("Failed to read configuration file, please check if the configuration file: "+config_file+" exists.")
print(e)
try: try:
self.conn = pymysql.connect( self.conn = pymysql.connect(
host=host, port=port, user=user, passwd=passwd, db=db) host=host, port=port, user=user, passwd=passwd, db=db)

File diff suppressed because one or more lines are too long

Binary file not shown.

Binary file not shown.