mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-22 08:27:27 +08:00
Change keyboard module
This commit is contained in:
parent
bb6e2b2881
commit
ae07c91cf1
@ -12,8 +12,8 @@ import sys
|
|||||||
# import base64
|
# import base64
|
||||||
# import hashlib
|
# import hashlib
|
||||||
import time
|
import time
|
||||||
# import keyboard
|
|
||||||
import requests
|
import requests
|
||||||
|
from urllib.parse import urljoin
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
from selenium.webdriver.common.keys import Keys
|
from selenium.webdriver.common.keys import Keys
|
||||||
@ -39,14 +39,14 @@ import os
|
|||||||
from commandline_config import Config
|
from commandline_config import Config
|
||||||
import pytesseract
|
import pytesseract
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
from pynput.keyboard import Key, Listener
|
||||||
# import uuid
|
# import uuid
|
||||||
from threading import Thread, Event
|
from threading import Thread, Event
|
||||||
from myChrome import MyChrome, MyUCChrome
|
from myChrome import MyChrome, MyUCChrome
|
||||||
from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, write_to_csv, write_to_excel
|
from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press, on_release_creator, write_to_csv, write_to_excel
|
||||||
desired_capabilities = DesiredCapabilities.CHROME
|
desired_capabilities = DesiredCapabilities.CHROME
|
||||||
desired_capabilities["pageLoadStrategy"] = "none"
|
desired_capabilities["pageLoadStrategy"] = "none"
|
||||||
|
|
||||||
|
|
||||||
class BrowserThread(Thread):
|
class BrowserThread(Thread):
|
||||||
def __init__(self, browser_t, id, service, version, event, saveName, config):
|
def __init__(self, browser_t, id, service, version, event, saveName, config):
|
||||||
Thread.__init__(self)
|
Thread.__init__(self)
|
||||||
@ -1172,22 +1172,15 @@ class BrowserThread(Thread):
|
|||||||
continue
|
continue
|
||||||
# p["relativeXPath"] = p["relativeXPath"].lower()
|
# p["relativeXPath"] = p["relativeXPath"].lower()
|
||||||
# p["relativeXPath"] = lowercase_tags_in_xpath(p["relativeXPath"])
|
# p["relativeXPath"] = lowercase_tags_in_xpath(p["relativeXPath"])
|
||||||
if p["nodeType"] == 2:
|
# 已经有text()或@href了,不需要再加
|
||||||
if p["relativeXPath"].find("/@href") >= 0:
|
if p["relativeXPath"].find("/@href") >= 0 or p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0:
|
||||||
xpath = p["relativeXPath"]
|
xpath = p["relativeXPath"]
|
||||||
else:
|
elif p["nodeType"] == 2:
|
||||||
xpath = p["relativeXPath"] + "/@href"
|
xpath = p["relativeXPath"] + "/@href"
|
||||||
elif p["contentType"] == 1:
|
elif p["contentType"] == 1:
|
||||||
# 已经有text()了,不需要再加
|
xpath = p["relativeXPath"] + "/text()"
|
||||||
if p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0:
|
|
||||||
xpath = p["relativeXPath"]
|
|
||||||
else:
|
|
||||||
xpath = p["relativeXPath"] + "/text()"
|
|
||||||
elif p["contentType"] == 0:
|
elif p["contentType"] == 0:
|
||||||
if p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0:
|
xpath = p["relativeXPath"] + "//text()"
|
||||||
xpath = p["relativeXPath"]
|
|
||||||
else:
|
|
||||||
xpath = p["relativeXPath"] + "//text()"
|
|
||||||
if p["relative"]:
|
if p["relative"]:
|
||||||
# if p["relativeXPath"] == "":
|
# if p["relativeXPath"] == "":
|
||||||
# content = [loopElementHTML]
|
# content = [loopElementHTML]
|
||||||
@ -1210,6 +1203,9 @@ class BrowserThread(Thread):
|
|||||||
# 拼接所有文本内容并去掉两边的空白
|
# 拼接所有文本内容并去掉两边的空白
|
||||||
content = ' '.join(result.strip()
|
content = ' '.join(result.strip()
|
||||||
for result in content if result.strip())
|
for result in content if result.strip())
|
||||||
|
if p["nodeType"] == 2:
|
||||||
|
base_url = self.browser.current_url
|
||||||
|
content = urljoin(base_url, content) # 合并链接相对路径为绝对路径
|
||||||
else:
|
else:
|
||||||
content = p["default"]
|
content = p["default"]
|
||||||
if not self.dataNotFoundKeys[p["name"]]:
|
if not self.dataNotFoundKeys[p["name"]]:
|
||||||
@ -1508,12 +1504,21 @@ if __name__ == '__main__':
|
|||||||
print("Thread with task id: ", i, " is created")
|
print("Thread with task id: ", i, " is created")
|
||||||
threads.append(thread)
|
threads.append(thread)
|
||||||
thread.start()
|
thread.start()
|
||||||
Thread(target=check_pause, args=("p", event)).start()
|
# Set the pause operation
|
||||||
time.sleep(5)
|
# if sys.platform != "linux":
|
||||||
|
# Thread(target=check_pause, args=("p", event)).start()
|
||||||
|
# else:
|
||||||
|
time.sleep(3)
|
||||||
print("\n\n----------------------------------")
|
print("\n\n----------------------------------")
|
||||||
print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。")
|
print("正在运行任务,按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次按p键。")
|
||||||
print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
|
print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
|
||||||
print("----------------------------------\n\n")
|
print("----------------------------------\n\n")
|
||||||
|
# 使用监听器监听键盘输入
|
||||||
|
with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener:
|
||||||
|
listener.join()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
for thread in threads:
|
for thread in threads:
|
||||||
thread.join()
|
thread.join()
|
||||||
|
@ -7,7 +7,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
import keyboard
|
# import keyboard
|
||||||
from openpyxl import Workbook, load_workbook
|
from openpyxl import Workbook, load_workbook
|
||||||
import requests
|
import requests
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
@ -24,20 +24,41 @@ def is_valid_url(url):
|
|||||||
def lowercase_tags_in_xpath(xpath):
|
def lowercase_tags_in_xpath(xpath):
|
||||||
return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath)
|
return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath)
|
||||||
|
|
||||||
def check_pause(key, event):
|
def on_release_creator(event):
|
||||||
while True:
|
def on_release(key):
|
||||||
if keyboard.is_pressed(key): # 按下p键,暂停程序
|
try:
|
||||||
if event._flag == False:
|
if key.char == 'p': # the 'p' key toggles pause/resume of the task (this branch is not about Esc and does not exit the listener)
|
||||||
print("任务执行中,长按p键暂停执行。")
|
if event._flag == False:
|
||||||
print("Task is running, long press 'p' to pause.")
|
print("任务执行中,按p键暂停执行。")
|
||||||
# 设置Event的值为True,使得线程b可以继续执行
|
print("Task is running, long press 'p' to pause.")
|
||||||
event.set()
|
# 设置Event的值为True,使得线程b可以继续执行
|
||||||
else:
|
event.set()
|
||||||
# 设置Event的值为False,使得线程b暂停执行
|
else:
|
||||||
print("任务已暂停,长按p键继续执行...")
|
# 设置Event的值为False,使得线程b暂停执行
|
||||||
print("Task paused, press 'p' to continue...")
|
print("任务已暂停,按p键继续执行...")
|
||||||
event.clear()
|
print("Task paused, press 'p' to continue...")
|
||||||
time.sleep(1) # 每秒检查一次
|
event.clear()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return on_release
|
||||||
|
|
||||||
|
def on_press(key):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# def check_pause(key, event):
|
||||||
|
# while True:
|
||||||
|
# if keyboard.is_pressed(key): # 按下p键,暂停程序
|
||||||
|
# if event._flag == False:
|
||||||
|
# print("任务执行中,长按p键暂停执行。")
|
||||||
|
# print("Task is running, long press 'p' to pause.")
|
||||||
|
# # 设置Event的值为True,使得线程b可以继续执行
|
||||||
|
# event.set()
|
||||||
|
# else:
|
||||||
|
# # 设置Event的值为False,使得线程b暂停执行
|
||||||
|
# print("任务已暂停,长按p键继续执行...")
|
||||||
|
# print("Task paused, press 'p' to continue...")
|
||||||
|
# event.clear()
|
||||||
|
# time.sleep(1) # 每秒检查一次
|
||||||
|
|
||||||
|
|
||||||
def download_image(url, save_directory):
|
def download_image(url, save_directory):
|
||||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
BIN
ElectronJS/EasySpider_en.crx
Normal file
BIN
ElectronJS/EasySpider_en.crx
Normal file
Binary file not shown.
BIN
ElectronJS/EasySpider_zh.crx
Normal file
BIN
ElectronJS/EasySpider_zh.crx
Normal file
Binary file not shown.
@ -1511,7 +1511,7 @@ if __name__ == '__main__':
|
|||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
print("\n\n----------------------------------")
|
print("\n\n----------------------------------")
|
||||||
print("正在运行任务,按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次按p键。")
|
print("正在运行任务,按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次按p键。")
|
||||||
print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
|
print("Running task, press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please press 'p' again.")
|
||||||
print("----------------------------------\n\n")
|
print("----------------------------------\n\n")
|
||||||
# 使用监听器监听键盘输入
|
# 使用监听器监听键盘输入
|
||||||
with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener:
|
with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user