mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-20 04:39:57 +08:00
Long Press p
This commit is contained in:
parent
79dbf60ef9
commit
812faf52cc
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -431,7 +431,7 @@
|
||||
</select>
|
||||
<div v-if='parseInt(loopType) < 2'>
|
||||
<label>XPath: <span style="font-size: 30px!important;" title="Relative XPATH writing: start with /, e.g. the loop item XPATH is /html/body/div[1], your input is /*[@id='tab-customer'], then the final addressed xpath is: /html/body/div[1]/*[@id='tab-customer']">☺</span></label>
|
||||
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["xpath"]'></textarea>
|
||||
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["xpath"]' placeholder="You cannot use expressions like @href or text() within loops in XPath. You can only locate elements, but cannot fetch attribute values. Declaratives like @href and text() are only supported in data extraction operations. Moreover, it is not recommended to use them. Instead, it's suggested choosing the node type and type of the content to collect directly within data extraction operations."></textarea>
|
||||
<p><button type="button" data-toggle="modal" data-target="#myModal_XPath" @click="changeXPaths(nowNode.parameters['allXPaths'])" class="btn btn-primary" style="margin-top: 10px">(Testing feature) Click here to view other possible XPath expressions</button></p>
|
||||
</div>
|
||||
<div v-if='parseInt(loopType) == 2'>
|
||||
@ -440,7 +440,7 @@
|
||||
</div>
|
||||
<div v-else-if='parseInt(loopType) < 5'>
|
||||
<label>Content List (Use Field["FieldName"] to input the last extracted value of a field):</label>
|
||||
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" placeholder="One text/URL per line" v-model='nowNode["parameters"]["textList"]'></textarea>
|
||||
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" placeholder="One text/URL per line. You cannot use expressions like @href or text() within loops in XPath. You can only locate elements, but cannot fetch attribute values. Declaratives like @href and text() are only supported in data extraction operations. Moreover, it is not recommended to use them. Instead, it's suggested choosing the node type and type of the content to collect directly within data extraction operations." v-model='nowNode["parameters"]["textList"]'></textarea>
|
||||
</div>
|
||||
<div v-else-if='parseInt(loopType) < 7'>
|
||||
<label>Code (Use Field["FieldName"] to input the last extracted value of a field):</label>
|
||||
|
@ -431,12 +431,12 @@
|
||||
</select>
|
||||
<div v-if='parseInt(loopType) < 2'>
|
||||
<label>XPath: <span style="font-size: 30px!important;" title="相对XPATH写法:以/开头,如循环项XPATH为/html/body/div[1],您的输入为/*[@id='tab-customer'],则最终寻址的xpath为:/html/body/div[1]/*[@id='tab-customer']">☺</span></label>
|
||||
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["xpath"]'></textarea>
|
||||
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" placeholder="循环里的XPath不能用@href或者text()这种写法,只能定位元素不能取属性值,即@href和text()这种写法只在提取数据操作中支持,并且不推荐,建议直接在提取数据操作中选择节点类型和采集内容类型。" v-model='nowNode["parameters"]["xpath"]'></textarea>
|
||||
<p><button type="button" data-toggle="modal" data-target="#myModal_XPath" @click="changeXPaths(nowNode.parameters['allXPaths'])" class="btn btn-primary" style="margin-top: 10px">(测试功能)点此查看其他可能的XPath写法</button></p>
|
||||
</div>
|
||||
<div v-else-if='parseInt(loopType) == 2'>
|
||||
<label>XPath列表:</label>
|
||||
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" placeholder="每行一个XPath" v-model='nowNode["parameters"]["pathList"]'></textarea>
|
||||
<textarea onkeydown="inputDelete(event)" class="form-control" rows="3" placeholder="每行一个XPath,循环里的XPath不能用@href或者text()这种写法,只能定位元素不能取属性值,即@href和text()这种写法只在提取数据操作中支持,并且不推荐,建议直接在提取数据操作中选择节点类型和采集内容类型。" v-model='nowNode["parameters"]["pathList"]'></textarea>
|
||||
</div>
|
||||
<div v-else-if='parseInt(loopType) < 5'>
|
||||
<label>内容列表(用Field["字段名"]来输入某字段提取到的最新值):</label>
|
||||
|
@ -213,7 +213,7 @@
|
||||
<input type="text" class="form-control" v-model="mysql_config_path"></input>
|
||||
</div>
|
||||
</form>
|
||||
<label style="display: block">{{"Click the button below to execute the task. Click p on the keyboard to pause the task. Manual intervention is possible during the task execution process, ~点击以下按钮执行任务,任务执行过程中可以按p键暂停任务的执行以便" | lang }}<b>{{"~人工干预," | lang}}</b>{{"such as manually input a password or captcha: ~如手动输入密码,验证码等。" | lang}}</label>
|
||||
<label style="display: block">{{"Click the button below to execute the task. Long press p on the keyboard to pause the task. Manual intervention is possible during the task execution process, ~点击以下按钮执行任务,任务执行过程中可以长按p键暂停任务的执行以便" | lang }}<b>{{"~人工干预," | lang}}</b>{{"such as manually input a password or captcha: ~如手动输入密码,验证码等。" | lang}}</label>
|
||||
<button class="btn btn-primary" v-on:click="localExecuteInstant(false)">{{"Directly Run Locally (Clean Mode)~本地直接执行(纯净模式)" |
|
||||
lang}}
|
||||
</button>
|
||||
|
2
ExecuteStage/.vscode/launch.json
vendored
2
ExecuteStage/.vscode/launch.json
vendored
@ -12,7 +12,7 @@
|
||||
"justMyCode": true,
|
||||
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
|
||||
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
||||
"args": ["--id", "[35]", "--headless", "0", "--user_data", "1"]
|
||||
"args": ["--id", "[4]", "--headless", "0", "--user_data", "1"]
|
||||
}
|
||||
]
|
||||
}
|
@ -15,6 +15,8 @@ import time
|
||||
import requests
|
||||
from urllib.parse import urljoin
|
||||
from lxml import etree
|
||||
import undetected_chromedriver as uc
|
||||
from pynput.keyboard import Key, Listener
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.common.action_chains import ActionChains
|
||||
@ -41,7 +43,7 @@ from PIL import Image
|
||||
# import uuid
|
||||
from threading import Thread, Event
|
||||
from myChrome import MyChrome, MyUCChrome
|
||||
from utils import download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press, on_release_creator, write_to_csv, write_to_excel
|
||||
from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, on_press_creator, on_release_creator, write_to_csv, write_to_excel
|
||||
desired_capabilities = DesiredCapabilities.CHROME
|
||||
desired_capabilities["pageLoadStrategy"] = "none"
|
||||
|
||||
@ -1429,6 +1431,9 @@ if __name__ == '__main__':
|
||||
option.add_argument(
|
||||
f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒
|
||||
option.add_argument("--profile-directory=Default")
|
||||
options.add_argument(
|
||||
f'--user-data-dir={absolute_user_data_folder}') # TMALL 反扒
|
||||
options.add_argument("--profile-directory=Default")
|
||||
|
||||
if c.headless:
|
||||
print("Headless mode")
|
||||
@ -1445,7 +1450,7 @@ if __name__ == '__main__':
|
||||
|
||||
threads = []
|
||||
for i in c.id:
|
||||
print(options)
|
||||
# print(options)
|
||||
print("id: ", i)
|
||||
if c.read_type == "remote":
|
||||
print("remote")
|
||||
@ -1493,7 +1498,6 @@ if __name__ == '__main__':
|
||||
browser_t = MyChrome(
|
||||
options=options, chrome_options=option, executable_path=driver_path)
|
||||
elif cloudflare == 1:
|
||||
import undetected_chromedriver as uc
|
||||
browser_t = MyUCChrome(
|
||||
options=options, chrome_options=option, executable_path=driver_path)
|
||||
print("Pass Cloudflare Mode")
|
||||
@ -1507,24 +1511,29 @@ if __name__ == '__main__':
|
||||
thread.start()
|
||||
# Set the pause operation
|
||||
# if sys.platform != "linux":
|
||||
# time.sleep(3)
|
||||
# print("\n\n----------------------------------")
|
||||
# print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。")
|
||||
# print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
|
||||
# print("----------------------------------\n\n")
|
||||
# Thread(target=check_pause, args=("p", event)).start()
|
||||
# else:
|
||||
time.sleep(3)
|
||||
press_time = {"duration": 0, "is_pressed": False}
|
||||
print("\n\n----------------------------------")
|
||||
print("正在运行任务,按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次按p键。")
|
||||
print("Running task, press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please press 'p' again.")
|
||||
print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。")
|
||||
print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
|
||||
print("----------------------------------\n\n")
|
||||
# 使用监听器监听键盘输入
|
||||
try:
|
||||
from pynput.keyboard import Key, Listener
|
||||
with Listener(on_press=on_press, on_release=on_release_creator(event)) as listener:
|
||||
with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:
|
||||
listener.join()
|
||||
except:
|
||||
print("您的操作系统不支持暂停功能。")
|
||||
print("Your operating system does not support the pause function.")
|
||||
|
||||
|
||||
print("线程长度:", len(threads) )
|
||||
# print("线程长度:", len(threads) )
|
||||
|
||||
for thread in threads:
|
||||
print()
|
||||
|
@ -7,7 +7,7 @@ import os
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
# import keyboard
|
||||
import keyboard
|
||||
from openpyxl import Workbook, load_workbook
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
@ -23,42 +23,72 @@ def is_valid_url(url):
|
||||
|
||||
def lowercase_tags_in_xpath(xpath):
|
||||
return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath)
|
||||
|
||||
def on_release_creator(event):
|
||||
|
||||
|
||||
def on_press_creator(press_time, event, long_press_seconds=2.5):
    """Build a key-press callback that toggles ``event`` on a long press of 'p'.

    The returned function is meant to be passed as the ``on_press`` callback
    of a keyboard listener. It keeps its state in the shared ``press_time``
    dict so that the matching release callback can reset it.

    Args:
        press_time: Mutable dict with two keys. ``"is_pressed"`` tells whether
            a press of 'p' is currently being timed, and ``"duration"`` holds
            the timestamp (``time.time()``) at which that timing started.
        event: ``threading.Event`` used as the run/pause switch. Set means the
            task may run; cleared means the task is paused.
        long_press_seconds: How long 'p' must have been held before the
            switch is toggled. Defaults to 2.5 seconds.

    Returns:
        A one-argument callback ``on_press(key)``.
    """

    def on_press(key):
        # Keys without a ``char`` attribute (presumably special keys such as
        # shift or ctrl) and every character other than 'p' are ignored.
        if getattr(key, "char", None) != "p":
            return
        try:
            now = time.time()
            if not press_time["is_pressed"]:
                # First press event of a new hold: start timing it.
                press_time["duration"] = now
                press_time["is_pressed"] = True
                return
            if now - press_time["duration"] <= long_press_seconds:
                # Still held, but not long enough yet.
                return
            if not event.is_set():
                print("任务执行中,按p键暂停执行。")
                print("Task is running, press 'p' to pause.")
                # Set the event so the worker thread can continue.
                event.set()
            else:
                # Clear the event so the worker thread pauses.
                print("任务已暂停,按p键继续执行...")
                print("Task paused, press 'p' to continue...")
                event.clear()
            # Restart timing so another toggle needs another full hold.
            press_time["duration"] = time.time()
            press_time["is_pressed"] = False
        except Exception:
            # Best effort: a failure here (e.g. while printing) must not
            # propagate out of the keyboard callback.
            pass

    return on_press
|
||||
|
||||
def on_release_creator(event, press_time):
    """Build a key-release callback that ends the timing of a 'p' press.

    The returned function is meant to be passed as the ``on_release``
    callback of a keyboard listener. Releasing 'p' only clears the
    ``"is_pressed"`` marker in ``press_time``; it deliberately does not touch
    ``event``. Toggling the run/pause switch on release as well would undo
    the toggle performed by a long press.

    Args:
        event: ``threading.Event`` used as the run/pause switch. Kept in the
            signature for existing callers; this callback does not modify it.
        press_time: Mutable dict shared with the press callback. Its
            ``"is_pressed"`` entry is reset to ``False`` when 'p' is released.

    Returns:
        A one-argument callback ``on_release(key)``.
    """

    def on_release(key):
        # Keys without a ``char`` attribute and characters other than 'p'
        # leave the state untouched.
        if getattr(key, "char", None) == "p":
            press_time["is_pressed"] = False

    return on_release
|
||||
|
||||
def on_press(key):
    """No-op key-press callback: accepts the pressed key and ignores it."""
    pass
|
||||
|
||||
# def check_pause(key, event):
|
||||
# while True:
|
||||
# if keyboard.is_pressed(key): # 按下p键,暂停程序
|
||||
# if event._flag == False:
|
||||
# print("任务执行中,长按p键暂停执行。")
|
||||
# print("Task is running, long press 'p' to pause.")
|
||||
# # 设置Event的值为True,使得线程b可以继续执行
|
||||
# event.set()
|
||||
# else:
|
||||
# # 设置Event的值为False,使得线程b暂停执行
|
||||
# print("任务已暂停,长按p键继续执行...")
|
||||
# print("Task paused, press 'p' to continue...")
|
||||
# event.clear()
|
||||
# time.sleep(1) # 每秒检查一次
|
||||
def check_pause(key, event):
    """Poll the keyboard once per second and toggle ``event`` while ``key`` is down.

    Runs forever, so it is intended to be started in its own thread. Because
    the key state is only sampled once per second, the key has to be held
    until the next sample to be noticed.

    Args:
        key: Name of the key to watch, passed to ``keyboard.is_pressed``
            (presumably the third-party ``keyboard`` package imported at
            module level; verify against the file's imports).
        event: ``threading.Event`` used as the run/pause switch. Set means the
            task may run; cleared means the task is paused.
    """
    while True:
        if keyboard.is_pressed(key):
            if not event.is_set():
                print("任务执行中,长按p键暂停执行。")
                print("Task is running, long press 'p' to pause.")
                # Set the event so the worker thread can continue.
                event.set()
            else:
                # Clear the event so the worker thread pauses.
                print("任务已暂停,长按p键继续执行...")
                print("Task paused, long press 'p' to continue...")
                event.clear()
        time.sleep(1)  # sample the key state once per second
|
||||
|
||||
|
||||
def download_image(url, save_directory):
|
||||
|
Loading…
x
Reference in New Issue
Block a user