Cloudflare!!!

This commit is contained in:
naibo 2023-07-13 05:08:49 +08:00
parent 1e2ca08077
commit e50cd7a62a
26 changed files with 443 additions and 48 deletions

View File

@ -1577,13 +1577,14 @@ if __name__ == '__main__':
browser_t = MyChrome(
options=options, chrome_options=option, executable_path=driver_path)
elif cloudflare == 1:
if sys.platform != "darwin":
options.binary_location = "" # 需要用自己的浏览器
if sys.platform == "win32":
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
# options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器
browser_t = MyUCChrome(
options=options, driver_executable_path=driver_path)
else:
print("Not support Cloudflare Mode on MacOS")
print("MacOS不支持Cloudflare验证模式")
print("Cloudflare模式只支持Windows x64平台。")
print("Cloudflare Mode only support on Windows x64 platform.")
sys.exit()
event = Event()
event.set()
@ -1607,9 +1608,9 @@ if __name__ == '__main__':
print("正在运行任务长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码如果想恢复任务的执行请再次长按p键。")
print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
print("----------------------------------\n\n")
if cloudflare:
print("过Cloudflare验证模式有时候会不稳定请注意观察上方提示的浏览器版本信息是否正确,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。")
print("Passing the Cloudflare verification mode is sometimes unstable. Please pay attention to whether the browser version information prompted above is correct. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
# if cloudflare:
# print("过Cloudflare验证模式有时候会不稳定如果无法通过验证则需要隔几分钟重试一次或者可以更换新的用户信息文件夹再执行任务。")
# print("Passing the Cloudflare verification mode is sometimes unstable. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
# 使用监听器监听键盘输入
try:
with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:

View File

@ -12,6 +12,8 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
import sys
desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none"
@ -89,9 +91,13 @@ class MyChrome(webdriver.Chrome):
else:
return super().find_elements(by=by, value=value)
import sys
if sys.platform != "darwin": # MacOS不支持Cloudflare
import undetected_chromedriver_ES as uc
# MacOS不支持直接打包带Cloudflare的功能如果要自己编译运行可以把这个if去掉然后配置好浏览器和driver路径
if sys.platform != "darwin":
ES = True
if ES: # 用自己写的ES版本
import undetected_chromedriver_ES as uc
else:
import undetected_chromedriver as uc
class MyUCChrome(uc.Chrome):

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -9,7 +9,7 @@ import platform
import shutil
import zipfile
import urllib.request
import py7zr
# import py7zr
def compress_folder_to_7z(folder_path, output_file):
if os.path.exists(output_file):

Binary file not shown.

Binary file not shown.

View File

@ -30,7 +30,7 @@ def update_file_version(file_path, new_version, key="当前版本/Current Versio
file.write(line)
version = "0.3.5"
version = "0.3.6"
# py html js
@ -39,7 +39,7 @@ if __name__ == "__main__":
file_path = "../.temp_to_pub/compress.py"
update_file_version(file_path, version, key='easyspider_version = "')
file_path = "./src/taskGrid/logic_deprecated.js"
file_path = "./src/taskGrid/logic.js"
update_file_version(file_path, version, key='"version": "')
# file_path = "./src/taskGrid/logic.js"

View File

@ -1 +1 @@
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":0,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Document\\Projects\\EasySpider\\ElectronJS\\user_data"}
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"}

View File

@ -105,14 +105,14 @@ let handle_pairs = {};
function createWindow() {
// Create the browser window.
mainWindow = new BrowserWindow({
width: 520,
width: 550,
height: 750,
webPreferences: {
preload: path.join(__dirname, 'src/js/preload.js')
},
icon: iconPath,
// frame: false, //取消window自带的关闭最小化等
// resizable: false //禁止改变主窗口尺寸
resizable: false //禁止改变主窗口尺寸
})
// and load the index.html of the app.
@ -126,7 +126,7 @@ function createWindow() {
app.quit();
}
});
mainWindow.webContents.openDevTools();
// mainWindow.webContents.openDevTools();
// Open the DevTools.
// mainWindow.webContents.openDevTools()
}
@ -549,6 +549,10 @@ app.whenReady().then(() => {
})
ipcMain.on('start-design', handleOpenBrowser);
ipcMain.on('start-invoke', handleOpenInvoke);
// Handle the renderer's 'accept-agreement' message: mark the agreement as
// accepted in the in-memory config and persist it to disk.
ipcMain.on('accept-agreement', function (event, arg) {
// Flag read back from config.json; 1 is written here once the user agrees.
config.copyright = 1;
// Synchronously rewrite config.json (in the directory returned by
// task_server.getDir()) with the whole config object serialized as JSON.
fs.writeFileSync(path.join(task_server.getDir(), "config.json"), JSON.stringify(config));
});
createWindow();
app.on('activate', function () {

View File

@ -56,7 +56,19 @@
<div style="padding: 10px; text-align: center;vertical-align: middle;" v-else>
<div v-if="lang=='en'">
<div v-if="step == -1">
TEST
<h4 style="margin-top: 20px">Copyright and Disclaimer</h4>
<p>Please carefully read the following instructions regarding the use of the software and commercial payments. If you agree, please accept the agreement.</p>
<textarea class="form-control" style="margin:0 auto;width:90%; color:black; height: 450px; min-height: 200px; background: white" readonly>
This software is intended for educational and communication purposes only. It is strictly prohibited to use the software for any illegal activities or operations, such as crawling government/military websites that are not allowed to be crawled. The user bears all consequences resulting from the use of this software and the author shall not be held responsible or liable in any way. Furthermore, the software is protected by patent rights. If you intend to use it for commercial purposes or profit-making activities, such as using the software for client orders, please contact Hangzhou Tianqin Intellectual Property Agency Co., Ltd. (http://www.tqip.com/) for patent authorization and payment operations: https://www.patentguru.com/cn/search?q=一种自定义提取流程的服务封装系统
For individual users, EasySpider is a completely free and ad-free open-source software. The development and maintenance of the software rely solely on the author's voluntary efforts. Therefore, you can choose to support the author, allowing them to have more enthusiasm and energy to maintain this software. Alternatively, if you have profited from using this software, you are welcome to support the author through the following methods:
1. PayPal account: naibowang, or scan the QR code provided in the software package.
2. Alipay account: naibowang@foxmail.com, or scan the QR code provided in the software package.
3. WeChat payment: scan the QR code provided in the software package.
</textarea>
<p><a @click="acceptAgreement" class="btn btn-primary btn-lg"
style="margin-top: 30px; width: 300px;height:60px;padding-top:12px;color:white">Agree and Start</a></p>
</div>
<div v-if="step == 0">
<p style="margin-top: 20px">Hint: Click Button below to start.</p>
@ -130,8 +142,18 @@
<div v-else-if="lang=='zh'">
<div v-if="step == -1">
<h4 style="margin-top: 20px">版权和注意事项声明</h4>
<p>请仔细阅读下方有关软件使用和商用付费的说明,并接受使用协议以使用本软件。</p>
<textarea class="form-control" style="min-height: 200px;" readonly>
<p>请仔细阅读下方有关软件使用和商用付费的说明,同意请接受协议。</p>
<textarea class="form-control" style="margin:0 auto;width:90%; color:black; height: 480px; min-height: 200px; background: white" readonly>
本软件仅供学习交流使用,严禁使用软件进行任何违法违规的操作,如爬取不允许爬取的政府/军事机关网站等。使用本软件所造成的一切后果由使用者自负与作者本人无关作者不会承担任何责任。同时软件受到专利权保护如要用于商业用途如使用软件进行盈利接单等请联系杭州天勤知识产权代理有限公司http://www.tqip.com/进行专利授权等付费操作https://www.patentguru.com/cn/search?q=一种自定义提取流程的服务封装系统
对于个人使用者来说易采集EasySpider是一款完全免费无广告的开源软件软件开发和维护全靠作者用爱发电因此您可以选择支持作者让作者有更多的热情和精力维护此软件或者您使用了此软件进行了盈利欢迎您通过下面的方式支持作者
1、支付宝账号naibowang@foxmail.com也可以扫描软件包中带的二维码。
2、微信收款扫描软件包中带的二维码。
3、PayPal账号naibowang或扫描软件包中带的二维码。
</textarea>
<p><a @click="acceptAgreement" class="btn btn-primary btn-lg"
style="margin-top: 30px; width: 300px;height:60px;padding-top:12px;color:white">同意并开始使用</a></p>
</div>
<div v-if="step == 0">
<p style="margin-top: 20px">提示:点击下方按钮开始使用。</p>

View File

@ -55,6 +55,10 @@ var app = Vue.createApp({
this.init = false;
this.lang = lang;
},
// Click handler for the agreement button: advance the UI and notify the
// main process that the user accepted the agreement.
acceptAgreement() {
// Switch the view state to step 0.
this.step = 0;
// Forward the acceptance through the electronAPI bridge object on window.
window.electronAPI.acceptAgreement();
},
startDesign(lang, with_data = false, mobile=false) {
if (with_data) {
console.log(this.user_data_folder)

View File

@ -10,4 +10,5 @@ const { contextBridge, ipcRenderer } = require('electron');
// Expose a small API object named 'electronAPI' to the renderer. Each entry
// forwards its arguments to the main process with ipcRenderer.send.
contextBridge.exposeInMainWorld('electronAPI', {
// Sends 'start-design' with the language, user data folder and mobile flag.
startDesign: (lang="en", user_data_folder = '', mobile=false) => ipcRenderer.send('start-design', lang, user_data_folder, mobile),
// Sends 'start-invoke' with the language.
startInvoke: (lang="en") => ipcRenderer.send('start-invoke', lang),
// Sends 'accept-agreement' with no arguments.
acceptAgreement: () => ipcRenderer.send('accept-agreement'),
})

View File

@ -566,7 +566,7 @@
<label>Is it an extreme anti-scraping website like Cloudflare (<a href="https://www.bilibili.com/video/BV1Ph4y1E7R9/" target="_blank">Watch Tutorial</a>)?</label>
<select id="cloudflare" name="cloudflare" class="form-control">
<option value=0>No</option>
<option value=1>Yes (Not support on MacOS, unless compile by yourself)</option>
<option value=1>Yes (Only support on Windows x64 platform)</option>
</select>
<label>Browser Emulation Type:</label>
<select id="environment" name="environment" class="form-control">

View File

@ -566,7 +566,7 @@
<label>是否为Cloudflare等极端反爬网站<a href="https://www.bilibili.com/video/BV1Ph4y1E7R9/" target="_blank">查看Cloudflare设计和执行教程</a></label>
<select id="cloudflare" name="cloudflare" class="form-control">
<option value = 0></option>
<option value = 1>是(MacOS不支持直接运行但可以自行编译</option>
<option value = 1>是(只支持Windows x64系统</option>
</select>
<label>浏览器模拟类型:</label>
<select id="environment" name="environment" class="form-control">

View File

@ -44,10 +44,10 @@ function detectLang(str) {
if (enCount === cnCount) {
return 2;
} else if (enCount > cnCount) {
return 0;
} else if (cnCount>=3) {
return 1;
}
return 1;
return 0;
}
Vue.filter('lang', function (value) {

View File

@ -208,6 +208,8 @@
<label>{{"User Data Folder (If you want to load the cookie, data and extension(s) from your local browser, please set this folder path, and then click the 'Run with (Data Mode)' button to run the task):~用户本地浏览器数据目录如果需要使用本地的登录信息插件和cookie请设置此目录并点击下方“执行带用户信息模式”按钮开始执行任务" | lang}}</label>
<input type="text" class="form-control" v-model="user_data_folder"></input>
</div>
<p v-if="task['cloudflare']==1">{{`要想过Cloudflare验证需要以下目录存在115版本的Chrome Beta版浏览器注意是Beta版不是正式版C:\\Program Files\\Google\\Chrome Beta。如果Beta版本不是115请在软件下载目录中找到Chrome_Beta_115_win64.7z压缩包然后解压并复制覆盖为C:\\Program Files\\Google\\Chrome Beta目录即可。~To pass the Cloudflare verification, you need the following directory to exist in the 115 version of Chrome Beta, note that it is the Beta version not the official version: C:\\Program Files\\Google\\Chrome Beta,
If the Beta version is not 115, please find the Chrome_Beta_115_win64.7z compressed package in the software download directory, then unzip and copy (overwrite) to the C:\\Program Files\\Google\\Chrome Beta directory.` | lang }}</p>
<div class="form-group" style="margin-top: 10px" v-if="task.outputFormat=='mysql'">
<label>{{"MySQL configuration file Path, relative to this folder:~MySQL配置文件路径路径相对此文件夹" | lang}} {{config_folder}}</label>
<input type="text" class="form-control" v-model="mysql_config_path"></input>

View File

@ -1 +1,312 @@
{"id":158,"name":"(子元素)京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/12/2023, 1:51:59 AM","update_time":"7/12/2023, 9:52:06 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":2,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"iPhone","value":"iPhone"}],"outputParameters":[{"id":0,"name":"参数40_文本","desc":"","type":"text","recordASField":1,"exampleValue":"iPhone"},{"id":1,"name":"参数47_文本","desc":"","type":"text","recordASField":1,"exampleValue":"剩余9天22时10分"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3,4],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"iPhone","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text 
defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}},{"id":3,"index":3,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"search-btn\"]/i[1]","iframe":false,"wait":8,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":"3","scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/button[1]/i[1]","//i[contains(., '')]","/html/body/div[last()-6]/div/div[last()-2]/div/button/i"]}},{"id":4,"index":4,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[5],"isInLoop":false,"position":3,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[2]/div[2]/div[1]/div[1]/div[2]/ul[1]/li/div[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[2]/div[2]/div[1]/div[1]/div[2]/ul[1]/li[1]/div[1]","//div[contains(., '')]","//DIV[@class='gl-i-wrap']","/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-29]/div"]}},{"id":5,"index":5,"parentId":4,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":1,"relative":true,"name":"参数40_文本","desc":"","relativeXPath":"/div[4]/a[1]/em[1]/font[3]","allXPaths":["/div[4]/a[1]/em[1]/font[3]","//font[contains(., 
'iPhone')]","//FONT[@class='skcolor_ljg']","/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-24]/div/div[last()-5]/a/em/font"],"exampleValues":[{"num":5,"value":"iPhone"}],"unique_index":"/div[4]/a[1]/em[1]/font[3]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":1,"relative":true,"name":"参数47_文本","desc":"","relativeXPath":"//a/em[1]","allXPaths":["/div[10]/em[1]","//em[contains(., '剩余9天22时10分')]","/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-19]/div/div/em"],"exampleValues":[{"num":10,"value":"剩余9天22时10分"}],"unique_index":"/div[10]/em[1]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
{
"id": 158,
"name": "(子元素)京东全球版-专业的综合网上购物商城",
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"create_time": "7/12/2023, 1:51:59 AM",
"update_time": "7/12/2023, 9:52:06 AM",
"version": "0.3.5",
"saveThreshold": 10,
"cloudflare": 0,
"environment": 0,
"maxViewLength": 15,
"outputFormat": "xlsx",
"saveName": "current_time",
"containJudge": false,
"desc": "https://www.jd.com",
"inputParameters": [
{
"id": 0,
"name": "urlList_0",
"nodeId": 1,
"nodeName": "打开网页",
"value": "https://www.jd.com",
"desc": "要采集的网址列表,多行以\\n分开",
"type": "text",
"exampleValue": "https://www.jd.com"
},
{
"id": 1,
"name": "inputText_1",
"nodeName": "输入文字",
"nodeId": 2,
"desc": "要输入的文本,如京东搜索框输入:电脑",
"type": "text",
"exampleValue": "iPhone",
"value": "iPhone"
}
],
"outputParameters": [
{
"id": 0,
"name": "参数40_文本",
"desc": "",
"type": "text",
"recordASField": 1,
"exampleValue": "iPhone"
},
{
"id": 1,
"name": "参数47_文本",
"desc": "",
"type": "text",
"recordASField": 1,
"exampleValue": "剩余9天22时10分"
}
],
"graph": [
{
"index": 0,
"id": 0,
"parentId": 0,
"type": -1,
"option": 0,
"title": "root",
"sequence": [
1,
2,
3,
4
],
"parameters": {
"history": 1,
"tabIndex": 0,
"useLoop": false,
"xpath": "",
"wait": 0,
"waitType": 0
},
"isInLoop": false
},
{
"id": 1,
"index": 1,
"parentId": 0,
"type": 0,
"option": 1,
"title": "打开网页",
"sequence": [],
"isInLoop": false,
"position": 0,
"parameters": {
"useLoop": false,
"xpath": "",
"wait": 0,
"waitType": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"url": "https://www.jd.com",
"links": "https://www.jd.com",
"maxWaitTime": 10,
"scrollType": 0,
"scrollCount": 1,
"scrollWaitTime": 1,
"cookies": ""
}
},
{
"id": 2,
"index": 2,
"parentId": 0,
"type": 0,
"option": 4,
"title": "输入文字",
"sequence": [],
"isInLoop": false,
"position": 1,
"parameters": {
"history": 4,
"tabIndex": -1,
"useLoop": false,
"xpath": "//*[@id=\"key\"]",
"iframe": false,
"wait": 0,
"waitType": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"value": "iPhone",
"allXPaths": [
"/html/body/div[4]/div[1]/div[2]/div[1]/input[1]",
"//input[contains(., '')]",
"id(\"key\")",
"//INPUT[@class='text defcolor']",
"/html/body/div[last()-6]/div/div[last()-2]/div/input"
]
}
},
{
"id": 3,
"index": 3,
"parentId": 0,
"type": 0,
"option": 2,
"title": "点击元素",
"sequence": [],
"isInLoop": false,
"position": 2,
"parameters": {
"history": 4,
"tabIndex": -1,
"useLoop": false,
"xpath": "//*[@id=\"search-btn\"]/i[1]",
"iframe": false,
"wait": 8,
"waitType": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"scrollType": "3",
"scrollCount": 1,
"scrollWaitTime": 1,
"clickWay": 0,
"maxWaitTime": 10,
"paras": [],
"allXPaths": [
"/html/body/div[4]/div[1]/div[2]/div[1]/button[1]/i[1]",
"//i[contains(., '')]",
"/html/body/div[last()-6]/div/div[last()-2]/div/button/i"
]
}
},
{
"id": 4,
"index": 4,
"parentId": 0,
"type": 1,
"option": 8,
"title": "循环",
"sequence": [
5
],
"isInLoop": false,
"position": 3,
"parameters": {
"history": 5,
"tabIndex": -1,
"useLoop": false,
"xpath": "/html/body/div[5]/div[2]/div[2]/div[1]/div[1]/div[2]/ul[1]/li/div[1]",
"iframe": false,
"wait": 0,
"waitType": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"scrollType": 0,
"scrollCount": 1,
"scrollWaitTime": 1,
"loopType": 1,
"pathList": "",
"textList": "",
"code": "",
"waitTime": 0,
"exitCount": 0,
"historyWait": 2,
"breakMode": 0,
"breakCode": "",
"breakCodeWaitTime": 0,
"allXPaths": [
"/html/body/div[5]/div[2]/div[2]/div[1]/div[1]/div[2]/ul[1]/li[1]/div[1]",
"//div[contains(., '')]",
"//DIV[@class='gl-i-wrap']",
"/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-29]/div"
]
}
},
{
"id": 5,
"index": 5,
"parentId": 4,
"type": 0,
"option": 3,
"title": "提取数据",
"sequence": [],
"isInLoop": true,
"position": 0,
"parameters": {
"history": 5,
"tabIndex": -1,
"useLoop": false,
"xpath": "",
"iframe": false,
"wait": 0,
"waitType": 0,
"beforeJS": "",
"beforeJSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"paras": [
{
"nodeType": 0,
"contentType": 1,
"relative": true,
"name": "参数40_文本",
"desc": "",
"relativeXPath": "/div[4]/a[1]/em[1]/font[3]",
"allXPaths": [
"/div[4]/a[1]/em[1]/font[3]",
"//font[contains(., 'iPhone')]",
"//FONT[@class='skcolor_ljg']",
"/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-24]/div/div[last()-5]/a/em/font"
],
"exampleValues": [
{
"num": 5,
"value": "iPhone"
}
],
"unique_index": "/div[4]/a[1]/em[1]/font[3]",
"iframe": false,
"default": "",
"paraType": "text",
"recordASField": 1,
"beforeJS": "",
"beforeJSWaitTime": 0,
"JS": "",
"JSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"downloadPic": 0
},
{
"nodeType": 0,
"contentType": 1,
"relative": true,
"name": "参数47_文本",
"desc": "",
"relativeXPath": "//a/em[1]",
"allXPaths": [
"/div[10]/em[1]",
"//em[contains(., '剩余9天22时10分')]",
"/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-19]/div/div/em"
],
"exampleValues": [
{
"num": 10,
"value": "剩余9天22时10分"
}
],
"unique_index": "/div[10]/em[1]",
"iframe": false,
"default": "",
"paraType": "text",
"recordASField": 1,
"beforeJS": "",
"beforeJSWaitTime": 0,
"JS": "",
"JSWaitTime": 0,
"afterJS": "",
"afterJSWaitTime": 0,
"downloadPic": 0
}
],
"loopType": 1
}
}
]
}

View File

@ -12,7 +12,7 @@
"justMyCode": false,
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--id", "[4]", "--headless", "0", "--user_data", "1"]
"args": ["--id", "[3]", "--headless", "0", "--user_data", "0"]
}
]
}

View File

@ -1 +1 @@
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Document\\Projects\\EasySpider\\ElectronJS\\user_data"}
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"}

View File

@ -615,13 +615,17 @@ class BrowserThread(Thread):
while True: # do while循环
try:
finished = False
newBodyText = self.browser.page_source
# newBodyText = self.browser.page_source
newBodyText = self.browser.find_element(By.XPATH, "//body").text
if newBodyText == bodyText: # 如果页面内容无变化
print("页面已检测不到新内容,停止循环。")
print("No new content detected on the page, stop loop.")
finished = True
break
else:
if node["parameters"]["exitCount"] == 0:
print("检测到页面变化,继续循环。")
print("Page changed detected, continue loop.")
bodyText = newBodyText
element = self.browser.find_element(
By.XPATH, node["parameters"]["xpath"], iframe=node["parameters"]["iframe"])
@ -1577,13 +1581,14 @@ if __name__ == '__main__':
browser_t = MyChrome(
options=options, chrome_options=option, executable_path=driver_path)
elif cloudflare == 1:
if sys.platform != "darwin":
options.binary_location = "" # 需要用自己的浏览器
if sys.platform == "win32":
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
# options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器
browser_t = MyUCChrome(
options=options)
options=options, driver_executable_path=driver_path)
else:
print("Not support Cloudflare Mode on MacOS")
print("MacOS不支持Cloudflare验证模式")
print("Cloudflare模式只支持Windows x64平台。")
print("Cloudflare Mode only support on Windows x64 platform.")
sys.exit()
event = Event()
event.set()
@ -1607,9 +1612,9 @@ if __name__ == '__main__':
print("正在运行任务长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码如果想恢复任务的执行请再次长按p键。")
print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
print("----------------------------------\n\n")
if cloudflare:
print("过Cloudflare验证模式有时候会不稳定请注意观察上方提示的浏览器版本信息是否正确,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。")
print("Passing the Cloudflare verification mode is sometimes unstable. Please pay attention to whether the browser version information prompted above is correct. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
# if cloudflare:
# print("过Cloudflare验证模式有时候会不稳定如果无法通过验证则需要隔几分钟重试一次或者可以更换新的用户信息文件夹再执行任务。")
# print("Passing the Cloudflare verification mode is sometimes unstable. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
# 使用监听器监听键盘输入
try:
with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:

View File

@ -12,6 +12,8 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
import sys
desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none"
@ -89,11 +91,11 @@ class MyChrome(webdriver.Chrome):
else:
return super().find_elements(by=by, value=value)
import sys
if sys.platform != "darwin": # MacOS不支持Cloudflare
ES = 1
if ES == 1:
import undetected_chromedriver as uc
# MacOS不支持直接打包带Cloudflare的功能如果要自己编译运行可以把这个if去掉然后配置好浏览器和driver路径
if sys.platform != "darwin":
ES = True
if ES: # 用自己写的ES版本
import undetected_chromedriver_ES as uc
else:
import undetected_chromedriver as uc

View File

@ -371,6 +371,37 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver):
options.binary_location = (
browser_executable_path or find_chrome_executable(chrome_version)
)
if not os.path.exists(options.binary_location):
time.sleep(5)
# 如果没有安装可以在下面的链接下载安装https://www.google.com/chrome/beta/
print(f"""\n\n\n要想过Cloudflare验证需要以下目录存在115版本的Chrome Beta版浏览器注意是Beta版不是正式版C:\Program Files\Google\Chrome Beta
如果Beta版本不是115请在软件下载目录中找到Chrome_Beta_115_win64.7z压缩包然后解压并复制覆盖为C:\Program Files\Google\Chrome Beta目录即可
请手动关闭此程序配置完成后重新执行任务
""")
print("""To pass the Cloudflare verification, you need the following directory to exist in the 115 version of Chrome Beta, note that it is the Beta version not the official version: C:\Program Files\Google\Chrome Beta,
If the Beta version is not 115, please find the Chrome_Beta_115_win64.7z compressed package in the software download directory, then unzip and copy (overwrite) to the C:\Program Files\Google\Chrome Beta directory.
Please close this program manually and re-execute the task after the configuration is complete.
""")
time.sleep(100)
else:
folder_path = os.path.dirname(os.path.abspath(options.binary_location))
folder_list = [f for f in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, f))]
numeric_folders = [f for f in folder_list if f[0].isdigit()]
version = numeric_folders[0].split('.')[0]
if version != "115":
time.sleep(5)
print("Chrome Beta版本不是115请将Chrome Beta的版本替换为115 方法为下载115版本的Chrome Beta浏览器然后解压并覆盖C:\Program Files\Google\Chrome Beta目录即可软件下载目录中有Chrome_Beta_115_win64.7z版本的压缩包,可直接下载后解压替换。")
print("Chrome Beta version is not 115, please replace the version of Chrome Beta with 115, the method is to download the 115 version of Chrome Beta browser, then unzip and overwrite the C:\Program Files\Google\Chrome Beta directory, the software download directory has Chrome_Beta_115_win64.7z version of the compressed package, you can download and unzip directly to replace.")
print("\n请手动关闭此程序。\n")
print("\nPlease close this program manually.\n")
time.sleep(100)
print("Options Binary Location: ", options.binary_location)
@ -855,7 +886,11 @@ def find_chrome_executable(version):
candidates.add(os.sep.join((item, subitem, "chrome.exe")))
for candidate in candidates:
if os.path.exists(candidate) and os.access(candidate, os.X_OK):
print("\n\n\n软件将会使用以下目录的Chrome浏览器", os.path.normpath(candidate), ",请检查此浏览器版本是否为" + str(version) + "版本,如果不是将无法运行。")
print("The software will use the Chrome browser in the following directory:", os.path.normpath(candidate), "Please check if the version of this browser is version " + str(version) + ", if not, it will not be able to run.\n\n\n")
print(f"""\n\n\n要想过Cloudflare验证需要满足以下条件
自己的环境已经安装了115版本的Chrome Beta版浏览器注意是Beta版不是正式版且浏览器安装路径必须保持不变在C:\Program Files\Google\Chrome Beta\Application\chrome.exe
如果没有安装可以在下面的链接下载安装https://www.google.com/chrome/beta/
软件将会使用以下目录的Chrome Beta浏览器", {os.path.normpath(candidate)}, "请检查此浏览器版本是否为 115 版本的Beta浏览器如果不是将无法运行""")
# print("The software will use the Chrome browser in the following directory:", os.path.normpath(candidate), "Please check if the version of this browser is version " + str(version) + ", if not, it will not be able to run.\n\n\n")
print(f"""The software will use the Chrome browser in the following directory: {os.path.normpath(candidate)}, Please check if the version of this browser is version 115, if not, it will not be able to run.\n\n\n""")
time.sleep(5)
return os.path.normpath(candidate)

View File

@ -141,8 +141,8 @@ class Patcher(object):
folder_list = [f for f in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, f))]
numeric_folders = [f for f in folder_list if f[0].isdigit()]
version = numeric_folders[0].split('.')[0]
print(f"\n\n\nCloudflare下需要自行安装浏览器请确保自己的机器环境已经安装了 {numeric_folders[0].split('.')[0]} 版本的Chrome浏览器不是软件自带的Chrome浏览器需要自己安装浏览器且版本号一定要正确否则程序无法运行")
print("Please make sure that your machine environment has installed the Chrome browser version %s (not the Chrome browser provided by the software, you need to install the browser yourself and the version number must be correct), otherwise the program cannot run!" % numeric_folders[0].split('.')[0])
# print(f"\n\n\nCloudflare下需要自行安装浏览器请确保自己的机器环境已经安装了 {numeric_folders[0].split('.')[0]} 版本的Chrome浏览器不是软件自带的Chrome浏览器需要自己安装浏览器且版本号一定要正确否则程序无法运行")
# print("Please make sure that your machine environment has installed the Chrome browser version %s (not the Chrome browser provided by the software, you need to install the browser yourself and the version number must be correct), otherwise the program cannot run!" % numeric_folders[0].split('.')[0])
if not ispatched:
print("Patching chromedriver...")

View File

@ -130,7 +130,7 @@ This software is for learning and communication only. **It is strictly forbidden
For the crawler operations of government and military websites, **the author will not answer any questions** in order to avoid violating relevant national laws, regulations and policies.
同时,软件受到专利权保护,如要用于商业用途,请联系[杭州天勤知识产权代理有限公司](http://www.tqip.com/)进行专利授权等付费操作。
同时,软件受到专利权保护,如要用于商业用途,如使用软件进行盈利接单等,请联系[杭州天勤知识产权代理有限公司](http://www.tqip.com/)进行专利授权等付费操作。
At the same time, the software is protected by patent rights. If you want to use it for commercial purposes, such as taking paid orders for profit with the software, please contact [Hangzhou Tianqin Intellectual Property Agency](http://www.tqip.com/) for patent authorization and other paid operations.