mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-20 10:05:00 +08:00
New Version Pre-Release
This commit is contained in:
parent
751fa6e055
commit
76b9b10dc7
@ -41,7 +41,7 @@ import pytesseract
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
# import uuid
|
# import uuid
|
||||||
from threading import Thread, Event
|
from threading import Thread, Event
|
||||||
from myChrome import MyChrome
|
from myChrome import MyChrome, MyUCChrome
|
||||||
from utils import check_pause, download_image, get_output_code, isnull, myMySQL, new_line, write_to_csv, write_to_excel
|
from utils import check_pause, download_image, get_output_code, isnull, myMySQL, new_line, write_to_csv, write_to_excel
|
||||||
desired_capabilities = DesiredCapabilities.CHROME
|
desired_capabilities = DesiredCapabilities.CHROME
|
||||||
desired_capabilities["pageLoadStrategy"] = "none"
|
desired_capabilities["pageLoadStrategy"] = "none"
|
||||||
@ -1473,7 +1473,7 @@ if __name__ == '__main__':
|
|||||||
browser_t = MyChrome(
|
browser_t = MyChrome(
|
||||||
options=options, chrome_options=option, executable_path=driver_path)
|
options=options, chrome_options=option, executable_path=driver_path)
|
||||||
elif cloudflare == 1:
|
elif cloudflare == 1:
|
||||||
browser_t = uc.Chrome(
|
browser_t = MyUCChrome(
|
||||||
options=options, chrome_options=option, executable_path=driver_path)
|
options=options, chrome_options=option, executable_path=driver_path)
|
||||||
print("Pass Cloudflare Mode")
|
print("Pass Cloudflare Mode")
|
||||||
print("过Cloudflare验证模式")
|
print("过Cloudflare验证模式")
|
||||||
|
@ -1,5 +1,3 @@
|
|||||||
|
|
||||||
|
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
from selenium.webdriver.common.keys import Keys
|
from selenium.webdriver.common.keys import Keys
|
||||||
from selenium.webdriver.common.action_chains import ActionChains
|
from selenium.webdriver.common.action_chains import ActionChains
|
||||||
@ -14,10 +12,12 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|||||||
from selenium.webdriver.support.ui import Select
|
from selenium.webdriver.support.ui import Select
|
||||||
from selenium.webdriver import ActionChains
|
from selenium.webdriver import ActionChains
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
|
import undetected_chromedriver as uc
|
||||||
desired_capabilities = DesiredCapabilities.CHROME
|
desired_capabilities = DesiredCapabilities.CHROME
|
||||||
desired_capabilities["pageLoadStrategy"] = "none"
|
desired_capabilities["pageLoadStrategy"] = "none"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class MyChrome(webdriver.Chrome):
|
class MyChrome(webdriver.Chrome):
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -89,3 +89,77 @@ class MyChrome(webdriver.Chrome):
|
|||||||
raise NoSuchElementException
|
raise NoSuchElementException
|
||||||
else:
|
else:
|
||||||
return super().find_elements(by=by, value=value)
|
return super().find_elements(by=by, value=value)
|
||||||
|
|
||||||
|
|
||||||
|
class MyUCChrome(uc.Chrome):
    """``uc.Chrome`` driver whose element lookups can also search inside iframes.

    ``find_element`` / ``find_elements`` accept an extra ``iframe`` flag.
    When it is true, every ``<iframe>`` of the top-level document is entered
    in turn and the lookup is performed inside it.  After a successful
    lookup the driver is deliberately left switched into the matching iframe
    so the returned element(s) stay usable; ``iframe_env`` records that
    state and the next lookup switches back to the root document first.
    """

    def __init__(self, *args, **kwargs):
        # True while the driver is switched into an iframe, False while it
        # is on the root document.
        self.iframe_env = False
        super().__init__(*args, **kwargs)  # call the parent __init__

    def _reset_to_root(self):
        """Return to the root document if an earlier lookup left us in an iframe."""
        if self.iframe_env:
            super().switch_to.default_content()
            self.iframe_env = False

    def _enter_frame(self, frame):
        """Switch from the root document into ``frame`` and track the state."""
        super().switch_to.default_content()
        self.iframe_env = False
        super().switch_to.frame(frame)
        self.iframe_env = True

    def find_element(self, by=By.ID, value=None, iframe=False):
        """Find a single element, optionally searching inside iframes.

        Args:
            by: Locator strategy (defaults to ``By.ID``).
            value: Locator value.
            iframe: When true, search each iframe of the root document
                instead of the root document itself.

        Returns:
            The first matching element.  With ``iframe=True`` the driver
            stays switched into the iframe that contains it.

        Raises:
            NoSuchElementException: With ``iframe=True``, when no iframe
                contains a matching element.  With ``iframe=False`` the
                parent implementation's exceptions propagate unchanged.
        """
        self._reset_to_root()
        if not iframe:
            return super().find_element(by=by, value=value)

        # Collect all iframes; a failure here means there is nothing to
        # search, so fall back to an empty list instead of leaving the name
        # unbound (which previously crashed the loop below with NameError).
        try:
            frames = super().find_elements(By.CSS_SELECTOR, "iframe")
        except Exception as e:
            print(e)
            frames = []

        for frame in frames:
            try:
                self._enter_frame(frame)
                # Stay inside this iframe on success so the element remains
                # valid for the caller.
                return super().find_element(by=by, value=value)
            except Exception:
                print("No such element found in the iframe")

        raise NoSuchElementException(
            "No element matching {}={!r} found in any iframe".format(by, value))

    def find_elements(self, by=By.ID, value=None, iframe=False):
        """Find all matching elements, optionally searching inside iframes.

        Args:
            by: Locator strategy (defaults to ``By.ID``).
            value: Locator value.
            iframe: When true, search each iframe of the root document
                instead of the root document itself.

        Returns:
            With ``iframe=False``, whatever the parent ``find_elements``
            returns.  With ``iframe=True``, the non-empty result from the
            first iframe that has matches; the driver stays switched into
            that iframe.

        Raises:
            NoSuchElementException: With ``iframe=True``, when no iframe
                yields any matching element.
        """
        self._reset_to_root()
        if not iframe:
            return super().find_elements(by=by, value=value)

        # Errors while listing the iframes propagate to the caller, as before.
        frames = super().find_elements(By.CSS_SELECTOR, "iframe")

        for frame in frames:
            try:
                self._enter_frame(frame)
                elements = super().find_elements(by=by, value=value)
                if len(elements) > 0:
                    # Stay inside this iframe so the elements remain valid.
                    return elements
            except Exception:
                print("No such element found in the iframe")

        raise NoSuchElementException(
            "No elements matching {}={!r} found in any iframe".format(by, value))
|
||||||
|
|
||||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1 +0,0 @@
|
|||||||
{"id":10,"name":"Page Not Found","url":"https://www.genecards.org/lookup/text=Mrpl52","links":"https://turnstile.zeroclover.io/","create_time":"7/8/2023, 8:04:15 AM","update_time":"7/8/2023, 8:04:49 AM","version":"0.3.5","saveThreshold":10,"cloudflare":1,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.genecards.org/lookup/text=Mrpl52","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"Open Page","value":"https://turnstile.zeroclover.io/","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"text","exampleValue":"https://turnstile.zeroclover.io/"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"Captcha success!\n\n\n\n \n \n \n Cloudflare Turnstile Demo\n \n \n\n\n \n \n Cloudflare Turnstile Demo w/ Managed Mode\n \n \n \n \n \n\n\n\n ✍操作提示框(可点此拖动) \n ● 已选中1个元素,您可以:\n 确认采集 取消选择 Path: /html/body \n"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"Open Page","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":10,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.genecards.org/lookup/text=Mrpl52","links":"https://turnstile.zeroclover.io/","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"Collect 
Data","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body","allXPaths":["/html/body","//body[contains(., 'Captcha su')]","/html/body"],"exampleValues":[{"num":0,"value":"Captcha success!\n\n\n\n \n \n \n Cloudflare Turnstile Demo\n \n \n\n\n \n \n Cloudflare Turnstile Demo w/ Managed Mode\n \n \n \n \n \n\n\n\n ✍操作提示框(可点此拖动) \n ● 已选中1个元素,您可以:\n 确认采集 取消选择 Path: /html/body \n"}],"unique_index":"hqoc6f3lcauljt8tjz5","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]}
|
|
@ -1 +0,0 @@
|
|||||||
{"id":11,"name":"Page Not Found","url":"https://www.genecards.org/lookup/text=Mrpl52","links":"https://turnstile.zeroclover.io/","create_time":"7/8/2023, 8:04:15 AM","update_time":"7/8/2023, 8:05:38 AM","version":"0.3.5","saveThreshold":10,"cloudflare":1,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.genecards.org/lookup/text=Mrpl52","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"Open Page","value":"https://turnstile.zeroclover.io/","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"text","exampleValue":"https://turnstile.zeroclover.io/"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"Captcha success!\n\n\n\n \n \n \n Cloudflare Turnstile Demo\n \n \n\n\n \n \n Cloudflare Turnstile Demo w/ Managed Mode\n \n \n \n \n \n\n\n\n ✍操作提示框(可点此拖动) \n ● 已选中1个元素,您可以:\n 确认采集 取消选择 Path: /html/body \n"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"Open Page","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":19,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.genecards.org/lookup/text=Mrpl52","links":"https://turnstile.zeroclover.io/","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"Collect 
Data","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body","allXPaths":["/html/body","//body[contains(., 'Captcha su')]","/html/body"],"exampleValues":[{"num":0,"value":"Captcha success!\n\n\n\n \n \n \n Cloudflare Turnstile Demo\n \n \n\n\n \n \n Cloudflare Turnstile Demo w/ Managed Mode\n \n \n \n \n \n\n\n\n ✍操作提示框(可点此拖动) \n ● 已选中1个元素,您可以:\n 确认采集 取消选择 Path: /html/body \n"}],"unique_index":"hqoc6f3lcauljt8tjz5","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]}
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1 +0,0 @@
|
|||||||
{"id":6,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/8/2023, 7:54:10 AM","update_time":"7/8/2023, 7:54:10 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":2,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"sadf<enter>","value":"sadf<enter>"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"sadf<enter>","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}}]}
|
|
@ -1 +0,0 @@
|
|||||||
{"id":7,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/8/2023, 7:54:10 AM","update_time":"7/8/2023, 7:54:46 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":2,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"sadf<enter>","value":"sadf<enter>"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":5,"waitType":"1","beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"sadf<enter>","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}}]}
|
|
@ -1 +0,0 @@
|
|||||||
{"id":8,"name":"iP地址查询--手机号码查询归属地 | 邮政编码查询 | iP地址归属地查询 | 身份证号码验证在线查询网","url":"https://www.ip138.com","links":"https://www.ip138.com","create_time":"7/8/2023, 8:00:49 AM","update_time":"7/8/2023, 8:00:49 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.ip138.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"Open Page","value":"https://www.ip138.com","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"text","exampleValue":"https://www.ip138.com"}],"outputParameters":[{"id":0,"name":"para1_text","desc":"","type":"text","recordASField":1,"exampleValue":"\n您的iP地址是:[137.132.211.47 ] 来自:新加坡 \n"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,4,5,6],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"Open Page","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.ip138.com","links":"https://www.ip138.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":-1,"index":2,"parentId":0,"type":0,"option":2,"title":"Click Element","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/p[1]/a[1]","iframe":true,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/p[1]/a[1]","//a[contains(., '137.132.21')]","/html/body/p[last()-2]/a[last()-1]"]}},{"id":-1,"index":3,"parentId":0,"type":0,"option":2,"title":"Click 
Element","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/p[1]/a[1]","iframe":true,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/p[1]/a[1]","//a[contains(., '137.132.21')]","/html/body/p[last()-2]/a[last()-1]"]}},{"id":2,"index":4,"parentId":0,"type":0,"option":3,"title":"Collect Data","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":true,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"para1_text","desc":"","extractType":0,"relativeXPath":"/html/body/p[1]","allXPaths":["/html/body/p[1]","//p[contains(., '您的iP地址是:[')]","/html/body/p[last()-2]"],"exampleValues":[{"num":0,"value":"\n您的iP地址是:[137.132.211.47 ] 来自:新加坡 \n"}],"unique_index":"s1usikyht6ljt8p52a","iframe":true,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":3,"index":5,"parentId":0,"type":0,"option":2,"title":"Click Element","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/p[1]/a[1]","iframe":true,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/p[1]/a[1]","//a[contains(., '137.132.21')]","/html/body/p[last()-2]/a[last()-1]"]}},{"id":4,"index":6,"parentId":0,"type":0,"option":3,"title":"Collect 
Data","sequence":[],"isInLoop":false,"position":3,"parameters":{"history":1,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"para1_text","desc":"","extractType":0,"relativeXPath":"/html/body/div[1]/div[2]/div[2]/div[1]/div[2]/div[1]/div[1]/div[2]/div[2]/div[1]/h1[1]","allXPaths":["/html/body/div[1]/div[2]/div[2]/div[1]/div[2]/div[1]/div[1]/div[2]/div[2]/div[1]/h1[1]","//h1[contains(., '137.132.21')]","/html/body/div[last()-3]/div/div[last()-1]/div/div[last()-1]/div[last()-2]/div/div/div/div[last()-1]/h1"],"exampleValues":[{"num":0,"value":"137.132.211.47"}],"unique_index":"538ijdy2kiljt8pa2v","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]}
|
|
@ -1 +0,0 @@
|
|||||||
{"id":9,"name":"Page Not Found","url":"https://www.genecards.org/lookup/text=Mrpl52","links":"https://turnstile.zeroclover.io/","create_time":"7/8/2023, 8:04:15 AM","update_time":"7/8/2023, 8:04:15 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.genecards.org/lookup/text=Mrpl52","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"Open Page","value":"https://turnstile.zeroclover.io/","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"text","exampleValue":"https://turnstile.zeroclover.io/"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"Captcha success!\n\n\n\n \n \n \n Cloudflare Turnstile Demo\n \n \n\n\n \n \n Cloudflare Turnstile Demo w/ Managed Mode\n \n \n \n \n \n\n\n\n ✍操作提示框(可点此拖动) \n ● 已选中1个元素,您可以:\n 确认采集 取消选择 Path: /html/body \n"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"Open Page","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":10,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.genecards.org/lookup/text=Mrpl52","links":"https://turnstile.zeroclover.io/","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"Collect 
Data","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body","allXPaths":["/html/body","//body[contains(., 'Captcha su')]","/html/body"],"exampleValues":[{"num":0,"value":"Captcha success!\n\n\n\n \n \n \n Cloudflare Turnstile Demo\n \n \n\n\n \n \n Cloudflare Turnstile Demo w/ Managed Mode\n \n \n \n \n \n\n\n\n ✍操作提示框(可点此拖动) \n ● 已选中1个元素,您可以:\n 确认采集 取消选择 Path: /html/body \n"}],"unique_index":"hqoc6f3lcauljt8tjz5","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]}
|
|
File diff suppressed because one or more lines are too long
1
.temp_to_pub/EasySpider_windows_x64/tasks/114.json
Normal file
1
.temp_to_pub/EasySpider_windows_x64/tasks/114.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"id":114,"name":"Just a moment...","url":"https://turnstile.zeroclover.io/","links":"https://turnstile.zeroclover.io/","create_time":"7/8/2023, 8:31:17 AM","update_time":"7/8/2023, 8:32:01 AM","version":"0.3.5","saveThreshold":10,"cloudflare":1,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://turnstile.zeroclover.io/","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://turnstile.zeroclover.io/","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://turnstile.zeroclover.io/"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"Captcha success!\n\n\n\n \n \n \n Cloudflare Turnstile Demo\n \n \n\n\n \n \n Cloudflare Turnstile Demo w/ Managed Mode\n \n \n \n \n \n\n\n\n ✍操作提示框(可点此拖动) \n ● 已选中1个元素,您可以:\n 确认采集 取消选择 Path: /html/body \n"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":10,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://turnstile.zeroclover.io/","links":"https://turnstile.zeroclover.io/","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body","allXPaths":["/html/body","//body[contains(., 'Captcha 
su')]","/html/body"],"exampleValues":[{"num":0,"value":"Captcha success!\n\n\n\n \n \n \n Cloudflare Turnstile Demo\n \n \n\n\n \n \n Cloudflare Turnstile Demo w/ Managed Mode\n \n \n \n \n \n\n\n\n ✍操作提示框(可点此拖动) \n ● 已选中1个元素,您可以:\n 确认采集 取消选择 Path: /html/body \n"}],"unique_index":"fdncdf6wo2ljt9s7be","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]}
|
1
.temp_to_pub/EasySpider_windows_x64/tasks/115.json
Normal file
1
.temp_to_pub/EasySpider_windows_x64/tasks/115.json
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -47,10 +47,12 @@ if __name__ == "__main__":
|
|||||||
file_name = f"EasySpider_{easyspider_version}_windows_x64.7z"
|
file_name = f"EasySpider_{easyspider_version}_windows_x64.7z"
|
||||||
if os.path.exists("./EasySpider_windows_x64/user_data"):
|
if os.path.exists("./EasySpider_windows_x64/user_data"):
|
||||||
shutil.rmtree("./EasySpider_windows_x64/user_data")
|
shutil.rmtree("./EasySpider_windows_x64/user_data")
|
||||||
|
if os.path.exists("./EasySpider_windows_x64/Data"):
|
||||||
shutil.rmtree("./EasySpider_windows_x64/Data")
|
shutil.rmtree("./EasySpider_windows_x64/Data")
|
||||||
shutil.rmtree("./EasySpider_windows_x64/config.json")
|
if os.path.exists("./EasySpider_windows_x64/execution_instances"):
|
||||||
shutil.rmtree("./EasySpider_windows_x64/mysql_config.json")
|
|
||||||
shutil.rmtree("./EasySpider_windows_x64/execution_instances")
|
shutil.rmtree("./EasySpider_windows_x64/execution_instances")
|
||||||
|
os.remove("./EasySpider_windows_x64/config.json")
|
||||||
|
os.remove("./EasySpider_windows_x64/mysql_config.json")
|
||||||
os.mkdir("./EasySpider_windows_x64/Data")
|
os.mkdir("./EasySpider_windows_x64/Data")
|
||||||
os.mkdir("./EasySpider_windows_x64/execution_instances")
|
os.mkdir("./EasySpider_windows_x64/execution_instances")
|
||||||
compress_folder_to_7z_split("./EasySpider_windows_x64", file_name)
|
compress_folder_to_7z_split("./EasySpider_windows_x64", file_name)
|
||||||
@ -61,10 +63,12 @@ if __name__ == "__main__":
|
|||||||
file_name = f"EasySpider_{easyspider_version}_windows_x86.7z"
|
file_name = f"EasySpider_{easyspider_version}_windows_x86.7z"
|
||||||
if os.path.exists("./EasySpider_windows_x86/user_data"):
|
if os.path.exists("./EasySpider_windows_x86/user_data"):
|
||||||
shutil.rmtree("./EasySpider_windows_x86/user_data")
|
shutil.rmtree("./EasySpider_windows_x86/user_data")
|
||||||
|
if os.path.exists("./EasySpider_windows_x86/Data"):
|
||||||
shutil.rmtree("./EasySpider_windows_x86/Data")
|
shutil.rmtree("./EasySpider_windows_x86/Data")
|
||||||
|
if os.path.exists("./EasySpider_windows_x86/execution_instances"):
|
||||||
shutil.rmtree("./EasySpider_windows_x86/execution_instances")
|
shutil.rmtree("./EasySpider_windows_x86/execution_instances")
|
||||||
shutil.rmtree("./EasySpider_windows_x86/config.json")
|
os.remove("./EasySpider_windows_x86/config.json")
|
||||||
shutil.rmtree("./EasySpider_windows_x86/mysql_config.json")
|
os.remove("./EasySpider_windows_x86/mysql_config.json")
|
||||||
os.mkdir("./EasySpider_windows_x86/Data")
|
os.mkdir("./EasySpider_windows_x86/Data")
|
||||||
os.mkdir("./EasySpider_windows_x86/execution_instances")
|
os.mkdir("./EasySpider_windows_x86/execution_instances")
|
||||||
compress_folder_to_7z("./EasySpider_windows_x64", file_name)
|
compress_folder_to_7z("./EasySpider_windows_x64", file_name)
|
||||||
|
@ -551,7 +551,7 @@
|
|||||||
<input onkeydown="inputDelete(event)" required name="serviceName" value="新web采集任务" id="serviceName" class="form-control"></input>
|
<input onkeydown="inputDelete(event)" required name="serviceName" value="新web采集任务" id="serviceName" class="form-control"></input>
|
||||||
<label>任务描述:</label>
|
<label>任务描述:</label>
|
||||||
<input onkeydown="inputDelete(event)" id="serviceDescription" name="serviceDescription" class="form-control"></input>
|
<input onkeydown="inputDelete(event)" id="serviceDescription" name="serviceDescription" class="form-control"></input>
|
||||||
<label>导出数据格式(Excel/CSV/TXT/数据库):</label>
|
<label>导出数据格式(Excel/CSV/TXT/数据库,<a href="https://www.bilibili.com/video/BV1os4y1679S/" target="_blank">查看MySQL操作教程</a>):</label>
|
||||||
<select id="outputFormat" class="form-control">
|
<select id="outputFormat" class="form-control">
|
||||||
<option value = "xlsx">XLSX(EXCEL)</option>
|
<option value = "xlsx">XLSX(EXCEL)</option>
|
||||||
<option value = "csv">CSV</option>
|
<option value = "csv">CSV</option>
|
||||||
@ -560,7 +560,7 @@
|
|||||||
</select>
|
</select>
|
||||||
<label>导出文件名/数据库表格名称(名称中的“current_time”会被替换为执行任务时的时间戳):</label>
|
<label>导出文件名/数据库表格名称(名称中的“current_time”会被替换为执行任务时的时间戳):</label>
|
||||||
<input onkeydown="inputDelete(event)" value="current_time" id="saveName" class="form-control"></input>
|
<input onkeydown="inputDelete(event)" value="current_time" id="saveName" class="form-control"></input>
|
||||||
<label>是否为cloudflare等极端反爬网站:</label>
|
<label>是否为Cloudflare等极端反爬网站(<a href="https://www.bilibili.com/video/BV1Ph4y1E7R9/" target="_blank">查看Cloudflare设计和执行教程</a>):</label>
|
||||||
<select id="cloudflare" name="cloudflare" class="form-control">
|
<select id="cloudflare" name="cloudflare" class="form-control">
|
||||||
<option value = 0>否</option>
|
<option value = 0>否</option>
|
||||||
<option value = 1>是</option>
|
<option value = 1>是</option>
|
||||||
|
@ -74,6 +74,10 @@ Bilibili/B站视频教程:
|
|||||||
|
|
||||||
[实例 - 反人类网站文章采集和代码调试](https://www.bilibili.com/video/BV11W4y1D71t/)
|
[实例 - 反人类网站文章采集和代码调试](https://www.bilibili.com/video/BV11W4y1D71t/)
|
||||||
|
|
||||||
|
[写入MySQL数据库教程](https://www.bilibili.com/video/BV1os4y1679S/)
|
||||||
|
|
||||||
|
[Cloudflare等极端反爬网站如何爬取](https://www.bilibili.com/video/BV1Ph4y1E7R9/)
|
||||||
|
|
||||||
Refer to [Youtube Playlist](https://youtube.com/playlist?list=PL0kEFEkWrT7mt9MUlEBV2DTo1QsaanUTp) to see the video tutorials of EasySpider.
|
Refer to [Youtube Playlist](https://youtube.com/playlist?list=PL0kEFEkWrT7mt9MUlEBV2DTo1QsaanUTp) to see the video tutorials of EasySpider.
|
||||||
|
|
||||||
## 样例任务/Sample Tasks
|
## 样例任务/Sample Tasks
|
||||||
|
Loading…
x
Reference in New Issue
Block a user