diff --git a/.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py b/.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py index 18fea58..17a8ba6 100644 --- a/.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py +++ b/.temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py @@ -1577,13 +1577,14 @@ if __name__ == '__main__': browser_t = MyChrome( options=options, chrome_options=option, executable_path=driver_path) elif cloudflare == 1: - if sys.platform != "darwin": - options.binary_location = "" # 需要用自己的浏览器 + if sys.platform == "win32": + options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器 + # options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器 browser_t = MyUCChrome( options=options, driver_executable_path=driver_path) else: - print("Not support Cloudflare Mode on MacOS") - print("MacOS不支持Cloudflare验证模式") + print("Cloudflare模式只支持Windows x64平台。") + print("Cloudflare Mode only support on Windows x64 platform.") sys.exit() event = Event() event.set() @@ -1607,9 +1608,9 @@ if __name__ == '__main__': print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。") print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.") print("----------------------------------\n\n") - if cloudflare: - print("过Cloudflare验证模式有时候会不稳定,请注意观察上方提示的浏览器版本信息是否正确,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。") - print("Passing the Cloudflare verification mode is sometimes unstable. Please pay attention to whether the browser version information prompted above is correct. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.") + # if cloudflare: + # print("过Cloudflare验证模式有时候会不稳定,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。") + # print("Passing the Cloudflare verification mode is sometimes unstable. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.") # 使用监听器监听键盘输入 try: with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener: diff --git a/.temp_to_pub/EasySpider_windows_x64/Code/myChrome.py b/.temp_to_pub/EasySpider_windows_x64/Code/myChrome.py index 27b55e2..a86f670 100644 --- a/.temp_to_pub/EasySpider_windows_x64/Code/myChrome.py +++ b/.temp_to_pub/EasySpider_windows_x64/Code/myChrome.py @@ -12,6 +12,8 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from selenium.webdriver.support.ui import Select from selenium.webdriver import ActionChains from selenium.webdriver.common.by import By +import sys + desired_capabilities = DesiredCapabilities.CHROME desired_capabilities["pageLoadStrategy"] = "none" @@ -89,9 +91,13 @@ class MyChrome(webdriver.Chrome): else: return super().find_elements(by=by, value=value) -import sys -if sys.platform != "darwin": # MacOS不支持Cloudflare - import undetected_chromedriver_ES as uc +# MacOS不支持直接打包带Cloudflare的功能,如果要自己编译运行,可以把这个if去掉,然后配置好浏览器和driver路径 +if sys.platform != "darwin": + ES = True + if ES: # 用自己写的ES版本 + import undetected_chromedriver_ES as uc + else: + import undetected_chromedriver as uc class MyUCChrome(uc.Chrome): diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/0.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/0.json new file mode 100644 index 0000000..170f0c4 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/0.json @@ -0,0 +1 @@ +{"id":0,"name":"详情页","url":"https://www.dongchedi.com/article/7254469214726324796","links":"https://www.dongchedi.com/article/7254469214726324796","create_time":"2023/7/11 17:53:04","update_time":"2023/7/11 17:54:46","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.dongchedi.com/article/7254469214726324796","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.dongchedi.com/article/7254469214726324796","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.dongchedi.com/article/7254469214726324796"}],"outputParameters":[{"id":0,"name":"参数1_页面标题","desc":"","type":"text","recordASField":1,"exampleValue":"荣威D7 DMH混动版官图发布 定位中大型轿车 续航1400km_懂车帝"},{"id":1,"name":"参数2_文本","desc":"","type":"text","recordASField":1,"exampleValue":"日前,荣威D7 DMH(图片)混动版官图正式发布。据悉,新车定位新能源中大型轿车,将会在年内上市发售。外观方面,新车将采用全新的设计风格,整体造型十分时尚且富有运动感。值得注意的是,新车并没有采用与电动版相同的分体式大灯设计,而是相对常规的样式。车身尺寸方面,新车长宽高分别为4890/1890/1510mm,轴距为2810mm。动力方面,新车将搭载热效率大于43%的混动专用发动机,CLTC工况下纯电续航里程为125km,综合续航里程为1400km,馈电油耗为4.3L/100km。"},{"id":2,"name":"参数4_图片地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://p9-dcd.byteimg.com/img/motor-article-img/0e7a4f1c6e89438ea8dc22163d80fae3~noop.webp"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,5],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.dongchedi.com/article/7254469214726324796","links":"https://www.dongchedi.com/article/7254469214726324796","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":6,"relative":false,"name":"参数1_页面标题","desc":"","extractType":0,"relativeXPath":"/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/aside[1]/div[1]/h2[1]","allXPaths":["/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/aside[1]/div[1]/h2[1]","//h2[contains(., '相关推荐')]","//H2[@class='jsx-1932881358 title']","/html/body/div[last()-5]/div/div/div/div/div/aside/div[last()-1]/h2"],"exampleValues":[{"num":0,"value":"荣威D7 DMH混动版官图发布 定位中大型轿车 续航1400km_懂车帝"}],"unique_index":"zq1hj9zt0inljy40dht","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":false,"name":"参数2_文本","desc":"","extractType":0,"relativeXPath":"id(\"article\")","allXPaths":["/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/main[1]/section[1]/div[1]/article[1]/div[2]/div[1]/section[1]","//section[contains(., '日前,荣威D7 DM')]","id(\"article\")","//SECTION[@class='jsx-3371063651 article-content']","/html/body/div[last()-5]/div/div/div/div/div/main/section/div[last()-1]/article/div[last()-1]/div/section"],"exampleValues":[{"num":0,"value":"日前,荣威D7 DMH(图片)混动版官图正式发布。据悉,新车定位新能源中大型轿车,将会在年内上市发售。外观方面,新车将采用全新的设计风格,整体造型十分时尚且富有运动感。值得注意的是,新车并没有采用与电动版相同的分体式大灯设计,而是相对常规的样式。车身尺寸方面,新车长宽高分别为4890/1890/1510mm,轴距为2810mm。动力方面,新车将搭载热效率大于43%的混动专用发动机,CLTC工况下纯电续航里程为125km,综合续航里程为1400km,馈电油耗为4.3L/100km。"}],"unique_index":"zcgjfmkb41ljy4164a","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":-1,"index":3,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[4],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/main[1]/section[1]/div[1]/article[1]/div[2]/div[1]/section[1]/div/img[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/main[1]/section[1]/div[1]/article[1]/div[2]/div[1]/section[1]/div[1]/img[1]","//img[contains(., '')]","/html/body/div[last()-6]/div[last()-1]/div/div/div/div/main/section/div[last()-1]/article/div[last()-1]/div/section/div[last()-4]/img"]}},{"id":-1,"index":4,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":4,"contentType":0,"relative":true,"name":"参数3_图片地址","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://p9-dcd.byteimg.com/img/motor-article-img/0e7a4f1c6e89438ea8dc22163d80fae3~noop.webp"}],"unique_index":"wdaxxokem3ljy44mzo","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":1}],"loopType":1}},{"id":3,"index":5,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[6],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"id(\"article\")//img","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/main[1]/section[1]/div[1]/article[1]/div[2]/div[1]/section[1]/div[1]/img[1]","//img[contains(., '')]","/html/body/div[last()-6]/div[last()-1]/div/div/div/div/main/section/div[last()-1]/article/div[last()-1]/div/section/div[last()-4]/img"]}},{"id":4,"index":6,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":4,"contentType":0,"relative":true,"name":"参数4_图片地址","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://p9-dcd.byteimg.com/img/motor-article-img/0e7a4f1c6e89438ea8dc22163d80fae3~noop.webp"}],"unique_index":"bmdeqk77gfdljy45n1u","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/tasks/127.json b/.temp_to_pub/EasySpider_windows_x64/tasks/127.json new file mode 100644 index 0000000..a12c992 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/tasks/127.json @@ -0,0 +1 @@ +{"id":127,"name":"百度地图","url":"https://map.baidu.com","links":"https://map.baidu.com","create_time":"2023/7/12 20:59:16","update_time":"7/13/2023, 3:59:48 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://map.baidu.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://map.baidu.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://map.baidu.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":2,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"北京市","value":"北京市"},{"id":2,"name":"inputText_2","nodeName":"输入文字","nodeId":4,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"门窗","value":"门窗"},{"id":3,"name":"loopTimes_循环点击下一页_3","nodeId":7,"nodeName":"循环点击下一页","desc":"循环循环点击下一页执行的次数(0代表无限循环)","type":"int","exampleValue":0,"value":0}],"outputParameters":[{"id":0,"name":"参数8_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"鑫隆门窗"},{"id":1,"name":"参数11_文本","desc":"","type":"text","recordASField":1,"exampleValue":"电话:13717842988"},{"id":2,"name":"参数10_文本","desc":"","type":"text","recordASField":1,"exampleValue":"北京市朝阳区红霞路13号"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3,4,5,6,7],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://map.baidu.com","links":"https://map.baidu.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"sole-input\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"北京市","allXPaths":["/html/body/div[1]/div[2]/div[1]/div[1]/div[1]/input[1]","//input[contains(., '')]","id(\"sole-input\")","//INPUT[@class='searchbox-content-common']","//INPUT[@name='word']","/html/body/div[last()-5]/div[last()-5]/div/div[last()-1]/div/input"]}},{"id":3,"index":3,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"search-button\"]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[1]/div[2]/div[1]/button[1]","//button[contains(., '')]","id(\"search-button\")","/html/body/div[last()-7]/div[last()-5]/div/button"]}},{"id":4,"index":4,"parentId":0,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":false,"position":3,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"sole-input\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"门窗","allXPaths":["/html/body/div[1]/div[2]/div[1]/div[1]/div[1]/input[1]","//input[contains(., '')]","id(\"sole-input\")","//INPUT[@class='searchbox-content-common']","//INPUT[@name='word']","/html/body/div[last()-7]/div[last()-5]/div/div[last()-1]/div/input"]}},{"id":5,"index":5,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":4,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"search-button\"]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[1]/div[2]/div[1]/button[1]","//button[contains(., '')]","id(\"search-button\")","/html/body/div[last()-8]/div[last()-5]/div/button"]}},{"id":6,"index":6,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":5,"parameters":{"history":6,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"leadDownloadCard\"]/div[1]/div[1]/div[3]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[1]/div[2]/ul[2]/li[1]/div[1]/div[2]/div[1]/div[1]/div[3]","//div[contains(., '×')]","//DIV[@class='close-btn-download-banner']","/html/body/div[last()-8]/div[last()-5]/ul[last()-1]/li/div/div/div/div/div"]}},{"id":7,"index":7,"parentId":0,"type":1,"option":8,"title":"循环点击下一页","sequence":[9,8],"isInLoop":false,"position":6,"parameters":{"history":6,"tabIndex":-1,"useLoop":false,"xpath":"//a[contains(., '下一页>')]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":0,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[2]/ul[2]/li[1]/div[1]/div[1]/div[6]/p[1]/span[5]/a[1]","//a[contains(., '下一页>')]","/html/body/div[last()-8]/div[last()-5]/ul[last()-1]/li/div/div[last()-1]/div[last()-2]/p/span/a"]}},{"id":9,"index":8,"parentId":7,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":6,"tabIndex":-1,"useLoop":true,"xpath":"//*[contains(@class, \"page\")]/span[5]/a[1]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[1]/div[2]/ul[2]/li[1]/div[1]/div[1]/div[6]/p[1]/span[5]/a[1]","//a[contains(., '下一页>')]","/html/body/div[last()-8]/div[last()-5]/ul[last()-1]/li/div/div[last()-1]/div[last()-2]/p/span/a"],"loopType":0}},{"id":8,"index":9,"parentId":7,"type":1,"option":8,"title":"循环","sequence":[10],"isInLoop":true,"position":0,"parameters":{"history":8,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div/div[2]/ul[2]/li[1]/div[1]/div[1]/ul[1]/li","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[2]/ul[2]/li[1]/div[1]/div[1]/ul[1]/li[1]","//li[contains(., '')]","//LI[@class='search-item base-item']","/html/body/div[last()-8]/div[last()-5]/ul[last()-1]/li/div/div[last()-1]/ul/li[last()-9]"]}},{"id":10,"index":10,"parentId":8,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":8,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":1,"contentType":0,"relative":true,"name":"参数8_链接文本","desc":"","relativeXPath":"/div[1]/div[3]/div[1]/span[1]/a[1]","allXPaths":["/div[1]/div[3]/div[1]/span[1]/a[1]","//a[contains(., '鑫隆门窗')]","//A[@class='n-blue']","/html/body/div[last()-8]/div[last()-5]/ul[last()-1]/li/div/div[last()-1]/ul/li[last()-9]/div[last()-1]/div/div[last()-3]/span[last()-1]/a"],"exampleValues":[{"num":0,"value":"鑫隆门窗"}],"unique_index":"/div[1]/div[3]/div[1]/span[1]/a[1]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":1,"relative":true,"name":"参数11_文本","desc":"","relativeXPath":"/div[1]/div[3]/div[3]","allXPaths":["/div[1]/div[3]/div[3]","//div[contains(., '')]","//DIV[@class='row tel']","/html/body/div[last()-8]/div[last()-5]/ul[last()-1]/li/div/div[last()-1]/ul/li[last()-9]/div[last()-1]/div/div[last()-1]"],"exampleValues":[{"num":0,"value":"电话:13717842988"}],"unique_index":"/div[1]/div[3]/div[3]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":1,"relative":true,"name":"参数10_文本","desc":"","relativeXPath":"/div[1]/div[3]/div[2]/span[1]","allXPaths":["/div[1]/div[3]/div[2]/span[1]","//span[contains(., '北京市朝阳区红霞路1')]","//SPAN[@class='n-grey']","/html/body/div[last()-8]/div[last()-5]/ul[last()-1]/li/div/div[last()-1]/ul/li[last()-9]/div[last()-1]/div/div[last()-2]/span"],"exampleValues":[{"num":0,"value":"北京市朝阳区红霞路13号"}],"unique_index":"/div[1]/div[3]/div[2]/span[1]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/tasks/49.json b/.temp_to_pub/EasySpider_windows_x64/tasks/49.json index b05da19..cc58f20 100644 --- a/.temp_to_pub/EasySpider_windows_x64/tasks/49.json +++ b/.temp_to_pub/EasySpider_windows_x64/tasks/49.json @@ -1 +1 @@ -{"id":49,"name":"中国地震台网——历史查询","url":"http://www.ceic.ac.cn/history","links":"http://www.ceic.ac.cn/history","create_time":"7/1/2023, 9:06:45 PM","version":"0.3.3","containJudge":false,"desc":"http://www.ceic.ac.cn/history","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"http://www.ceic.ac.cn/history","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"http://www.ceic.ac.cn/history"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":2,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"string","exampleValue":"12","value":"12"},{"id":2,"name":"loopTimes_循环_2","nodeId":4,"nodeName":"循环","desc":"循环循环执行的次数(0代表无限循环)","type":"int","exampleValue":0,"value":0}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"3.8"},{"id":1,"name":"参数2_文本","desc":"","type":"string","exampleValue":"2023-06-1117:40:14"},{"id":2,"name":"参数3_文本","desc":"","type":"string","exampleValue":"40.79"},{"id":3,"name":"参数4_文本","desc":"","type":"string","exampleValue":"82.63"},{"id":4,"name":"参数5_文本","desc":"","type":"string","exampleValue":"20"},{"id":5,"name":"参数6_链接文本","desc":"","type":"string","exampleValue":"新疆阿克苏地区沙雅县"},{"id":6,"name":"参数7_链接地址","desc":"","type":"string","exampleValue":"https://news.ceic.ac.cn/CD20230611174015.html"},{"id":7,"name":"自定义参数_7","desc":"","type":"string","exampleValue":"自定义字段"},{"id":8,"name":"自定义参数_8","desc":"","type":"string","exampleValue":"自定义字段"},{"id":9,"name":"自定义参数_9","desc":"","type":"string","exampleValue":"自定义字段"},{"id":10,"name":"自定义参数_10","desc":"","type":"string","exampleValue":"自定义字段"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3,4],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"http://www.ceic.ac.cn/history","links":"http://www.ceic.ac.cn/history","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"weidu1\"]","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"12","allXPaths":["/html/body/div[1]/div[3]/div[1]/div[1]/div[1]/form[1]/div[2]/input[1]","//input[contains(., '')]","id(\"weidu1\")","//INPUT[@class='span1']","//INPUT[@name='weidu1']","/html/body/div[last()-3]/div[last()-1]/div/div/div[last()-1]/form/div[last()-3]/input[last()-1]"]}},{"id":3,"index":3,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"search\"]","wait":2,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[1]/div[3]/div[1]/div[1]/div[1]/form[1]/div[5]/a[1]","//a[contains(., '查询')]","id(\"search\")","//A[@class='check']","/html/body/div[last()-3]/div[last()-1]/div/div/div[last()-1]/form/div/a"]}},{"id":4,"index":4,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[6,9,5],"isInLoop":false,"position":3,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"pagination\")]/ul[1]/li[last()-1]/a[1]","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":0,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[3]/div[1]/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]/ul[1]/li[10]/a[1]","//a[contains(., '»')]","/html/body/div[last()-3]/div[last()-1]/div/div/div/div/div/div/div/ul/li[last()-1]/a"]}},{"id":7,"index":5,"parentId":4,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"//*[contains(@class, \"pagination\")]/ul[1]/li[10]/a[1]","wait":2,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[1]/div[3]/div[1]/div[1]/div[2]/div[1]/div[2]/div[1]/div[1]/ul[1]/li[10]/a[1]","//a[contains(., '»')]","/html/body/div[last()-3]/div[last()-1]/div/div/div/div/div/div/div/ul/li[last()-1]/a"],"loopType":0}},{"id":5,"index":6,"parentId":4,"type":1,"option":8,"title":"循环","sequence":[7],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[3]/div[1]/div[1]/div[2]/div[1]/div[1]/table[1]/tbody[1]/tr","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[3]/div[1]/div[1]/div[2]/div[1]/div[1]/table[1]/tbody[1]/tr[1]","//tr[contains(., '震级(M)发震时刻(')]","//TR[@class='speed-tr-h1']","/html/body/div[last()-3]/div[last()-1]/div/div/div/div/div[last()-1]/table/tbody/tr[last()-20]"]}},{"id":8,"index":7,"parentId":5,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","relativeXPath":"/td[1]","allXPaths":["/td[1]","//td[contains(., '3.8')]","/html/body/div[last()-3]/div[last()-1]/div/div/div/div/div[last()-1]/table/tbody/tr/td[last()-5]"],"exampleValues":[{"num":0,"value":"3.8"},{"num":2,"value":"3.8"},{"num":3,"value":"4.4"},{"num":4,"value":"5.6"},{"num":5,"value":"5.3"},{"num":6,"value":"3.1"},{"num":7,"value":"3.2"},{"num":8,"value":"3.7"},{"num":9,"value":"3.6"},{"num":10,"value":"6.2"},{"num":11,"value":"2.2"},{"num":12,"value":"3.0"},{"num":13,"value":"4.1"},{"num":14,"value":"5.2"},{"num":15,"value":"3.6"},{"num":16,"value":"3.7"},{"num":17,"value":"3.3"},{"num":18,"value":"4.3"},{"num":19,"value":"3.6"},{"num":20,"value":"6.2"}],"unique_index":"/td[1]","default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":true,"name":"参数2_文本","desc":"","relativeXPath":"/td[2]","allXPaths":["/td[2]","//td[contains(., '2023-06-11')]","/html/body/div[last()-3]/div[last()-1]/div/div/div/div/div[last()-1]/table/tbody/tr/td[last()-4]"],"exampleValues":[{"num":0,"value":"2023-06-1117:40:14"},{"num":2,"value":"2023-06-1801:08:11"},{"num":3,"value":"2023-06-1800:14:24"},{"num":4,"value":"2023-06-1719:35:59"},{"num":5,"value":"2023-06-1708:26:14"},{"num":6,"value":"2023-06-1708:05:51"},{"num":7,"value":"2023-06-1601:19:35"},{"num":8,"value":"2023-06-1521:58:09"},{"num":9,"value":"2023-06-1511:21:27"},{"num":10,"value":"2023-06-1510:19:24"},{"num":11,"value":"2023-06-1422:24:41"},{"num":12,"value":"2023-06-1413:39:40"},{"num":13,"value":"2023-06-1404:17:56"},{"num":14,"value":"2023-06-1316:03:43"},{"num":15,"value":"2023-06-1308:48:30"},{"num":16,"value":"2023-06-1305:52:59"},{"num":17,"value":"2023-06-1200:04:18"},{"num":18,"value":"2023-06-1120:25:38"},{"num":19,"value":"2023-06-1119:29:45"},{"num":20,"value":"2023-06-1117:54:45"}],"unique_index":"/td[2]","default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":true,"name":"参数3_文本","desc":"","relativeXPath":"/td[3]","allXPaths":["/td[3]","//td[contains(., '40.79')]","/html/body/div[last()-3]/div[last()-1]/div/div/div/div/div[last()-1]/table/tbody/tr/td[last()-3]"],"exampleValues":[{"num":0,"value":"40.79"},{"num":2,"value":"35.80"},{"num":3,"value":"35.79"},{"num":4,"value":"47.75"},{"num":5,"value":"41.10"},{"num":6,"value":"39.58"},{"num":7,"value":"38.31"},{"num":8,"value":"40.14"},{"num":9,"value":"32.44"},{"num":10,"value":"13.80"},{"num":11,"value":"37.12"},{"num":12,"value":"38.16"},{"num":13,"value":"41.71"},{"num":14,"value":"33.10"},{"num":15,"value":"40.18"},{"num":16,"value":"33.10"},{"num":17,"value":"43.38"},{"num":18,"value":"24.26"},{"num":19,"value":"48.74"},{"num":20,"value":"42.50"}],"unique_index":"/td[3]","default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":true,"name":"参数4_文本","desc":"","relativeXPath":"/td[4]","allXPaths":["/td[4]","//td[contains(., '82.63')]","/html/body/div[last()-3]/div[last()-1]/div/div/div/div/div[last()-1]/table/tbody/tr/td[last()-2]"],"exampleValues":[{"num":0,"value":"82.63"},{"num":2,"value":"79.78"},{"num":3,"value":"79.83"},{"num":4,"value":"147.60"},{"num":5,"value":"142.80"},{"num":6,"value":"82.57"},{"num":7,"value":"89.40"},{"num":8,"value":"77.46"},{"num":9,"value":"94.24"},{"num":10,"value":"120.85"},{"num":11,"value":"114.78"},{"num":12,"value":"88.79"},{"num":13,"value":"80.81"},{"num":14,"value":"75.80"},{"num":15,"value":"83.80"},{"num":16,"value":"86.73"},{"num":17,"value":"88.98"},{"num":18,"value":"122.47"},{"num":19,"value":"129.79"},{"num":20,"value":"142.00"}],"unique_index":"/td[4]","default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":true,"name":"参数5_文本","desc":"","relativeXPath":"/td[5]","allXPaths":["/td[5]","//td[contains(., '20')]","/html/body/div[last()-3]/div[last()-1]/div/div/div/div/div[last()-1]/table/tbody/tr/td[last()-1]"],"exampleValues":[{"num":0,"value":"20"},{"num":2,"value":"10"},{"num":3,"value":"10"},{"num":4,"value":"430"},{"num":5,"value":"10"},{"num":6,"value":"20"},{"num":7,"value":"10"},{"num":8,"value":"23"},{"num":9,"value":"10"},{"num":10,"value":"100"},{"num":11,"value":"10"},{"num":12,"value":"9"},{"num":13,"value":"10"},{"num":14,"value":"20"},{"num":15,"value":"18"},{"num":16,"value":"10"},{"num":17,"value":"19"},{"num":18,"value":"27"},{"num":19,"value":"17"},{"num":20,"value":"130"}],"unique_index":"/td[5]","default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":1,"contentType":0,"relative":true,"name":"参数6_链接文本","desc":"","relativeXPath":"/td[6]/a[1]","allXPaths":["/td[6]/a[1]","//a[contains(., '新疆阿克苏地区沙雅县')]","id(\"cid\")","/html/body/div[last()-3]/div[last()-1]/div/div/div/div/div[last()-1]/table/tbody/tr/td/a"],"exampleValues":[{"num":0,"value":"新疆阿克苏地区沙雅县"},{"num":2,"value":"新疆和田地区和田县"},{"num":3,"value":"新疆和田地区和田县"},{"num":4,"value":"千岛群岛西北"},{"num":5,"value":"日本本州东岸近海"},{"num":6,"value":"新疆阿克苏地区沙雅县"},{"num":7,"value":"新疆巴音郭楞州若羌县"},{"num":8,"value":"新疆克孜勒苏州阿图什市"},{"num":9,"value":"西藏那曲市巴青县"},{"num":10,"value":"菲律宾"},{"num":11,"value":"河北邢台市任泽区"},{"num":12,"value":"新疆巴音郭楞州若羌县"},{"num":13,"value":"新疆阿克苏地区温宿县"},{"num":14,"value":"克什米尔地区"},{"num":15,"value":"新疆阿克苏地区沙雅县"},{"num":16,"value":"西藏那曲市尼玛县"},{"num":17,"value":"新疆吐鲁番市高昌区"},{"num":18,"value":"台湾花莲县海域"},{"num":19,"value":"黑龙江伊春市嘉荫县"},{"num":20,"value":"日本北海道"}],"unique_index":"/td[6]/a[1]","default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数7_链接地址","desc":"","relativeXPath":"/td[6]/a[1]","allXPaths":["/td[6]/a[1]","//a[contains(., '新疆阿克苏地区沙雅县')]","id(\"cid\")","/html/body/div[last()-3]/div[last()-1]/div/div/div/div/div[last()-1]/table/tbody/tr/td/a"],"exampleValues":[{"num":0,"value":"https://news.ceic.ac.cn/CD20230611174015.html"},{"num":2,"value":"https://news.ceic.ac.cn/CD20230618010812.html"},{"num":3,"value":"https://news.ceic.ac.cn/CD20230618001425.html"},{"num":4,"value":"https://news.ceic.ac.cn/CC20230617193560.html"},{"num":5,"value":"https://news.ceic.ac.cn/CC20230617082615.html"},{"num":6,"value":"https://news.ceic.ac.cn/CD20230617080552.html"},{"num":7,"value":"https://news.ceic.ac.cn/CD20230616011935.html"},{"num":8,"value":"https://news.ceic.ac.cn/CD20230615215810.html"},{"num":9,"value":"https://news.ceic.ac.cn/CD20230615112127.html"},{"num":10,"value":"https://news.ceic.ac.cn/CC20230615101924.html"},{"num":11,"value":"https://news.ceic.ac.cn/CD20230614222441.html"},{"num":12,"value":"https://news.ceic.ac.cn/CD20230614133941.html"},{"num":13,"value":"https://news.ceic.ac.cn/CD20230614041757.html"},{"num":14,"value":"https://news.ceic.ac.cn/CC20230613160344.html"},{"num":15,"value":"https://news.ceic.ac.cn/CD20230613084830.html"},{"num":16,"value":"https://news.ceic.ac.cn/CC20230613055259.html"},{"num":17,"value":"https://news.ceic.ac.cn/CD20230612000419.html"},{"num":18,"value":"https://news.ceic.ac.cn/CD20230611202539.html"},{"num":19,"value":"https://news.ceic.ac.cn/CD20230611192945.html"},{"num":20,"value":"https://news.ceic.ac.cn/CC20230611175446.html"}],"unique_index":"/td[6]/a[1]","default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":true,"name":"自定义参数_7","desc":"","extractType":0,"relativeXPath":"","allXPaths":[],"exampleValues":[{"num":0,"value":"自定义字段"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":false,"name":"自定义参数_8","desc":"","extractType":0,"relativeXPath":"/html/body/div[1]/div[2]/div/ul/li[2]/a","allXPaths":[],"exampleValues":[{"num":0,"value":"自定义字段"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":false,"name":"自定义参数_9","desc":"","extractType":0,"relativeXPath":"//DIV[@class='link']","allXPaths":[],"exampleValues":[{"num":0,"value":"自定义字段"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":true,"name":"自定义参数_10","desc":"","extractType":0,"relativeXPath":"/a[1]","allXPaths":[],"exampleValues":[{"num":0,"value":"自定义字段"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}},{"id":-1,"index":8,"parentId":5,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":3,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":["/html/body/div[1]/div[4]/div[1]","//div[contains(., '网')]","//DIV[@class='link']","/html/body/div[last()-3]/div/div"],"exampleValues":[{"num":0,"value":"\n 网站声明\n  | \n 京ICP备06028819-4号\n  | \n 联系我们:houjm@seis.ac.cn\n "}],"unique_index":"ka0h5psz1whljjtcvkn","default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":6,"index":9,"parentId":4,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"codeMode":0,"code":"document.querySelector(\"#zhenji2\").value = 'Field[\"参数3_文本\"]'","waitTime":0,"recordASField":0}}]} \ No newline at end of file +{"id":49,"name":"详情页","url":"https://www.dongchedi.com/article/7254469214726324796","links":"https://www.dongchedi.com/article/7254469214726324796","create_time":"2023/7/11 17:53:04","update_time":"2023/7/11 17:54:46","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.dongchedi.com/article/7254469214726324796","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.dongchedi.com/article/7254469214726324796","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.dongchedi.com/article/7254469214726324796"}],"outputParameters":[{"id":0,"name":"参数1_页面标题","desc":"","type":"text","recordASField":1,"exampleValue":"荣威D7 DMH混动版官图发布 定位中大型轿车 续航1400km_懂车帝"},{"id":1,"name":"参数2_文本","desc":"","type":"text","recordASField":1,"exampleValue":"日前,荣威D7 DMH(图片)混动版官图正式发布。据悉,新车定位新能源中大型轿车,将会在年内上市发售。外观方面,新车将采用全新的设计风格,整体造型十分时尚且富有运动感。值得注意的是,新车并没有采用与电动版相同的分体式大灯设计,而是相对常规的样式。车身尺寸方面,新车长宽高分别为4890/1890/1510mm,轴距为2810mm。动力方面,新车将搭载热效率大于43%的混动专用发动机,CLTC工况下纯电续航里程为125km,综合续航里程为1400km,馈电油耗为4.3L/100km。"},{"id":2,"name":"参数4_图片地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://p9-dcd.byteimg.com/img/motor-article-img/0e7a4f1c6e89438ea8dc22163d80fae3~noop.webp"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,5],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.dongchedi.com/article/7254469214726324796","links":"https://www.dongchedi.com/article/7254469214726324796","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":6,"relative":false,"name":"参数1_页面标题","desc":"","extractType":0,"relativeXPath":"/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/aside[1]/div[1]/h2[1]","allXPaths":["/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/aside[1]/div[1]/h2[1]","//h2[contains(., '相关推荐')]","//H2[@class='jsx-1932881358 title']","/html/body/div[last()-5]/div/div/div/div/div/aside/div[last()-1]/h2"],"exampleValues":[{"num":0,"value":"荣威D7 DMH混动版官图发布 定位中大型轿车 续航1400km_懂车帝"}],"unique_index":"zq1hj9zt0inljy40dht","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":false,"name":"参数2_文本","desc":"","extractType":0,"relativeXPath":"id(\"article\")","allXPaths":["/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/main[1]/section[1]/div[1]/article[1]/div[2]/div[1]/section[1]","//section[contains(., '日前,荣威D7 DM')]","id(\"article\")","//SECTION[@class='jsx-3371063651 article-content']","/html/body/div[last()-5]/div/div/div/div/div/main/section/div[last()-1]/article/div[last()-1]/div/section"],"exampleValues":[{"num":0,"value":"日前,荣威D7 DMH(图片)混动版官图正式发布。据悉,新车定位新能源中大型轿车,将会在年内上市发售。外观方面,新车将采用全新的设计风格,整体造型十分时尚且富有运动感。值得注意的是,新车并没有采用与电动版相同的分体式大灯设计,而是相对常规的样式。车身尺寸方面,新车长宽高分别为4890/1890/1510mm,轴距为2810mm。动力方面,新车将搭载热效率大于43%的混动专用发动机,CLTC工况下纯电续航里程为125km,综合续航里程为1400km,馈电油耗为4.3L/100km。"}],"unique_index":"zcgjfmkb41ljy4164a","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":-1,"index":3,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[4],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/main[1]/section[1]/div[1]/article[1]/div[2]/div[1]/section[1]/div/img[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/main[1]/section[1]/div[1]/article[1]/div[2]/div[1]/section[1]/div[1]/img[1]","//img[contains(., '')]","/html/body/div[last()-6]/div[last()-1]/div/div/div/div/main/section/div[last()-1]/article/div[last()-1]/div/section/div[last()-4]/img"]}},{"id":-1,"index":4,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":4,"contentType":0,"relative":true,"name":"参数3_图片地址","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://p9-dcd.byteimg.com/img/motor-article-img/0e7a4f1c6e89438ea8dc22163d80fae3~noop.webp"}],"unique_index":"wdaxxokem3ljy44mzo","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":1}],"loopType":1}},{"id":3,"index":5,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[6],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"id(\"article\")//img","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/main[1]/section[1]/div[1]/article[1]/div[2]/div[1]/section[1]/div[1]/img[1]","//img[contains(., '')]","/html/body/div[last()-6]/div[last()-1]/div/div/div/div/main/section/div[last()-1]/article/div[last()-1]/div/section/div[last()-4]/img"]}},{"id":4,"index":6,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":4,"contentType":0,"relative":true,"name":"参数4_图片地址","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://p9-dcd.byteimg.com/img/motor-article-img/0e7a4f1c6e89438ea8dc22163d80fae3~noop.webp"}],"unique_index":"bmdeqk77gfdljy45n1u","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/compress.py b/.temp_to_pub/compress.py index e149994..5fd68b1 100644 --- a/.temp_to_pub/compress.py +++ b/.temp_to_pub/compress.py @@ -9,7 +9,7 @@ import platform import shutil import zipfile import urllib.request -import py7zr +# import py7zr def compress_folder_to_7z(folder_path, output_file): if os.path.exists(output_file): diff --git a/ElectronJS/EasySpider_en.crx b/ElectronJS/EasySpider_en.crx index 9e2520a..0b45444 100644 Binary files a/ElectronJS/EasySpider_en.crx and b/ElectronJS/EasySpider_en.crx differ diff --git a/ElectronJS/EasySpider_zh.crx b/ElectronJS/EasySpider_zh.crx index 4943f12..f855edc 100644 Binary files a/ElectronJS/EasySpider_zh.crx and b/ElectronJS/EasySpider_zh.crx differ diff --git a/ElectronJS/change_version.py b/ElectronJS/change_version.py index 2409639..46586fd 100644 --- a/ElectronJS/change_version.py +++ b/ElectronJS/change_version.py @@ -30,7 +30,7 @@ def update_file_version(file_path, new_version, key="当前版本/Current Versio file.write(line) -version = "0.3.5" +version = "0.3.6" # py html js @@ -39,7 +39,7 @@ if __name__ == "__main__": file_path = "../.temp_to_pub/compress.py" update_file_version(file_path, version, key='easyspider_version = "') - file_path = "./src/taskGrid/logic_deprecated.js" + file_path = "./src/taskGrid/logic.js" update_file_version(file_path, version, key='"version": "') # file_path = "./src/taskGrid/logic.js" diff --git a/ElectronJS/config.json b/ElectronJS/config.json index bbebf58..8479d2d 100644 --- a/ElectronJS/config.json +++ b/ElectronJS/config.json @@ -1 +1 @@ -{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":0,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Document\\Projects\\EasySpider\\ElectronJS\\user_data"} \ No newline at end of file +{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"} \ No newline at end of file diff --git a/ElectronJS/main.js b/ElectronJS/main.js index 41be454..7f010b5 100644 --- a/ElectronJS/main.js +++ b/ElectronJS/main.js @@ -105,14 +105,14 @@ let handle_pairs = {}; function createWindow() { // Create the browser window. mainWindow = new BrowserWindow({ - width: 520, + width: 550, height: 750, webPreferences: { preload: path.join(__dirname, 'src/js/preload.js') }, icon: iconPath, // frame: false, //取消window自带的关闭最小化等 - // resizable: false //禁止改变主窗口尺寸 + resizable: false //禁止改变主窗口尺寸 }) // and load the index.html of the app. @@ -126,7 +126,7 @@ function createWindow() { app.quit(); } }); - mainWindow.webContents.openDevTools(); + // mainWindow.webContents.openDevTools(); // Open the DevTools. // mainWindow.webContents.openDevTools() } @@ -549,6 +549,10 @@ app.whenReady().then(() => { }) ipcMain.on('start-design', handleOpenBrowser); ipcMain.on('start-invoke', handleOpenInvoke); + ipcMain.on('accept-agreement', function (event, arg) { + config.copyright = 1; + fs.writeFileSync(path.join(task_server.getDir(), "config.json"), JSON.stringify(config)); + }); createWindow(); app.on('activate', function () { diff --git a/ElectronJS/src/index.html b/ElectronJS/src/index.html index b3278a0..2981e9d 100644 --- a/ElectronJS/src/index.html +++ b/ElectronJS/src/index.html @@ -56,7 +56,19 @@
- TEST +

Copyright and Disclaimer

+

Please carefully read the following instructions regarding the use of the software and commercial payments. If you agree, please accept the agreement.

+ +

Agree and Start

Hint: Click Button below to start.

@@ -130,8 +142,18 @@

版权和注意事项声明

-

请仔细阅读下方有关软件使用和商用付费的说明,并接受使用协议以使用本软件。

- +

同意并开始使用

提示:点击下方按钮开始使用。

diff --git a/ElectronJS/src/index.js b/ElectronJS/src/index.js index 230deb0..6153021 100644 --- a/ElectronJS/src/index.js +++ b/ElectronJS/src/index.js @@ -55,6 +55,10 @@ var app = Vue.createApp({ this.init = false; this.lang = lang; }, + acceptAgreement() { + this.step = 0; + window.electronAPI.acceptAgreement(); + }, startDesign(lang, with_data = false, mobile=false) { if (with_data) { console.log(this.user_data_folder) diff --git a/ElectronJS/src/js/preload.js b/ElectronJS/src/js/preload.js index 4b4f2de..d4a7825 100644 --- a/ElectronJS/src/js/preload.js +++ b/ElectronJS/src/js/preload.js @@ -10,4 +10,5 @@ const { contextBridge, ipcRenderer } = require('electron'); contextBridge.exposeInMainWorld('electronAPI', { startDesign: (lang="en", user_data_folder = '', mobile=false) => ipcRenderer.send('start-design', lang, user_data_folder, mobile), startInvoke: (lang="en") => ipcRenderer.send('start-invoke', lang), + acceptAgreement: () => ipcRenderer.send('accept-agreement'), }) \ No newline at end of file diff --git a/ElectronJS/src/taskGrid/FlowChart.html b/ElectronJS/src/taskGrid/FlowChart.html index 168e711..cd52f07 100644 --- a/ElectronJS/src/taskGrid/FlowChart.html +++ b/ElectronJS/src/taskGrid/FlowChart.html @@ -566,7 +566,7 @@ - +
+

{{`要想过Cloudflare验证,需要以下目录存在115版本的Chrome Beta版浏览器,注意是Beta版不是正式版:C:\\Program Files\\Google\\Chrome Beta。如果Beta版本不是115,请在软件下载目录中找到Chrome_Beta_115_win64.7z压缩包,然后解压并复制(覆盖)为C:\\Program Files\\Google\\Chrome Beta目录即可。~To pass the Cloudflare verification, you need the following directory to exist in the 115 version of Chrome Beta, note that it is the Beta version not the official version: C:\\Program Files\\Google\\Chrome Beta, + If the Beta version is not 115, please find the Chrome_Beta_115_win64.7z compressed package in the software download directory, then unzip and copy (overwrite) to the C:\\Program Files\\Google\\Chrome Beta directory.` | lang }}

diff --git a/ElectronJS/tasks/158.json b/ElectronJS/tasks/158.json index 4cabec9..cdebe83 100644 --- a/ElectronJS/tasks/158.json +++ b/ElectronJS/tasks/158.json @@ -1 +1,312 @@ -{"id":158,"name":"(子元素)京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/12/2023, 1:51:59 AM","update_time":"7/12/2023, 9:52:06 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"},{"id":1,"name":"inputText_1","nodeName":"输入文字","nodeId":2,"desc":"要输入的文本,如京东搜索框输入:电脑","type":"text","exampleValue":"iPhone","value":"iPhone"}],"outputParameters":[{"id":0,"name":"参数40_文本","desc":"","type":"text","recordASField":1,"exampleValue":"iPhone"},{"id":1,"name":"参数47_文本","desc":"","type":"text","recordASField":1,"exampleValue":"剩余9天22时10分"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3,4],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":4,"title":"输入文字","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"key\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"value":"iPhone","allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/input[1]","//input[contains(., '')]","id(\"key\")","//INPUT[@class='text defcolor']","/html/body/div[last()-6]/div/div[last()-2]/div/input"]}},{"id":3,"index":3,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"search-btn\"]/i[1]","iframe":false,"wait":8,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":"3","scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[4]/div[1]/div[2]/div[1]/button[1]/i[1]","//i[contains(., '')]","/html/body/div[last()-6]/div/div[last()-2]/div/button/i"]}},{"id":4,"index":4,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[5],"isInLoop":false,"position":3,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[2]/div[2]/div[1]/div[1]/div[2]/ul[1]/li/div[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[2]/div[2]/div[1]/div[1]/div[2]/ul[1]/li[1]/div[1]","//div[contains(., '')]","//DIV[@class='gl-i-wrap']","/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-29]/div"]}},{"id":5,"index":5,"parentId":4,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":1,"relative":true,"name":"参数40_文本","desc":"","relativeXPath":"/div[4]/a[1]/em[1]/font[3]","allXPaths":["/div[4]/a[1]/em[1]/font[3]","//font[contains(., 'iPhone')]","//FONT[@class='skcolor_ljg']","/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-24]/div/div[last()-5]/a/em/font"],"exampleValues":[{"num":5,"value":"iPhone"}],"unique_index":"/div[4]/a[1]/em[1]/font[3]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":1,"relative":true,"name":"参数47_文本","desc":"","relativeXPath":"//a/em[1]","allXPaths":["/div[10]/em[1]","//em[contains(., '剩余9天22时10分')]","/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-19]/div/div/em"],"exampleValues":[{"num":10,"value":"剩余9天22时10分"}],"unique_index":"/div[10]/em[1]","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]} \ No newline at end of file +{ + "id": 158, + "name": "(子元素)京东全球版-专业的综合网上购物商城", + "url": "https://www.jd.com", + "links": "https://www.jd.com", + "create_time": "7/12/2023, 1:51:59 AM", + "update_time": "7/12/2023, 9:52:06 AM", + "version": "0.3.5", + "saveThreshold": 10, + "cloudflare": 0, + "environment": 0, + "maxViewLength": 15, + "outputFormat": "xlsx", + "saveName": "current_time", + "containJudge": false, + "desc": "https://www.jd.com", + "inputParameters": [ + { + "id": 0, + "name": "urlList_0", + "nodeId": 1, + "nodeName": "打开网页", + "value": "https://www.jd.com", + "desc": "要采集的网址列表,多行以\\n分开", + "type": "text", + "exampleValue": "https://www.jd.com" + }, + { + "id": 1, + "name": "inputText_1", + "nodeName": "输入文字", + "nodeId": 2, + "desc": "要输入的文本,如京东搜索框输入:电脑", + "type": "text", + "exampleValue": "iPhone", + "value": "iPhone" + } + ], + "outputParameters": [ + { + "id": 0, + "name": "参数40_文本", + "desc": "", + "type": "text", + "recordASField": 1, + "exampleValue": "iPhone" + }, + { + "id": 1, + "name": "参数47_文本", + "desc": "", + "type": "text", + "recordASField": 1, + "exampleValue": "剩余9天22时10分" + } + ], + "graph": [ + { + "index": 0, + "id": 0, + "parentId": 0, + "type": -1, + "option": 0, + "title": "root", + "sequence": [ + 1, + 2, + 3, + 4 + ], + "parameters": { + "history": 1, + "tabIndex": 0, + "useLoop": false, + "xpath": "", + "wait": 0, + "waitType": 0 + }, + "isInLoop": false + }, + { + "id": 1, + "index": 1, + "parentId": 0, + "type": 0, + "option": 1, + "title": "打开网页", + "sequence": [], + "isInLoop": false, + "position": 0, + "parameters": { + "useLoop": false, + "xpath": "", + "wait": 0, + "waitType": 0, + "beforeJS": "", + "beforeJSWaitTime": 0, + "afterJS": "", + "afterJSWaitTime": 0, + "url": "https://www.jd.com", + "links": "https://www.jd.com", + "maxWaitTime": 10, + "scrollType": 0, + "scrollCount": 1, + "scrollWaitTime": 1, + "cookies": "" + } + }, + { + "id": 2, + "index": 2, + "parentId": 0, + "type": 0, + "option": 4, + "title": "输入文字", + "sequence": [], + "isInLoop": false, + "position": 1, + "parameters": { + "history": 4, + "tabIndex": -1, + "useLoop": false, + "xpath": "//*[@id=\"key\"]", + "iframe": false, + "wait": 0, + "waitType": 0, + "beforeJS": "", + "beforeJSWaitTime": 0, + "afterJS": "", + "afterJSWaitTime": 0, + "value": "iPhone", + "allXPaths": [ + "/html/body/div[4]/div[1]/div[2]/div[1]/input[1]", + "//input[contains(., '')]", + "id(\"key\")", + "//INPUT[@class='text defcolor']", + "/html/body/div[last()-6]/div/div[last()-2]/div/input" + ] + } + }, + { + "id": 3, + "index": 3, + "parentId": 0, + "type": 0, + "option": 2, + "title": "点击元素", + "sequence": [], + "isInLoop": false, + "position": 2, + "parameters": { + "history": 4, + "tabIndex": -1, + "useLoop": false, + "xpath": "//*[@id=\"search-btn\"]/i[1]", + "iframe": false, + "wait": 8, + "waitType": 0, + "beforeJS": "", + "beforeJSWaitTime": 0, + "afterJS": "", + "afterJSWaitTime": 0, + "scrollType": "3", + "scrollCount": 1, + "scrollWaitTime": 1, + "clickWay": 0, + "maxWaitTime": 10, + "paras": [], + "allXPaths": [ + "/html/body/div[4]/div[1]/div[2]/div[1]/button[1]/i[1]", + "//i[contains(., '')]", + "/html/body/div[last()-6]/div/div[last()-2]/div/button/i" + ] + } + }, + { + "id": 4, + "index": 4, + "parentId": 0, + "type": 1, + "option": 8, + "title": "循环", + "sequence": [ + 5 + ], + "isInLoop": false, + "position": 3, + "parameters": { + "history": 5, + "tabIndex": -1, + "useLoop": false, + "xpath": "/html/body/div[5]/div[2]/div[2]/div[1]/div[1]/div[2]/ul[1]/li/div[1]", + "iframe": false, + "wait": 0, + "waitType": 0, + "beforeJS": "", + "beforeJSWaitTime": 0, + "afterJS": "", + "afterJSWaitTime": 0, + "scrollType": 0, + "scrollCount": 1, + "scrollWaitTime": 1, + "loopType": 1, + "pathList": "", + "textList": "", + "code": "", + "waitTime": 0, + "exitCount": 0, + "historyWait": 2, + "breakMode": 0, + "breakCode": "", + "breakCodeWaitTime": 0, + "allXPaths": [ + "/html/body/div[5]/div[2]/div[2]/div[1]/div[1]/div[2]/ul[1]/li[1]/div[1]", + "//div[contains(., '')]", + "//DIV[@class='gl-i-wrap']", + "/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-29]/div" + ] + } + }, + { + "id": 5, + "index": 5, + "parentId": 4, + "type": 0, + "option": 3, + "title": "提取数据", + "sequence": [], + "isInLoop": true, + "position": 0, + "parameters": { + "history": 5, + "tabIndex": -1, + "useLoop": false, + "xpath": "", + "iframe": false, + "wait": 0, + "waitType": 0, + "beforeJS": "", + "beforeJSWaitTime": 0, + "afterJS": "", + "afterJSWaitTime": 0, + "paras": [ + { + "nodeType": 0, + "contentType": 1, + "relative": true, + "name": "参数40_文本", + "desc": "", + "relativeXPath": "/div[4]/a[1]/em[1]/font[3]", + "allXPaths": [ + "/div[4]/a[1]/em[1]/font[3]", + "//font[contains(., 'iPhone')]", + "//FONT[@class='skcolor_ljg']", + "/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-24]/div/div[last()-5]/a/em/font" + ], + "exampleValues": [ + { + "num": 5, + "value": "iPhone" + } + ], + "unique_index": "/div[4]/a[1]/em[1]/font[3]", + "iframe": false, + "default": "", + "paraType": "text", + "recordASField": 1, + "beforeJS": "", + "beforeJSWaitTime": 0, + "JS": "", + "JSWaitTime": 0, + "afterJS": "", + "afterJSWaitTime": 0, + "downloadPic": 0 + }, + { + "nodeType": 0, + "contentType": 1, + "relative": true, + "name": "参数47_文本", + "desc": "", + "relativeXPath": "//a/em[1]", + "allXPaths": [ + "/div[10]/em[1]", + "//em[contains(., '剩余9天22时10分')]", + "/html/body/div[last()-11]/div/div/div[last()-1]/div/div[last()-2]/ul/li[last()-19]/div/div/em" + ], + "exampleValues": [ + { + "num": 10, + "value": "剩余9天22时10分" + } + ], + "unique_index": "/div[10]/em[1]", + "iframe": false, + "default": "", + "paraType": "text", + "recordASField": 1, + "beforeJS": "", + "beforeJSWaitTime": 0, + "JS": "", + "JSWaitTime": 0, + "afterJS": "", + "afterJSWaitTime": 0, + "downloadPic": 0 + } + ], + "loopType": 1 + } + } + ] +} \ No newline at end of file diff --git a/ExecuteStage/.vscode/launch.json b/ExecuteStage/.vscode/launch.json index 6a3d39e..e19bf0c 100644 --- a/ExecuteStage/.vscode/launch.json +++ b/ExecuteStage/.vscode/launch.json @@ -12,7 +12,7 @@ "justMyCode": false, // "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"] // "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"] - "args": ["--id", "[4]", "--headless", "0", "--user_data", "1"] + "args": ["--id", "[3]", "--headless", "0", "--user_data", "0"] } ] } \ No newline at end of file diff --git a/ExecuteStage/config.json b/ExecuteStage/config.json index cf29ca4..4cc4016 100644 --- a/ExecuteStage/config.json +++ b/ExecuteStage/config.json @@ -1 +1 @@ -{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Document\\Projects\\EasySpider\\ElectronJS\\user_data"} \ No newline at end of file +{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"} \ No newline at end of file diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index caa3d65..d7f4a6e 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -615,13 +615,17 @@ class BrowserThread(Thread): while True: # do while循环 try: finished = False - newBodyText = self.browser.page_source + # newBodyText = self.browser.page_source + newBodyText = self.browser.find_element(By.XPATH, "//body").text if newBodyText == bodyText: # 如果页面内容无变化 print("页面已检测不到新内容,停止循环。") print("No new content detected on the page, stop loop.") finished = True break else: + if node["parameters"]["exitCount"] == 0: + print("检测到页面变化,继续循环。") + print("Page changed detected, continue loop.") bodyText = newBodyText element = self.browser.find_element( By.XPATH, node["parameters"]["xpath"], iframe=node["parameters"]["iframe"]) @@ -1577,13 +1581,14 @@ if __name__ == '__main__': browser_t = MyChrome( options=options, chrome_options=option, executable_path=driver_path) elif cloudflare == 1: - if sys.platform != "darwin": - options.binary_location = "" # 需要用自己的浏览器 + if sys.platform == "win32": + options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器 + # options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器 browser_t = MyUCChrome( - options=options) + options=options, driver_executable_path=driver_path) else: - print("Not support Cloudflare Mode on MacOS") - print("MacOS不支持Cloudflare验证模式") + print("Cloudflare模式只支持Windows x64平台。") + print("Cloudflare Mode only support on Windows x64 platform.") sys.exit() event = Event() event.set() @@ -1607,9 +1612,9 @@ if __name__ == '__main__': print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。") print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.") print("----------------------------------\n\n") - if cloudflare: - print("过Cloudflare验证模式有时候会不稳定,请注意观察上方提示的浏览器版本信息是否正确,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。") - print("Passing the Cloudflare verification mode is sometimes unstable. Please pay attention to whether the browser version information prompted above is correct. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.") + # if cloudflare: + # print("过Cloudflare验证模式有时候会不稳定,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。") + # print("Passing the Cloudflare verification mode is sometimes unstable. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.") # 使用监听器监听键盘输入 try: with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener: diff --git a/ExecuteStage/myChrome.py b/ExecuteStage/myChrome.py index 4d17fb1..a86f670 100644 --- a/ExecuteStage/myChrome.py +++ b/ExecuteStage/myChrome.py @@ -12,6 +12,8 @@ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from selenium.webdriver.support.ui import Select from selenium.webdriver import ActionChains from selenium.webdriver.common.by import By +import sys + desired_capabilities = DesiredCapabilities.CHROME desired_capabilities["pageLoadStrategy"] = "none" @@ -89,11 +91,11 @@ class MyChrome(webdriver.Chrome): else: return super().find_elements(by=by, value=value) -import sys -if sys.platform != "darwin": # MacOS不支持Cloudflare - ES = 1 - if ES == 1: - import undetected_chromedriver as uc +# MacOS不支持直接打包带Cloudflare的功能,如果要自己编译运行,可以把这个if去掉,然后配置好浏览器和driver路径 +if sys.platform != "darwin": + ES = True + if ES: # 用自己写的ES版本 + import undetected_chromedriver_ES as uc else: import undetected_chromedriver as uc diff --git a/ExecuteStage/undetected_chromedriver_ES/__init__.py b/ExecuteStage/undetected_chromedriver_ES/__init__.py index 611a76d..ba4cb7f 100644 --- a/ExecuteStage/undetected_chromedriver_ES/__init__.py +++ b/ExecuteStage/undetected_chromedriver_ES/__init__.py @@ -371,6 +371,37 @@ class Chrome(selenium.webdriver.chrome.webdriver.WebDriver): options.binary_location = ( browser_executable_path or find_chrome_executable(chrome_version) ) + if not os.path.exists(options.binary_location): + time.sleep(5) + # 如果没有安装,可以在下面的链接下载安装:https://www.google.com/chrome/beta/ + print(f"""\n\n\n要想过Cloudflare验证,需要以下目录存在115版本的Chrome Beta版浏览器,注意是Beta版不是正式版:C:\Program Files\Google\Chrome Beta + 如果Beta版本不是115,请在软件下载目录中找到Chrome_Beta_115_win64.7z压缩包,然后解压并复制(覆盖)为C:\Program Files\Google\Chrome Beta目录即可。 + + 请手动关闭此程序,配置完成后重新执行任务。 + + """) + print("""To pass the Cloudflare verification, you need the following directory to exist in the 115 version of Chrome Beta, note that it is the Beta version not the official version: C:\Program Files\Google\Chrome Beta, + If the Beta version is not 115, please find the Chrome_Beta_115_win64.7z compressed package in the software download directory, then unzip and copy (overwrite) to the C:\Program Files\Google\Chrome Beta directory. + + Please close this program manually and re-execute the task after the configuration is complete. + + """) + + time.sleep(100) + else: + folder_path = os.path.dirname(os.path.abspath(options.binary_location)) + folder_list = [f for f in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, f))] + numeric_folders = [f for f in folder_list if f[0].isdigit()] + version = numeric_folders[0].split('.')[0] + if version != "115": + time.sleep(5) + print("Chrome Beta版本不是115,请将Chrome Beta的版本替换为115, 方法为下载115版本的Chrome Beta浏览器,然后解压并覆盖C:\Program Files\Google\Chrome Beta目录即可,软件下载目录中有Chrome_Beta_115_win64.7z版本的压缩包,可直接下载后解压替换。") + print("Chrome Beta version is not 115, please replace the version of Chrome Beta with 115, the method is to download the 115 version of Chrome Beta browser, then unzip and overwrite the C:\Program Files\Google\Chrome Beta directory, the software download directory has Chrome_Beta_115_win64.7z version of the compressed package, you can download and unzip directly to replace.") + print("\n请手动关闭此程序。\n") + print("\nPlease close this program manually.\n") + time.sleep(100) + + print("Options Binary Location: ", options.binary_location) @@ -855,7 +886,11 @@ def find_chrome_executable(version): candidates.add(os.sep.join((item, subitem, "chrome.exe"))) for candidate in candidates: if os.path.exists(candidate) and os.access(candidate, os.X_OK): - print("\n\n\n软件将会使用以下目录的Chrome浏览器:", os.path.normpath(candidate), ",请检查此浏览器版本是否为" + str(version) + "版本,如果不是将无法运行。") - print("The software will use the Chrome browser in the following directory:", os.path.normpath(candidate), "Please check if the version of this browser is version " + str(version) + ", if not, it will not be able to run.\n\n\n") + print(f"""\n\n\n要想过Cloudflare验证,需要满足以下条件: + 自己的环境已经安装了115版本的Chrome Beta版浏览器,注意是Beta版不是正式版,且浏览器安装路径必须保持不变,在C:\Program Files\Google\Chrome Beta\Application\chrome.exe + 如果没有安装,可以在下面的链接下载安装:https://www.google.com/chrome/beta/ + 软件将会使用以下目录的Chrome Beta浏览器:", {os.path.normpath(candidate)}, ",请检查此浏览器版本是否为 115 版本的Beta浏览器,如果不是将无法运行。""") + # print("The software will use the Chrome browser in the following directory:", os.path.normpath(candidate), "Please check if the version of this browser is version " + str(version) + ", if not, it will not be able to run.\n\n\n") + print(f"""The software will use the Chrome browser in the following directory: {os.path.normpath(candidate)}, Please check if the version of this browser is version 115, if not, it will not be able to run.\n\n\n""") time.sleep(5) return os.path.normpath(candidate) diff --git a/ExecuteStage/undetected_chromedriver_ES/patcher.py b/ExecuteStage/undetected_chromedriver_ES/patcher.py index 47d618e..8abdd2b 100644 --- a/ExecuteStage/undetected_chromedriver_ES/patcher.py +++ b/ExecuteStage/undetected_chromedriver_ES/patcher.py @@ -141,8 +141,8 @@ class Patcher(object): folder_list = [f for f in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, f))] numeric_folders = [f for f in folder_list if f[0].isdigit()] version = numeric_folders[0].split('.')[0] - print(f"\n\n\nCloudflare下需要自行安装浏览器,请确保自己的机器环境已经安装了 {numeric_folders[0].split('.')[0]} 版本的Chrome浏览器(不是软件自带的Chrome浏览器,需要自己安装浏览器且版本号一定要正确),否则程序无法运行!") - print("Please make sure that your machine environment has installed the Chrome browser version %s (not the Chrome browser provided by the software, you need to install the browser yourself and the version number must be correct), otherwise the program cannot run!" % numeric_folders[0].split('.')[0]) + # print(f"\n\n\nCloudflare下需要自行安装浏览器,请确保自己的机器环境已经安装了 {numeric_folders[0].split('.')[0]} 版本的Chrome浏览器(不是软件自带的Chrome浏览器,需要自己安装浏览器且版本号一定要正确),否则程序无法运行!") + # print("Please make sure that your machine environment has installed the Chrome browser version %s (not the Chrome browser provided by the software, you need to install the browser yourself and the version number must be correct), otherwise the program cannot run!" % numeric_folders[0].split('.')[0]) if not ispatched: print("Patching chromedriver...") diff --git a/Readme.md b/Readme.md index 127993c..42ecceb 100644 --- a/Readme.md +++ b/Readme.md @@ -130,7 +130,7 @@ This software is for learning and communication only. **It is strictly forbidden For the crawler operations of government and military websites, **the author will not answer any questions** in order to avoid violating relevant national laws, regulations and policies. -同时,软件受到专利权保护,如要用于商业用途,请联系[杭州天勤知识产权代理有限公司](http://www.tqip.com/)进行专利授权等付费操作。 +同时,软件受到专利权保护,如要用于商业用途,如使用软件进行盈利接单等,请联系[杭州天勤知识产权代理有限公司](http://www.tqip.com/)进行专利授权等付费操作。 At the same time, the software is protected by patent rights. If you want to use it for commercial purposes, please contact [Hangzhou Tianqin Intellectual Property Agency](http://www.tqip.com/) for patent authorization and other paid operations.