From 8a7f90d55475c69b223bdfa26081faa9de74d8f5 Mon Sep 17 00:00:00 2001 From: naibo Date: Tue, 10 Oct 2023 12:09:31 +0800 Subject: [PATCH] File Downloads in headless --- .../execution_instances/58.json | 1 + .../execution_instances/59.json | 1 + .../execution_instances/60.json | 1 + .../execution_instances/61.json | 1 + .../execution_instances/62.json | 1 + .../execution_instances/63.json | 1 + .../execution_instances/64.json | 1 + .../execution_instances/65.json | 1 + .../execution_instances/66.json | 1 + .../execution_instances/67.json | 1 + .../execution_instances/68.json | 1 + .../execution_instances/69.json | 1 + .../execution_instances/70.json | 1 + .../execution_instances/71.json | 1 + .../execution_instances/72.json | 1 + .../execution_instances/73.json | 1 + .temp_to_pub/EasySpider_windows_x64/tasks/222.json | 1 + .temp_to_pub/EasySpider_windows_x64/tasks/223.json | 1 + .temp_to_pub/EasySpider_windows_x64/tasks/224.json | 1 + .temp_to_pub/EasySpider_windows_x64/tasks/225.json | 1 + ExecuteStage/.vscode/launch.json | 2 +- ExecuteStage/easyspider_executestage.py | 13 +++++++++++-- 22 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/58.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/59.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/60.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/61.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/62.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/63.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/64.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/65.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/66.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/67.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/68.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/69.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/70.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/71.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/72.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/73.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/tasks/222.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/tasks/223.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/tasks/224.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/tasks/225.json diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/58.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/58.json new file mode 100644 index 0000000..c93df22 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/58.json @@ -0,0 +1 @@ +{"id":58,"name":"账号登录","url":"https://member.bilibili.com/platform/upload-manager/article","links":"https://member.bilibili.com/platform/upload-manager/article","create_time":"10/6/2023, 12:09:53 PM","update_time":"10/6/2023, 11:52:56 PM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://member.bilibili.com/platform/upload-manager/article","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://member.bilibili.com/platform/upload-manager/article","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://member.bilibili.com/platform/upload-manager/article"},{"id":1,"name":"loopTimes_循环点击下一页_1","nodeId":3,"nodeName":"循环点击下一页","desc":"循环循环点击下一页执行的次数(0代表无限循环)","type":"int","exampleValue":0,"value":0}],"outputParameters":[{"id":0,"name":"参数1_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"可视化爬虫易采集EasySpider:如何无代码可视化的爬取需要登录才能爬的网站"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"//www.bilibili.com/video/BV1BN411t71C/"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":5,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://member.bilibili.com/platform/upload-manager/article","links":"https://member.bilibili.com/platform/upload-manager/article","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":-1,"index":2,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"about:blank","links":"about:blank","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":3,"parentId":0,"type":1,"option":8,"title":"循环点击下一页","sequence":[10,4],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"bcc-pagination-next\")]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":0,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[3]/div[4]/div[2]/div[2]/div[2]/div[2]/div[3]/ul[1]/li[6]","//li[contains(., '下一页')]","//LI[@class='bcc-pagination-item bcc-pagination-next']","/html/body/div[last()-3]/div[last()-2]/div/div[last()-1]/div/div/div/div/ul/li[last()-1]"]}},{"id":4,"index":4,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[1]/div[3]/div[4]/div[2]/div[2]/div[2]/div[2]/div[3]/ul[1]/li[6]","//li[contains(., '下一页')]","//LI[@class='bcc-pagination-item bcc-pagination-next']","/html/body/div[last()-3]/div[last()-2]/div/div[last()-1]/div/div/div/div/ul/li[last()-1]"],"loopType":0}},{"id":-1,"index":5,"parentId":2,"type":1,"option":8,"title":"循环","sequence":[9],"isInLoop":true,"position":0,"parameters":{"history":6,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[3]/div[4]/div[2]/div[2]/div[2]/div[2]/div[2]/div/div[1]/div[1]/div[1]/div[1]/a[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[3]/div[4]/div[2]/div[2]/div[2]/div[2]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '可视化爬虫易采集Ea')]","//A[@class='name ellipsis']","/html/body/div[last()-3]/div[last()-2]/div/div[last()-1]/div/div/div/div[last()-1]/div[last()-9]/div/div[last()-1]/div/div[last()-2]/a"]}},{"id":-1,"index":6,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":6,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[],"loopType":1}},{"id":-1,"index":7,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[8],"isInLoop":false,"position":2,"parameters":{"history":3,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[3]/div[4]/div[2]/div[2]/div[2]/div[2]/div[2]/div/div[1]/div[1]/a[1]/img[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[3]/div[4]/div[2]/div[2]/div[2]/div[2]/div[2]/div[1]/div[1]/div[1]/a[1]/img[1]","//img[contains(., '')]","//IMG[@class='cover-img']","//IMG[@alt='可视化爬虫易采集EasySpider: 实例 - 反人类网站文章采集和代码调试']","/html/body/div[last()-3]/div[last()-2]/div/div[last()-1]/div/div/div/div[last()-1]/div[last()-9]/div/div[last()-1]/a/img"]}},{"id":-1,"index":8,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":3,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":4,"contentType":0,"relative":true,"name":"参数1_图片地址","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"//i2.hdslb.com/bfs/archive/de13b79a4b2e03a75bb3aa76a5f516c596b28eae.jpg@320w_200h"}],"unique_index":"epcrs9l8qalneo50ue","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}},{"id":-1,"index":9,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":4,"contentType":0,"relative":true,"name":"自定义参数_2","desc":"","extractType":0,"relativeXPath":"","recordASField":1,"allXPaths":[],"exampleValues":[{"num":0,"value":"自定义值"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"paraType":"text"}]}},{"id":3,"index":10,"parentId":2,"type":1,"option":8,"title":"循环","sequence":[11],"isInLoop":true,"position":0,"parameters":{"history":3,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[3]/div[4]/div[2]/div[2]/div[2]/div[2]/div[2]/div/div[1]/div[1]/div[1]/div[1]/a[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[3]/div[4]/div[2]/div[2]/div[2]/div[2]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '可视化爬虫易采集Ea')]","//A[@class='name ellipsis']","/html/body/div[last()-3]/div[last()-2]/div/div[last()-1]/div/div/div/div[last()-1]/div[last()-9]/div/div[last()-1]/div/div[last()-2]/a"]}},{"id":5,"index":11,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":3,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":1,"contentType":0,"relative":true,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"可视化爬虫易采集EasySpider:如何无代码可视化的爬取需要登录才能爬的网站"}],"unique_index":"q1qua55xh7llnescbx4","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数2_链接地址","desc":"","relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"//www.bilibili.com/video/BV1BN411t71C/"}],"unique_index":"q1qua55xh7llnescbx4","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/59.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/59.json new file mode 100644 index 0000000..f862c8d --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/59.json @@ -0,0 +1 @@ +{"id":59,"name":"胡七刀的主页 - 抖音","url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","create_time":"","update_time":"10/7/2023, 2:22:16 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"outputParameters":[{"id":0,"name":"参数4_文本","desc":"","type":"text","recordASField":1,"exampleValue":"给大家报个平安已经抵达昆明,不管是想看救援还是旅行。都别急,容我慢慢剪辑。新疆之旅大家期不期待?首个开Ranger环游新疆的博主。 \n#胡七刀 #七刀新疆之旅 #旅行 #新疆 #自驾游"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,5],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":-1,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":3,"relative":false,"name":"参数2_outerHTML","desc":"","extractType":0,"relativeXPath":"/html/body/div[2]/div[1]/div[3]/div[4]/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/xg-video-container[1]/video[1]","allXPaths":["/html/body/div[2]/div[1]/div[3]/div[4]/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/xg-video-container[1]/video[1]","//video[contains(., '')]","/html/body/div[last()-9]/div[last()-1]/div/div/div/div[last()-1]/div[last()-2]/div/div[last()-9]/div/div[last()-2]/div[last()-1]/div/xg-video-container/video"],"exampleValues":[{"num":0,"value":""}],"unique_index":"qfw4q8y9cglnexkmll","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":-1,"index":3,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":2,"pathList":"//*[contains(@class, \"EZC0YBrG\")]/li[1]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[2]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[3]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[4]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[5]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[6]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[7]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[8]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[9]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[10]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[11]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[12]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[13]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[14]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[15]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[16]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[17]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[18]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[19]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[20]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[21]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[22]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[23]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[24]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[25]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[26]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[27]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[28]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[29]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[30]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[31]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[32]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[33]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[34]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[35]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[36]/div[1]/a[1]/p[1]","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":-1,"index":4,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"缅东苗瓦迪救援行动-01 间隔一个月,准备慢慢给大家更新了。目前总救援人数19人, \n#胡七刀 #胡七刀特种志愿救援队 #七扇门 #救援 #反诈"}],"unique_index":"sjdb25f7godlnexli08","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":2}},{"id":2,"index":5,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[6,7],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[2]/div[1]/div[3]/div[3]/div[1]/div[1]/div[3]/div[2]/div[2]/div[2]/ul[1]/li/div[1]/a[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":6,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":"","loopType":1}},{"id":4,"index":7,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":5,"relative":false,"name":"参数4_文本","desc":"","relativeXPath":"//body","allXPaths":["/html/body/div[2]/div[1]/div[3]/div[3]/div[1]/div[1]/div[3]/div[2]/div[2]/div[2]/ul[1]/li[7]/div[1]/a[1]/p[1]","//p[contains(., '给大家报个平安已经抵')]","//P[@class='__0w4MvO']","/html/body/div[last()-6]/div[last()-1]/div/div/div/div/div/div/div/div/ul/li[last()-47]/div/a/p"],"exampleValues":[{"num":0,"value":"给大家报个平安已经抵达昆明,不管是想看救援还是旅行。都别急,容我慢慢剪辑。新疆之旅大家期不期待?首个开Ranger环游新疆的博主。 \n#胡七刀 #七刀新疆之旅 #旅行 #新疆 #自驾游"}],"unique_index":"0vfpiu48hdwjlnexm9hz","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/60.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/60.json new file mode 100644 index 0000000..0638b4c --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/60.json @@ -0,0 +1 @@ +{"id":60,"name":"胡七刀的主页 - 抖音","url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","create_time":"","update_time":"10/7/2023, 2:22:43 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"outputParameters":[{"id":0,"name":"参数4_文本","desc":"","type":"text","recordASField":1,"exampleValue":"给大家报个平安已经抵达昆明,不管是想看救援还是旅行。都别急,容我慢慢剪辑。新疆之旅大家期不期待?首个开Ranger环游新疆的博主。 \n#胡七刀 #七刀新疆之旅 #旅行 #新疆 #自驾游"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,5],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":8,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":-1,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":3,"relative":false,"name":"参数2_outerHTML","desc":"","extractType":0,"relativeXPath":"/html/body/div[2]/div[1]/div[3]/div[4]/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/xg-video-container[1]/video[1]","allXPaths":["/html/body/div[2]/div[1]/div[3]/div[4]/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/xg-video-container[1]/video[1]","//video[contains(., '')]","/html/body/div[last()-9]/div[last()-1]/div/div/div/div[last()-1]/div[last()-2]/div/div[last()-9]/div/div[last()-2]/div[last()-1]/div/xg-video-container/video"],"exampleValues":[{"num":0,"value":""}],"unique_index":"qfw4q8y9cglnexkmll","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":-1,"index":3,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":2,"pathList":"//*[contains(@class, \"EZC0YBrG\")]/li[1]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[2]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[3]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[4]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[5]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[6]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[7]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[8]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[9]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[10]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[11]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[12]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[13]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[14]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[15]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[16]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[17]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[18]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[19]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[20]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[21]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[22]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[23]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[24]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[25]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[26]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[27]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[28]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[29]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[30]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[31]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[32]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[33]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[34]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[35]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[36]/div[1]/a[1]/p[1]","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":-1,"index":4,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"缅东苗瓦迪救援行动-01 间隔一个月,准备慢慢给大家更新了。目前总救援人数19人, \n#胡七刀 #胡七刀特种志愿救援队 #七扇门 #救援 #反诈"}],"unique_index":"sjdb25f7godlnexli08","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":2}},{"id":2,"index":5,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[6,7],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[2]/div[1]/div[3]/div[3]/div[1]/div[1]/div[3]/div[2]/div[2]/div[2]/ul[1]/li/div[1]/a[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":6,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":"","loopType":1}},{"id":4,"index":7,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":5,"relative":false,"name":"参数4_文本","desc":"","relativeXPath":"//body","allXPaths":["/html/body/div[2]/div[1]/div[3]/div[3]/div[1]/div[1]/div[3]/div[2]/div[2]/div[2]/ul[1]/li[7]/div[1]/a[1]/p[1]","//p[contains(., '给大家报个平安已经抵')]","//P[@class='__0w4MvO']","/html/body/div[last()-6]/div[last()-1]/div/div/div/div/div/div/div/div/ul/li[last()-47]/div/a/p"],"exampleValues":[{"num":0,"value":"给大家报个平安已经抵达昆明,不管是想看救援还是旅行。都别急,容我慢慢剪辑。新疆之旅大家期不期待?首个开Ranger环游新疆的博主。 \n#胡七刀 #七刀新疆之旅 #旅行 #新疆 #自驾游"}],"unique_index":"0vfpiu48hdwjlnexm9hz","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/61.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/61.json new file mode 100644 index 0000000..05ee629 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/61.json @@ -0,0 +1 @@ +{"id":61,"name":"胡七刀的主页 - 抖音","url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","create_time":"10/7/2023, 2:25:40 AM","update_time":"10/7/2023, 2:25:40 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0"}],"outputParameters":[{"id":0,"name":"参数1_页面网址","desc":"","type":"text","recordASField":1,"exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":4,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4],"isInLoop":false,"position":1,"parameters":{"history":7,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[2]/div[1]/div[3]/div[3]/div[1]/div[1]/div[3]/div[2]/div[2]/div[2]/ul[1]/li/div[1]/a[1]/p[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":7,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":"","loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":2,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":5,"relative":false,"name":"参数1_页面网址","desc":"","extractType":0,"relativeXPath":"/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","allXPaths":["/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","//div[contains(., '按下 「键盘下键」或')]","//DIV[@class='U2AMRJpW']","/html/body/div[last()-6]/div[last()-1]/div/div/div[last()-2]/div"],"exampleValues":[{"num":0,"value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"unique_index":"7rluu38fzb5lnexsqqu","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/62.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/62.json new file mode 100644 index 0000000..d58d51f --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/62.json @@ -0,0 +1 @@ +{"id":62,"name":"记录下待开发的功能,大家也可以来提想要的功能 · Issue #25 · NaiboWang/EasySpider · GitHub","url":"https://github.com/NaiboWang/EasySpider/issues/25","links":"https://github.com/NaiboWang/EasySpider/issues/25","create_time":"10/7/2023, 2:44:52 AM","update_time":"10/7/2023, 2:44:52 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"csv","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://github.com/NaiboWang/EasySpider/issues/25","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://github.com/NaiboWang/EasySpider/issues/25","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://github.com/NaiboWang/EasySpider/issues/25"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"\n \n\n\n \n \n \n \n \n \n\n \n\n \n\n \n \n \n\n\n\n \n\n \n \n New issue\n \n \n \n \n\n \n \n \n \n\n\n\n\n \n Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.\n \n\n \n Pick a username\n \n \n\n \n Email Address\n \n \n\n Password\n\n \n \n\n\n\n\n Sign up for GitHub\n\n\n By clicking “Sign up for GitHub”, you agree to our terms of service and\n privacy statement. We’ll occasionally send you account related emails.\n\n \n Already on GitHub?\n Sign in\n to your account\n \n\n\n\n \n\n \n Jump to bottom\n \n \n\n \n 记录下待开发的功能,大家也可以来提想要的功能\n #25\n \n \n \n\n \n \n \n \n \n\n Open\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 10 of 11 tasks\n \n\n\n \n\n \n\n\n \n \n \n\n \n NaiboWang opened this issue\nMay 13, 2023\n· 48 comments\n\n\n \n\n\n \n\n \n\n\n\n\n\n \n \n \n \n \n \n \n \n\n Open\n\n \n\n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 10 of 11 tasks\n \n\n\n \n\n \n\n \n \n \n\n \n \n \n 记录下待开发的功能,大家也可以来提想要的功能\n \n #25\n\n\n \n NaiboWang opened this issue\nMay 13, 2023\n· 48 comments\n\n\n \n\n\n \n \n \n \n \n \n \n\n\n\n \n\n \n \n Comments\n\n\n \n \n\n \n \n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n May 13, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 以下功能会在有空的时候集中开发,大家有什么想要的功能也可以提issues留言,也欢迎大家fork之后帮我开发并提交pull request~\n\n 增加隧道IP代理池子教程(功能)。\n 增加一个“自定义操作”,可以执行一些用户制定的JavaScript命令或者系统级别的命令,使得软件更加灵活。\n 条件判断里添加一个“自定义语句判断”,通过用户提供的JavaScript或者系统命令的返回值作为条件判断的判定值\n 截图功能,并通过接入开源OCR模块,自动识别图片中的文字来对付反爬虫字体。\n 提取数据字段增加一项当前页面的URL。\n 处理iframe\n 鼠标移动到元素上\n iframe增加关闭提示框功能\n 切换下拉框\n 记录下cc98用户的开发需求:\n\n\n\n\n (待定)更换selenium底层架构到playwright或Puppeteer\n\n \n \n \n \n \n \n \n \n\n The text was updated successfully, but these errors were encountered:\n \n \n \n \n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n\n\n\n\n\n \n \n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n May 15, 2023\n \n\n\n\n \n \n xpath可以搞多种策略然后让用户选择\n #31\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n \n \n \n \n\n \n \n\n \n\n \nNaiboWang\n\n\n\n\n pinned this issue\n\n\n May 15, 2023\n\n \n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Contributor\n\n\n \n\n \n\n \n \n \n\n \n yfdyh000\n \n\n \n\n \n\n commented\n\n\n May 20, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 刚刚接触,代码上的初步意见:\n未看出Git中放打包的EasySpider.crx,以及分成en和zh两版的意义。\n从代码国际化(i18n)来说,lang=zh应该zh-cn或zh-hans。\n代码和自述里中英混杂,对fork发展也许不太友好。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n May 20, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n刚刚接触,代码上的初步意见: 未看出Git中放打包的EasySpider.crx,以及分成en和zh两版的意义。 从代码国际化(i18n)来说,lang=zh应该zh-cn或zh-hans。 代码和自述里中英混杂,对fork发展也许不太友好。\n\n感谢,都是非常好的建议。\n以上问题大部分原因基本都在于刚开始这个软件是个纯中文软件,只是后来要投稿国际会议WWW 2023所以赶鸭子上架拼凑了一个英文版出来,所以没时间查看其他双语规范的写法。至于en和zh两版是临时想出的解决方案,所有的中英文版本几乎都是独立加载的,没有参考通用写法。\n代码和自述中英混杂是我的习惯问题,因为主要是想中国人用,所以中文放前面,英文只是顺便翻译了一下,不过以后可以考虑分开。\n放出打包的.crx意义确实不大,应该在.gitignore中消掉。\n现阶段我的主要工作还是做科研,等空出时间了会把代码继续重构,符合通用规范。\n再次感谢提出宝贵的建议!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 3\n YannySky, oooing, and LIcopyleft reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 3 reactions\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nyfdyh000\n\n\n\n mentioned this issue\n \n May 21, 2023\n \n\n\n\n \n \n 代码方面建议及问题若干\n #42\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 33 tasks\n \n\n\n\n\n \n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n timodaxia2019\n \n\n \n\n \n\n commented\n\n\n Jun 1, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能不能考虑把采集到的内容直接发布到常用的网站类型中,谢谢!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 2, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能不能考虑把采集到的内容直接发布到常用的网站类型中,谢谢!\n\n这属于衍生需求,有很多工具可以做,参考:https://www.zhihu.com/question/52240701\n因此暂时不考虑。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n SarcomTDG\n \n\n \n\n \n\n commented\n\n\n Jun 2, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 加上打印日志功能\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 1\n Flywolfs reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 1 reaction\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n wzt0501\n \n\n \n\n \n\n commented\n\n\n Jun 7, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 1.列表页字段和详情页的字段能对应上;\n2.相似的块的字段名称和字段值能不能识别生成,有的只知道区域,但字段名称不一定是固定的\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Forrestz88\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Forrestz88\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n\n所谓自动,就是我们事先在任务当中配置好该怎么修改,之后,执行完任务,采集下来的数据已经是按照我们事先的设定改好了,而不需要我们去手动在CSV或Excel文件中修改列表头,这就是我所谓的自动。\n你提到的现在可以手动修改是指在任务配置过程中就修改好了是吗?如果是在任务配置过程中修改就修改好了,逻辑上就已经是我讲的自动修改的意思了。\n我之前用八爪鱼用的很多。八爪鱼当中,列表头的字段名称是可以按照采集者的想法进行任意修改的。你的视频我没时间看。我是昨天一边做别的事,一边一口气听完的,中文的听了16集。软件没上手练,界面可能不熟悉。因为没听到说可以改列表头的这个说法,所以我以为列表头不能在任务配置文件中改呢。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n\n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n\n\n\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n\n所谓自动,就是我们事先在任务当中配置好该怎么修改,之后,执行完任务,采集下来的数据已经是按照我们事先的设定改好了,而不需要我们去手动在CSV或Excel文件中修改列表头,这就是我所谓的自动。\n你提到的现在可以手动修改是指在任务配置过程中就修改好了是吗?如果是在任务配置过程中修改就修改好了,逻辑上就已经是我讲的自动修改的意思了。\n我之前用八爪鱼用的很多。八爪鱼当中,列表头的字段名称是可以按照采集者的想法进行任意修改的。你的视频我没时间看。我是昨天一边做别的事,一边一口气听完的,中文的听了16集。软件没上手练,界面可能不熟悉。因为没听到说可以改列表头的这个说法,所以我以为列表头不能在任务配置文件中改呢。\n\n表头可以在设计时改。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n jyxzwd\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n •\n\n \n \n \n \n edited by NaiboWang\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 作者您好,我想提一个关于对 采集到的数据处理的一个 小建议\n现在的EasySpider默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式?\n其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的easySpdier默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n谢谢建议,这个程序设置的初衷是为那些不会写代码的朋友准备的,没想到发布后会有很多人想要写入数据库这种程序员级别的操作。\n因为CSV转DB或者写入SQL数据库这个操作已经有很多开源实现,如:https://github.com/simonw/csvs-to-sqlite\n因此这个功能属于边缘功能,大家如果想写入数据库总会找到办法,因此这个功能暂时不会考虑开发。\n我倒希望有朋友能提交Pull Request,帮我一起完善这个软件,很遗憾发布了一个多月还没有有效的PR提交,一个人维护一个项目实在有些力不从心,将来我个人的重点也会主要集中在核心功能的开发上,至于写入数据库这种边缘功能,期待有朋友帮我做。\n谢谢建议~\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 2\n QBH-insist and nunamia reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 2 reactions\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n BanKnight\n \n\n \n\n \n\n commented\n\n\n Jun 23, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 目前发现还是变量这块不太行(如果实现了,麻烦也告知一下)\n例如:\n\n提取页面数据作为循环次数\n提取页面数据作为xpath依据\n提取页面数据作为“切换下拉选项”的依据\n\n再就是,编辑器部分建议优化为可以拖动那种操作,虽然目前也够用,但是第一次上手还是不太符合直觉\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能支持下载网页中的文件么?例如PDF\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能支持下载网页中的文件么?例如PDF\n\n最新版本特性里就写了可以下载pdf……\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 27, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 不好意思,只注意看教学视频了。\n另外还有一个建议,对于采集多个页面文章的情况下,默认会把文本合并到输出的csv文件里。那么能否在提取数据的设置里增加一项用户自定义分隔项,例如我可以把每篇文章的文本用<|endoftext|>分隔。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 27, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n不好意思,只注意看教学视频了。\n另外还有一个建议,对于采集多个页面文章的情况下,默认会把文本合并到输出的csv文件里。那么能否在提取数据的设置里增加一项用户自定义分隔项,例如我可以把每篇文章的文本用<|endoftext|>分隔。\n\n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频:\nhttps://www.bilibili.com/video/BV1qs4y1z7Hc\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频: https://www.bilibili.com/video/BV1qs4y1z7Hc\n\n这样试了不行:\n\n如果在“提取页面文本”元素用js追加文本,会给每个p文本都追加。而我希望给整个页面提取出来的文本追加。\n如果在该页面所有p文本提取的循环结束后用js追加(如图),结果是没有期望结果。\n而且我也不希望追加的eot作为另一个单独的字段。\n这个需求抽象来说就是:\n我需要对某个操作或循环的结果,可以用js来处理并替代它们记录到csv。 例如上述例子:详情页面的每个p元素的文本被采集为text字段,那么这个页面处理的循环结束后,我需要用js处理结果,合并它们并追加一个eot标签字符串,然后将这个修改后的页面文本保存到结果csv。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频: https://www.bilibili.com/video/BV1qs4y1z7Hc\n\n这样试了不行: 如果在“提取页面文本”元素用js追加文本,会给每个p文本都追加。而我希望给整个页面提取出来的文本追加。 如果在该页面所有p文本提取的循环结束后用js追加(如图),结果是没有期望结果。 而且我也不希望追加的eot作为另一个单独的字段。\n这个需求抽象来说就是: 我需要对某个操作或循环的结果,可以用js来处理并替代它们记录到csv。 例如上述例子:详情页面的每个p元素的文本被采集为text字段,那么这个页面处理的循环结束后,我需要用js处理结果,合并它们并追加一个eot标签字符串,然后将这个修改后的页面文本保存到结果csv。\n\n先找到你的EasySpider\\resources\\app\\src\\taskGrid文件夹,把logic_CN.js的第375到383行删掉,即删掉以下内容:\n if (outputNames.indexOf(title) >= 0) { //参数名称已经被添加\n $('#myModal').modal('hide');\n $(\"#tip2\").slideDown(); //提示框\n fadeout = setTimeout(function() {\n $(\"#tip2\").slideUp();\n }, 5000);\n return;\n }\n\n \n \n \n\n \n \n\n \n \n然后把上面自定义操作的名称改成和上面“提取页面文本”操作里一样的字段名称,如参数1_文本,即可使得相同字段里的内容放在一起。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 这个办法生效,就是它还是会在csv首行生成一个同名列。如果后续需要处理,例如使用pandas.read,还需要人工去删除一下。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n这个办法生效,就是它还是会在csv首行生成一个同名列。如果后续需要处理,例如使用pandas.read,还需要人工去删除一下。\n\nOK,这个bug会在下一个版本修复。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n aogg\n \n\n \n\n \n\n commented\n\n\n Jul 3, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 搞个无头模式,最好能docker运行\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 3, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n搞个无头模式,最好能docker运行\n\n无头模式现在就有。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n\n没法后退导致循环断不知道你是怎么设计的,理论上不太可能。\n如果想关闭标签页,添加自定义操作,命令值为:window.close()即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n\n没法后退导致循环断不知道你是怎么设计的,理论上不太可能。\n如果想关闭标签页,添加自定义操作,命令值为:window.close()即可。\n\n我加了window.close()之后,直接把整个浏览器关了。\n我的浏览器,点击链接之后就是在新标签页打开,然后每次打开完第一个链接,提取过数据之后,程序就结束了,我也不知道什么情况\n这是我的程序,前面都没问题,点击详情页,就会打开一个新的标签页,然后里面有个元素是下载PDF的,点了以后就可以下载,然后不管加不加window.close(),下载完都会直接结束程序\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 把你的任务文件发我邮箱:naibowang@foxmail.com,我去帮你看下\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n把你的任务文件发我邮箱:naibowang@foxmail.com,我去帮你看下\n\n好的,谢谢,我的是mac,应该在哪找我的任务文件呢,是这个吗\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 是的\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 8, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的EasySpider默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n导入MySQL数据库功能的版本已发布,欢迎使用。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n rikka5201\n \n\n \n\n \n\n commented\n\n\n Jul 11, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 1.多线程任务\n2.执行任务时增加不显示浏览器窗口的选项\n3.提取数据时加入自定义文本(包括转义字符)\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 11, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n1.多线程任务\n2.执行任务时增加不显示浏览器窗口的选项\n3.提取数据时加入自定义文本(包括转义字符)\n\n这三个功能很早就实现了,请看视频教程列表里的相关内容。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n koushui\n \n\n \n\n \n\n commented\n\n\n Jul 12, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 建议增加 非浏览器模拟访问的 爬取方式,如直接发送Https/http请求,请求里变量变化采集方式\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n xpkyy\n \n\n \n\n \n\n commented\n\n\n Jul 18, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 作者您好,首先感谢您的无私奉献!\n\n使用EasySpider有段时间了,目前感觉在日志记录上不够详尽,希望下一版能提供【保存错误信息】的选项,\n\n \n \n \n\n \n \n\n \n \n详细说明如下:\n现有的日志文件只是记录了操作指令以及错误消息,没有采集到的上下文内容,无法定位是哪一条信息出问题,\n仅能用作开发阶段调试,目前我都是隔一段时间检查控制台,看看有没有错误信息,记录下来采集结束后统一处理,\n由于控制台仅能保存最近3000行左右的内容,远远不够容纳数以万计的内容,一会儿不检查可能就会漏掉错误信息,\n导致采集的信息缺失甚至错误,所以希望有一个【保存错误信息】的选项,就是采集保存的文件里带有所有错误,\n和控制台回显的内容一样就够了,这样就不用人一直在旁边盯着啦。\n因为很多时候测试都OK了,一旦实际运行的时间久了,会遇到各种意想不到的问题,这些问题测试阶段无法预测,\n所以只能利用控制台回显的错误信息+采集到的上下文内容来定位具体信息,并做针对处理,这里举些例子:\n①原本我采集的内容只有检测到对象存在了才进行采集,后来发现对象存在以后,有时加载的内容是错误的,需要等,\n等一瞬或者等几秒甚至十几秒不定,才能加载出正确内容,这需要再对采集到的内容文本增加一个解析判断的操作。\n②有时网页加载完毕,显示某个结果为空【正常就是有空有多条的情况】,过了几秒居然又自己刷新出来几条结果。\n③有时循环采集某个多页列表,明明循环列表的XPATH是一样的,但是总有个别时候会报错找不到某某PATH之类。\n类似种种意想不到的问题,只有发现了才能去解决,所以log记录非常重要,希望作者能抽空完善一下,感激不尽!\n\n \n \n \n\n \n \n\n \n \n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 18, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,首先感谢您的无私奉献!\n\n使用EasySpider有段时间了,目前感觉在日志记录上不够详尽,希望下一版能提供【保存错误信息】的选项,\n\n \n \n \n\n \n \n\n \n \n详细说明如下: 现有的日志文件只是记录了操作指令以及错误消息,没有采集到的上下文内容,无法定位是哪一条信息出问题, 仅能用作开发阶段调试,目前我都是隔一段时间检查控制台,看看有没有错误信息,记录下来采集结束后统一处理, 由于控制台仅能保存最近3000行左右的内容,远远不够容纳数以万计的内容,一会儿不检查可能就会漏掉错误信息, 导致采集的信息缺失甚至错误,所以希望有一个【保存错误信息】的选项,就是采集保存的文件里带有所有错误, 和控制台回显的内容一样就够了,这样就不用人一直在旁边盯着啦。 因为很多时候测试都OK了,一旦实际运行的时间久了,会遇到各种意想不到的问题,这些问题测试阶段无法预测, 所以只能利用控制台回显的错误信息+采集到的上下文内容来定位具体信息,并做针对处理,这里举些例子: ①原本我采集的内容只有检测到对象存在了才进行采集,后来发现对象存在以后,有时加载的内容是错误的,需要等, 等一瞬或者等几秒甚至十几秒不定,才能加载出正确内容,这需要再对采集到的内容文本增加一个解析判断的操作。 ②有时网页加载完毕,显示某个结果为空【正常就是有空有多条的情况】,过了几秒居然又自己刷新出来几条结果。 ③有时循环采集某个多页列表,明明循环列表的XPATH是一样的,但是总有个别时候会报错找不到某某PATH之类。\n类似种种意想不到的问题,只有发现了才能去解决,所以log记录非常重要,希望作者能抽空完善一下,感激不尽!\n\n \n \n \n\n \n \n\n \n \n\n已经更新,可以下载最新源码自行编译或等待下一个版本正式版发布。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 1\n xpkyy reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 1 reaction\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n xpkyy\n \n\n \n\n \n\n commented\n\n\n Jul 19, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 好的,非常感谢!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n nunamia\n \n\n \n\n \n\n commented\n\n\n Jul 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的easySpdier默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n谢谢建议,这个程序设置的初衷是为那些不会写代码的朋友准备的,没想到发布后会有很多人想要写入数据库这种程序员级别的操作。\n因为CSV转DB或者写入SQL数据库这个操作已经有很多开源实现,如:https://github.com/simonw/csvs-to-sqlite\n因此这个功能属于边缘功能,大家如果想写入数据库总会找到办法,因此这个功能暂时不会考虑开发。\n我倒希望有朋友能提交Pull Request,帮我一起完善这个软件,很遗憾发布了一个多月还没有有效的PR提交,一个人维护一个项目实在有些力不从心,将来我个人的重点也会主要集中在核心功能的开发上,至于写入数据库这种边缘功能,期待有朋友帮我做。\n谢谢建议~\n\n还刚下载,对整个项目不熟,后续使用熟练了,有时间就参加一起完善\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n sylcool\n \n\n \n\n \n\n commented\n\n\n Jul 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 可以配置paddleOCR吗,Tesseract特殊字体识别效果不太好。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n可以配置paddleOCR吗,Tesseract特殊字体识别效果不太好。\n\n下个版本会换成ddddocr,paddleOCR需要自行下载代码更换,因为太重量级了无法直接集成。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Nonce-lv\n \n\n \n\n \n\n commented\n\n\n Jul 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 需要增加一个增量爬网的功能。\n比对已经抓取的数量,重复后停止后续操作。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n hzdu\n \n\n \n\n \n\n commented\n\n\n Aug 22, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 我有一个需求是这样的,录入新生的学籍信息,一个个手输非常的麻烦,能不能用EasySpider导入execl文件以后逐行读取每个学籍信息填入表单里,然后循环去执行\n这里还会涉及到一个学校照片上传的操作,能实现最好了\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Aug 22, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n我有一个需求是这样的,录入新生的学籍信息,一个个手输非常的麻烦,能不能用EasySpider导入execl文件以后逐行读取每个学籍信息填入表单里,然后循环去执行 这里还会涉及到一个学校照片上传的操作,能实现最好了\n\n对于批量填写表单功能,使用0.5.0的以下功能配合Excel读取可实现:\n\n\n但因为涉及到了图片上传功能,则需要使用自定义操作中的当前环境下执行代码EXEC功能,配合下面的代码实现:\nhttps://blog.csdn.net/huilan_same/article/details/52439546\n需要注意的是EasySpider定位是一个数据采集软件,因此填写表单这种需求并不是软件的核心。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n HHR-learner\n \n\n \n\n \n\n commented\n\n\n Sep 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 想请问下那种点击按钮下载文件之后的保存路径可以指定吗\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Sep 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n想请问下那种点击按钮下载文件之后的保存路径可以指定吗\n\n用带用户信息的浏览器设计和执行,更改浏览器下载目录即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n \n\n \n\n Repository owner\n\n deleted a comment from \n Nonce-lv \n\n\n Sep 22, 2023\n\n \n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 如果我想让两个类型的元素并行两列输出怎么实现?(在同类型元素识别不了的情况下)\n比如微博发文的作者和发布时间作为两个元素无法实现上述功能\n如果分开提取,会生成两列但也不会一一对应,不在同一个循环提取的对应列会自动填充同一元素\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 我还有一个问题,对于同一个页面,xpath能够定位到所有的元素,为什么还要使用循环去一行一行提取?不能直接提取所有定位到的元素文本吗?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n Sep 24, 2023\n \n\n\n\n \n \n 如果我想让两个类型的元素并行两列输出怎么实现?(在同类型元素识别不了的情况下)\n #191\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n我还有一个问题,对于同一个页面,xpath能够定位到所有的元素,为什么还要使用循环去一行一行提取?不能直接提取所有定位到的元素文本吗?\n\n可以直接提取所有定位到的元素文本,如果不使用循环,则所有的元素文本都会在同一列,一共只有一行。\n如果使用循环,参考地震台网,可以生成结构化的多行数据。\n根据自己的实际情况来决定使用哪种方式即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 请问如果某些页面xpath不能进行定位(应该有反爬),有其他方法可以选择吗?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n Sep 25, 2023\n \n\n\n\n \n \n 请问如果某些页面xpath不能进行定位(应该有反爬),有其他方法可以选择吗?\n #192\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n\n\n\n \n\n\n\n \n \n \n \n Sign up for free\n to join this conversation on GitHub.\n Already have an account?\n Sign in to comment\n\n\n \n\n\n \n\n\n\n \n\n\n\n \n \n\n \n \n Assignees\n \n\n\n \n\n No one assigned\n\n\n\n\n\n\n \n\n \n\n\n \n Labels\n \n\n\n \n None yet\n\n\n\n\n\n \n\n \n \n \n Projects\n \n\n \n\n\n None yet\n\n\n\n \n\n\n \n \n \n \n Milestone\n \n\n No milestone\n\n\n\n\n \n \n \n\n \n \n \n \n \n Development\n \n\n\n\n \n No branches or pull requests\n\n\n\n\n\n \n \n\n\n \n \n\n \n\n \n \n \n \n 20 participants\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n\n\n\n \n\n \n\n\n\n\n \n\n\n\n\n \n \n \n\n\n \n\n\n "}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://github.com/NaiboWang/EasySpider/issues/25","links":"https://github.com/NaiboWang/EasySpider/issues/25","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body/div[1]/div[4]/div[1]/main[1]/turbo-frame[1]/div[1]","allXPaths":["/html/body/div[1]/div[4]/div[1]/main[1]/turbo-frame[1]/div[1]","//div[contains(., '')]","id(\"repo-content-pjax-container\")","//DIV[@class='repository-content ']","/html/body/div[last()-4]/div[last()-2]/div/main/turbo-frame/div"],"exampleValues":[{"num":0,"value":"\n \n\n\n \n \n \n \n \n \n\n \n\n \n\n \n \n \n\n\n\n \n\n \n \n New issue\n \n \n \n \n\n \n \n \n \n\n\n\n\n \n Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.\n \n\n \n Pick a username\n \n \n\n \n Email Address\n \n \n\n Password\n\n \n \n\n\n\n\n Sign up for GitHub\n\n\n By clicking “Sign up for GitHub”, you agree to our terms of service and\n privacy statement. We’ll occasionally send you account related emails.\n\n \n Already on GitHub?\n Sign in\n to your account\n \n\n\n\n \n\n \n Jump to bottom\n \n \n\n \n 记录下待开发的功能,大家也可以来提想要的功能\n #25\n \n \n \n\n \n \n \n \n \n\n Open\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 10 of 11 tasks\n \n\n\n \n\n \n\n\n \n \n \n\n \n NaiboWang opened this issue\nMay 13, 2023\n· 48 comments\n\n\n \n\n\n \n\n \n\n\n\n\n\n \n \n \n \n \n \n \n \n\n Open\n\n \n\n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 10 of 11 tasks\n \n\n\n \n\n \n\n \n \n \n\n \n \n \n 记录下待开发的功能,大家也可以来提想要的功能\n \n #25\n\n\n \n NaiboWang opened this issue\nMay 13, 2023\n· 48 comments\n\n\n \n\n\n \n \n \n \n \n \n \n\n\n\n \n\n \n \n Comments\n\n\n \n \n\n \n \n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n May 13, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 以下功能会在有空的时候集中开发,大家有什么想要的功能也可以提issues留言,也欢迎大家fork之后帮我开发并提交pull request~\n\n 增加隧道IP代理池子教程(功能)。\n 增加一个“自定义操作”,可以执行一些用户制定的JavaScript命令或者系统级别的命令,使得软件更加灵活。\n 条件判断里添加一个“自定义语句判断”,通过用户提供的JavaScript或者系统命令的返回值作为条件判断的判定值\n 截图功能,并通过接入开源OCR模块,自动识别图片中的文字来对付反爬虫字体。\n 提取数据字段增加一项当前页面的URL。\n 处理iframe\n 鼠标移动到元素上\n iframe增加关闭提示框功能\n 切换下拉框\n 记录下cc98用户的开发需求:\n\n\n\n\n (待定)更换selenium底层架构到playwright或Puppeteer\n\n \n \n \n \n \n \n \n \n\n The text was updated successfully, but these errors were encountered:\n \n \n \n \n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n\n\n\n\n\n \n \n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n May 15, 2023\n \n\n\n\n \n \n xpath可以搞多种策略然后让用户选择\n #31\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n \n \n \n \n\n \n \n\n \n\n \nNaiboWang\n\n\n\n\n pinned this issue\n\n\n May 15, 2023\n\n \n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Contributor\n\n\n \n\n \n\n \n \n \n\n \n yfdyh000\n \n\n \n\n \n\n commented\n\n\n May 20, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 刚刚接触,代码上的初步意见:\n未看出Git中放打包的EasySpider.crx,以及分成en和zh两版的意义。\n从代码国际化(i18n)来说,lang=zh应该zh-cn或zh-hans。\n代码和自述里中英混杂,对fork发展也许不太友好。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n May 20, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n刚刚接触,代码上的初步意见: 未看出Git中放打包的EasySpider.crx,以及分成en和zh两版的意义。 从代码国际化(i18n)来说,lang=zh应该zh-cn或zh-hans。 代码和自述里中英混杂,对fork发展也许不太友好。\n\n感谢,都是非常好的建议。\n以上问题大部分原因基本都在于刚开始这个软件是个纯中文软件,只是后来要投稿国际会议WWW 2023所以赶鸭子上架拼凑了一个英文版出来,所以没时间查看其他双语规范的写法。至于en和zh两版是临时想出的解决方案,所有的中英文版本几乎都是独立加载的,没有参考通用写法。\n代码和自述中英混杂是我的习惯问题,因为主要是想中国人用,所以中文放前面,英文只是顺便翻译了一下,不过以后可以考虑分开。\n放出打包的.crx意义确实不大,应该在.gitignore中消掉。\n现阶段我的主要工作还是做科研,等空出时间了会把代码继续重构,符合通用规范。\n再次感谢提出宝贵的建议!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 3\n YannySky, oooing, and LIcopyleft reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 3 reactions\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nyfdyh000\n\n\n\n mentioned this issue\n \n May 21, 2023\n \n\n\n\n \n \n 代码方面建议及问题若干\n #42\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 33 tasks\n \n\n\n\n\n \n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n timodaxia2019\n \n\n \n\n \n\n commented\n\n\n Jun 1, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能不能考虑把采集到的内容直接发布到常用的网站类型中,谢谢!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 2, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能不能考虑把采集到的内容直接发布到常用的网站类型中,谢谢!\n\n这属于衍生需求,有很多工具可以做,参考:https://www.zhihu.com/question/52240701\n因此暂时不考虑。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n SarcomTDG\n \n\n \n\n \n\n commented\n\n\n Jun 2, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 加上打印日志功能\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 1\n Flywolfs reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 1 reaction\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n wzt0501\n \n\n \n\n \n\n commented\n\n\n Jun 7, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 1.列表页字段和详情页的字段能对应上;\n2.相似的块的字段名称和字段值能不能识别生成,有的只知道区域,但字段名称不一定是固定的\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Forrestz88\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Forrestz88\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n\n所谓自动,就是我们事先在任务当中配置好该怎么修改,之后,执行完任务,采集下来的数据已经是按照我们事先的设定改好了,而不需要我们去手动在CSV或Excel文件中修改列表头,这就是我所谓的自动。\n你提到的现在可以手动修改是指在任务配置过程中就修改好了是吗?如果是在任务配置过程中修改就修改好了,逻辑上就已经是我讲的自动修改的意思了。\n我之前用八爪鱼用的很多。八爪鱼当中,列表头的字段名称是可以按照采集者的想法进行任意修改的。你的视频我没时间看。我是昨天一边做别的事,一边一口气听完的,中文的听了16集。软件没上手练,界面可能不熟悉。因为没听到说可以改列表头的这个说法,所以我以为列表头不能在任务配置文件中改呢。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n\n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n\n\n\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n\n所谓自动,就是我们事先在任务当中配置好该怎么修改,之后,执行完任务,采集下来的数据已经是按照我们事先的设定改好了,而不需要我们去手动在CSV或Excel文件中修改列表头,这就是我所谓的自动。\n你提到的现在可以手动修改是指在任务配置过程中就修改好了是吗?如果是在任务配置过程中修改就修改好了,逻辑上就已经是我讲的自动修改的意思了。\n我之前用八爪鱼用的很多。八爪鱼当中,列表头的字段名称是可以按照采集者的想法进行任意修改的。你的视频我没时间看。我是昨天一边做别的事,一边一口气听完的,中文的听了16集。软件没上手练,界面可能不熟悉。因为没听到说可以改列表头的这个说法,所以我以为列表头不能在任务配置文件中改呢。\n\n表头可以在设计时改。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n jyxzwd\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n •\n\n \n \n \n \n edited by NaiboWang\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 作者您好,我想提一个关于对 采集到的数据处理的一个 小建议\n现在的EasySpider默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式?\n其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的easySpdier默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n谢谢建议,这个程序设置的初衷是为那些不会写代码的朋友准备的,没想到发布后会有很多人想要写入数据库这种程序员级别的操作。\n因为CSV转DB或者写入SQL数据库这个操作已经有很多开源实现,如:https://github.com/simonw/csvs-to-sqlite\n因此这个功能属于边缘功能,大家如果想写入数据库总会找到办法,因此这个功能暂时不会考虑开发。\n我倒希望有朋友能提交Pull Request,帮我一起完善这个软件,很遗憾发布了一个多月还没有有效的PR提交,一个人维护一个项目实在有些力不从心,将来我个人的重点也会主要集中在核心功能的开发上,至于写入数据库这种边缘功能,期待有朋友帮我做。\n谢谢建议~\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 2\n QBH-insist and nunamia reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 2 reactions\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n BanKnight\n \n\n \n\n \n\n commented\n\n\n Jun 23, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 目前发现还是变量这块不太行(如果实现了,麻烦也告知一下)\n例如:\n\n提取页面数据作为循环次数\n提取页面数据作为xpath依据\n提取页面数据作为“切换下拉选项”的依据\n\n再就是,编辑器部分建议优化为可以拖动那种操作,虽然目前也够用,但是第一次上手还是不太符合直觉\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能支持下载网页中的文件么?例如PDF\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能支持下载网页中的文件么?例如PDF\n\n最新版本特性里就写了可以下载pdf……\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 27, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 不好意思,只注意看教学视频了。\n另外还有一个建议,对于采集多个页面文章的情况下,默认会把文本合并到输出的csv文件里。那么能否在提取数据的设置里增加一项用户自定义分隔项,例如我可以把每篇文章的文本用<|endoftext|>分隔。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 27, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n不好意思,只注意看教学视频了。\n另外还有一个建议,对于采集多个页面文章的情况下,默认会把文本合并到输出的csv文件里。那么能否在提取数据的设置里增加一项用户自定义分隔项,例如我可以把每篇文章的文本用<|endoftext|>分隔。\n\n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频:\nhttps://www.bilibili.com/video/BV1qs4y1z7Hc\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频: https://www.bilibili.com/video/BV1qs4y1z7Hc\n\n这样试了不行:\n\n如果在“提取页面文本”元素用js追加文本,会给每个p文本都追加。而我希望给整个页面提取出来的文本追加。\n如果在该页面所有p文本提取的循环结束后用js追加(如图),结果是没有期望结果。\n而且我也不希望追加的eot作为另一个单独的字段。\n这个需求抽象来说就是:\n我需要对某个操作或循环的结果,可以用js来处理并替代它们记录到csv。 例如上述例子:详情页面的每个p元素的文本被采集为text字段,那么这个页面处理的循环结束后,我需要用js处理结果,合并它们并追加一个eot标签字符串,然后将这个修改后的页面文本保存到结果csv。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频: https://www.bilibili.com/video/BV1qs4y1z7Hc\n\n这样试了不行: 如果在“提取页面文本”元素用js追加文本,会给每个p文本都追加。而我希望给整个页面提取出来的文本追加。 如果在该页面所有p文本提取的循环结束后用js追加(如图),结果是没有期望结果。 而且我也不希望追加的eot作为另一个单独的字段。\n这个需求抽象来说就是: 我需要对某个操作或循环的结果,可以用js来处理并替代它们记录到csv。 例如上述例子:详情页面的每个p元素的文本被采集为text字段,那么这个页面处理的循环结束后,我需要用js处理结果,合并它们并追加一个eot标签字符串,然后将这个修改后的页面文本保存到结果csv。\n\n先找到你的EasySpider\\resources\\app\\src\\taskGrid文件夹,把logic_CN.js的第375到383行删掉,即删掉以下内容:\n if (outputNames.indexOf(title) >= 0) { //参数名称已经被添加\n $('#myModal').modal('hide');\n $(\"#tip2\").slideDown(); //提示框\n fadeout = setTimeout(function() {\n $(\"#tip2\").slideUp();\n }, 5000);\n return;\n }\n\n \n \n \n\n \n \n\n \n \n然后把上面自定义操作的名称改成和上面“提取页面文本”操作里一样的字段名称,如参数1_文本,即可使得相同字段里的内容放在一起。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 这个办法生效,就是它还是会在csv首行生成一个同名列。如果后续需要处理,例如使用pandas.read,还需要人工去删除一下。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n这个办法生效,就是它还是会在csv首行生成一个同名列。如果后续需要处理,例如使用pandas.read,还需要人工去删除一下。\n\nOK,这个bug会在下一个版本修复。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n aogg\n \n\n \n\n \n\n commented\n\n\n Jul 3, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 搞个无头模式,最好能docker运行\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 3, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n搞个无头模式,最好能docker运行\n\n无头模式现在就有。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n\n没法后退导致循环断不知道你是怎么设计的,理论上不太可能。\n如果想关闭标签页,添加自定义操作,命令值为:window.close()即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n\n没法后退导致循环断不知道你是怎么设计的,理论上不太可能。\n如果想关闭标签页,添加自定义操作,命令值为:window.close()即可。\n\n我加了window.close()之后,直接把整个浏览器关了。\n我的浏览器,点击链接之后就是在新标签页打开,然后每次打开完第一个链接,提取过数据之后,程序就结束了,我也不知道什么情况\n这是我的程序,前面都没问题,点击详情页,就会打开一个新的标签页,然后里面有个元素是下载PDF的,点了以后就可以下载,然后不管加不加window.close(),下载完都会直接结束程序\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 把你的任务文件发我邮箱:naibowang@foxmail.com,我去帮你看下\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n把你的任务文件发我邮箱:naibowang@foxmail.com,我去帮你看下\n\n好的,谢谢,我的是mac,应该在哪找我的任务文件呢,是这个吗\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 是的\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 8, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的EasySpider默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n导入MySQL数据库功能的版本已发布,欢迎使用。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n rikka5201\n \n\n \n\n \n\n commented\n\n\n Jul 11, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 1.多线程任务\n2.执行任务时增加不显示浏览器窗口的选项\n3.提取数据时加入自定义文本(包括转义字符)\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 11, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n1.多线程任务\n2.执行任务时增加不显示浏览器窗口的选项\n3.提取数据时加入自定义文本(包括转义字符)\n\n这三个功能很早就实现了,请看视频教程列表里的相关内容。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n koushui\n \n\n \n\n \n\n commented\n\n\n Jul 12, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 建议增加 非浏览器模拟访问的 爬取方式,如直接发送Https/http请求,请求里变量变化采集方式\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n xpkyy\n \n\n \n\n \n\n commented\n\n\n Jul 18, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 作者您好,首先感谢您的无私奉献!\n\n使用EasySpider有段时间了,目前感觉在日志记录上不够详尽,希望下一版能提供【保存错误信息】的选项,\n\n \n \n \n\n \n \n\n \n \n详细说明如下:\n现有的日志文件只是记录了操作指令以及错误消息,没有采集到的上下文内容,无法定位是哪一条信息出问题,\n仅能用作开发阶段调试,目前我都是隔一段时间检查控制台,看看有没有错误信息,记录下来采集结束后统一处理,\n由于控制台仅能保存最近3000行左右的内容,远远不够容纳数以万计的内容,一会儿不检查可能就会漏掉错误信息,\n导致采集的信息缺失甚至错误,所以希望有一个【保存错误信息】的选项,就是采集保存的文件里带有所有错误,\n和控制台回显的内容一样就够了,这样就不用人一直在旁边盯着啦。\n因为很多时候测试都OK了,一旦实际运行的时间久了,会遇到各种意想不到的问题,这些问题测试阶段无法预测,\n所以只能利用控制台回显的错误信息+采集到的上下文内容来定位具体信息,并做针对处理,这里举些例子:\n①原本我采集的内容只有检测到对象存在了才进行采集,后来发现对象存在以后,有时加载的内容是错误的,需要等,\n等一瞬或者等几秒甚至十几秒不定,才能加载出正确内容,这需要再对采集到的内容文本增加一个解析判断的操作。\n②有时网页加载完毕,显示某个结果为空【正常就是有空有多条的情况】,过了几秒居然又自己刷新出来几条结果。\n③有时循环采集某个多页列表,明明循环列表的XPATH是一样的,但是总有个别时候会报错找不到某某PATH之类。\n类似种种意想不到的问题,只有发现了才能去解决,所以log记录非常重要,希望作者能抽空完善一下,感激不尽!\n\n \n \n \n\n \n \n\n \n \n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 18, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,首先感谢您的无私奉献!\n\n使用EasySpider有段时间了,目前感觉在日志记录上不够详尽,希望下一版能提供【保存错误信息】的选项,\n\n \n \n \n\n \n \n\n \n \n详细说明如下: 现有的日志文件只是记录了操作指令以及错误消息,没有采集到的上下文内容,无法定位是哪一条信息出问题, 仅能用作开发阶段调试,目前我都是隔一段时间检查控制台,看看有没有错误信息,记录下来采集结束后统一处理, 由于控制台仅能保存最近3000行左右的内容,远远不够容纳数以万计的内容,一会儿不检查可能就会漏掉错误信息, 导致采集的信息缺失甚至错误,所以希望有一个【保存错误信息】的选项,就是采集保存的文件里带有所有错误, 和控制台回显的内容一样就够了,这样就不用人一直在旁边盯着啦。 因为很多时候测试都OK了,一旦实际运行的时间久了,会遇到各种意想不到的问题,这些问题测试阶段无法预测, 所以只能利用控制台回显的错误信息+采集到的上下文内容来定位具体信息,并做针对处理,这里举些例子: ①原本我采集的内容只有检测到对象存在了才进行采集,后来发现对象存在以后,有时加载的内容是错误的,需要等, 等一瞬或者等几秒甚至十几秒不定,才能加载出正确内容,这需要再对采集到的内容文本增加一个解析判断的操作。 ②有时网页加载完毕,显示某个结果为空【正常就是有空有多条的情况】,过了几秒居然又自己刷新出来几条结果。 ③有时循环采集某个多页列表,明明循环列表的XPATH是一样的,但是总有个别时候会报错找不到某某PATH之类。\n类似种种意想不到的问题,只有发现了才能去解决,所以log记录非常重要,希望作者能抽空完善一下,感激不尽!\n\n \n \n \n\n \n \n\n \n \n\n已经更新,可以下载最新源码自行编译或等待下一个版本正式版发布。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 1\n xpkyy reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 1 reaction\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n xpkyy\n \n\n \n\n \n\n commented\n\n\n Jul 19, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 好的,非常感谢!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n nunamia\n \n\n \n\n \n\n commented\n\n\n Jul 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的easySpdier默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n谢谢建议,这个程序设置的初衷是为那些不会写代码的朋友准备的,没想到发布后会有很多人想要写入数据库这种程序员级别的操作。\n因为CSV转DB或者写入SQL数据库这个操作已经有很多开源实现,如:https://github.com/simonw/csvs-to-sqlite\n因此这个功能属于边缘功能,大家如果想写入数据库总会找到办法,因此这个功能暂时不会考虑开发。\n我倒希望有朋友能提交Pull Request,帮我一起完善这个软件,很遗憾发布了一个多月还没有有效的PR提交,一个人维护一个项目实在有些力不从心,将来我个人的重点也会主要集中在核心功能的开发上,至于写入数据库这种边缘功能,期待有朋友帮我做。\n谢谢建议~\n\n还刚下载,对整个项目不熟,后续使用熟练了,有时间就参加一起完善\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n sylcool\n \n\n \n\n \n\n commented\n\n\n Jul 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 可以配置paddleOCR吗,Tesseract特殊字体识别效果不太好。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n可以配置paddleOCR吗,Tesseract特殊字体识别效果不太好。\n\n下个版本会换成ddddocr,paddleOCR需要自行下载代码更换,因为太重量级了无法直接集成。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Nonce-lv\n \n\n \n\n \n\n commented\n\n\n Jul 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 需要增加一个增量爬网的功能。\n比对已经抓取的数量,重复后停止后续操作。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n hzdu\n \n\n \n\n \n\n commented\n\n\n Aug 22, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 我有一个需求是这样的,录入新生的学籍信息,一个个手输非常的麻烦,能不能用EasySpider导入execl文件以后逐行读取每个学籍信息填入表单里,然后循环去执行\n这里还会涉及到一个学校照片上传的操作,能实现最好了\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Aug 22, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n我有一个需求是这样的,录入新生的学籍信息,一个个手输非常的麻烦,能不能用EasySpider导入execl文件以后逐行读取每个学籍信息填入表单里,然后循环去执行 这里还会涉及到一个学校照片上传的操作,能实现最好了\n\n对于批量填写表单功能,使用0.5.0的以下功能配合Excel读取可实现:\n\n\n但因为涉及到了图片上传功能,则需要使用自定义操作中的当前环境下执行代码EXEC功能,配合下面的代码实现:\nhttps://blog.csdn.net/huilan_same/article/details/52439546\n需要注意的是EasySpider定位是一个数据采集软件,因此填写表单这种需求并不是软件的核心。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n HHR-learner\n \n\n \n\n \n\n commented\n\n\n Sep 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 想请问下那种点击按钮下载文件之后的保存路径可以指定吗\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Sep 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n想请问下那种点击按钮下载文件之后的保存路径可以指定吗\n\n用带用户信息的浏览器设计和执行,更改浏览器下载目录即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n \n\n \n\n Repository owner\n\n deleted a comment from \n Nonce-lv \n\n\n Sep 22, 2023\n\n \n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 如果我想让两个类型的元素并行两列输出怎么实现?(在同类型元素识别不了的情况下)\n比如微博发文的作者和发布时间作为两个元素无法实现上述功能\n如果分开提取,会生成两列但也不会一一对应,不在同一个循环提取的对应列会自动填充同一元素\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 我还有一个问题,对于同一个页面,xpath能够定位到所有的元素,为什么还要使用循环去一行一行提取?不能直接提取所有定位到的元素文本吗?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n Sep 24, 2023\n \n\n\n\n \n \n 如果我想让两个类型的元素并行两列输出怎么实现?(在同类型元素识别不了的情况下)\n #191\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n我还有一个问题,对于同一个页面,xpath能够定位到所有的元素,为什么还要使用循环去一行一行提取?不能直接提取所有定位到的元素文本吗?\n\n可以直接提取所有定位到的元素文本,如果不使用循环,则所有的元素文本都会在同一列,一共只有一行。\n如果使用循环,参考地震台网,可以生成结构化的多行数据。\n根据自己的实际情况来决定使用哪种方式即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 请问如果某些页面xpath不能进行定位(应该有反爬),有其他方法可以选择吗?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n Sep 25, 2023\n \n\n\n\n \n \n 请问如果某些页面xpath不能进行定位(应该有反爬),有其他方法可以选择吗?\n #192\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n\n\n\n \n\n\n\n \n \n \n \n Sign up for free\n to join this conversation on GitHub.\n Already have an account?\n Sign in to comment\n\n\n \n\n\n \n\n\n\n \n\n\n\n \n \n\n \n \n Assignees\n \n\n\n \n\n No one assigned\n\n\n\n\n\n\n \n\n \n\n\n \n Labels\n \n\n\n \n None yet\n\n\n\n\n\n \n\n \n \n \n Projects\n \n\n \n\n\n None yet\n\n\n\n \n\n\n \n \n \n \n Milestone\n \n\n No milestone\n\n\n\n\n \n \n \n\n \n \n \n \n \n Development\n \n\n\n\n \n No branches or pull requests\n\n\n\n\n\n \n \n\n\n \n \n\n \n\n \n \n \n \n 20 participants\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n\n\n\n \n\n \n\n\n\n\n \n\n\n\n\n \n \n \n\n\n \n\n\n "}],"unique_index":"qs7l3vt4mmlneyhe25","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/63.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/63.json new file mode 100644 index 0000000..4f37e25 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/63.json @@ -0,0 +1 @@ +{"id":63,"name":"记录下待开发的功能,大家也可以来提想要的功能 · Issue #25 · NaiboWang/EasySpider · GitHub","url":"https://github.com/NaiboWang/EasySpider/issues/25","links":"https://github.com/NaiboWang/EasySpider/issues/25","create_time":"10/7/2023, 2:44:52 AM","update_time":"10/7/2023, 2:44:52 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"csv","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://github.com/NaiboWang/EasySpider/issues/25","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://github.com/NaiboWang/EasySpider/issues/25","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://github.com/NaiboWang/EasySpider/issues/25"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"\n \n\n\n \n \n \n \n \n \n\n \n\n \n\n \n \n \n\n\n\n \n\n \n \n New issue\n \n \n \n \n\n \n \n \n \n\n\n\n\n \n Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.\n \n\n \n Pick a username\n \n \n\n \n Email Address\n \n \n\n Password\n\n \n \n\n\n\n\n Sign up for GitHub\n\n\n By clicking “Sign up for GitHub”, you agree to our terms of service and\n privacy statement. We’ll occasionally send you account related emails.\n\n \n Already on GitHub?\n Sign in\n to your account\n \n\n\n\n \n\n \n Jump to bottom\n \n \n\n \n 记录下待开发的功能,大家也可以来提想要的功能\n #25\n \n \n \n\n \n \n \n \n \n\n Open\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 10 of 11 tasks\n \n\n\n \n\n \n\n\n \n \n \n\n \n NaiboWang opened this issue\nMay 13, 2023\n· 48 comments\n\n\n \n\n\n \n\n \n\n\n\n\n\n \n \n \n \n \n \n \n \n\n Open\n\n \n\n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 10 of 11 tasks\n \n\n\n \n\n \n\n \n \n \n\n \n \n \n 记录下待开发的功能,大家也可以来提想要的功能\n \n #25\n\n\n \n NaiboWang opened this issue\nMay 13, 2023\n· 48 comments\n\n\n \n\n\n \n \n \n \n \n \n \n\n\n\n \n\n \n \n Comments\n\n\n \n \n\n \n \n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n May 13, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 以下功能会在有空的时候集中开发,大家有什么想要的功能也可以提issues留言,也欢迎大家fork之后帮我开发并提交pull request~\n\n 增加隧道IP代理池子教程(功能)。\n 增加一个“自定义操作”,可以执行一些用户制定的JavaScript命令或者系统级别的命令,使得软件更加灵活。\n 条件判断里添加一个“自定义语句判断”,通过用户提供的JavaScript或者系统命令的返回值作为条件判断的判定值\n 截图功能,并通过接入开源OCR模块,自动识别图片中的文字来对付反爬虫字体。\n 提取数据字段增加一项当前页面的URL。\n 处理iframe\n 鼠标移动到元素上\n iframe增加关闭提示框功能\n 切换下拉框\n 记录下cc98用户的开发需求:\n\n\n\n\n (待定)更换selenium底层架构到playwright或Puppeteer\n\n \n \n \n \n \n \n \n \n\n The text was updated successfully, but these errors were encountered:\n \n \n \n \n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n\n\n\n\n\n \n \n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n May 15, 2023\n \n\n\n\n \n \n xpath可以搞多种策略然后让用户选择\n #31\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n \n \n \n \n\n \n \n\n \n\n \nNaiboWang\n\n\n\n\n pinned this issue\n\n\n May 15, 2023\n\n \n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Contributor\n\n\n \n\n \n\n \n \n \n\n \n yfdyh000\n \n\n \n\n \n\n commented\n\n\n May 20, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 刚刚接触,代码上的初步意见:\n未看出Git中放打包的EasySpider.crx,以及分成en和zh两版的意义。\n从代码国际化(i18n)来说,lang=zh应该zh-cn或zh-hans。\n代码和自述里中英混杂,对fork发展也许不太友好。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n May 20, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n刚刚接触,代码上的初步意见: 未看出Git中放打包的EasySpider.crx,以及分成en和zh两版的意义。 从代码国际化(i18n)来说,lang=zh应该zh-cn或zh-hans。 代码和自述里中英混杂,对fork发展也许不太友好。\n\n感谢,都是非常好的建议。\n以上问题大部分原因基本都在于刚开始这个软件是个纯中文软件,只是后来要投稿国际会议WWW 2023所以赶鸭子上架拼凑了一个英文版出来,所以没时间查看其他双语规范的写法。至于en和zh两版是临时想出的解决方案,所有的中英文版本几乎都是独立加载的,没有参考通用写法。\n代码和自述中英混杂是我的习惯问题,因为主要是想中国人用,所以中文放前面,英文只是顺便翻译了一下,不过以后可以考虑分开。\n放出打包的.crx意义确实不大,应该在.gitignore中消掉。\n现阶段我的主要工作还是做科研,等空出时间了会把代码继续重构,符合通用规范。\n再次感谢提出宝贵的建议!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 3\n YannySky, oooing, and LIcopyleft reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 3 reactions\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nyfdyh000\n\n\n\n mentioned this issue\n \n May 21, 2023\n \n\n\n\n \n \n 代码方面建议及问题若干\n #42\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 33 tasks\n \n\n\n\n\n \n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n timodaxia2019\n \n\n \n\n \n\n commented\n\n\n Jun 1, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能不能考虑把采集到的内容直接发布到常用的网站类型中,谢谢!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 2, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能不能考虑把采集到的内容直接发布到常用的网站类型中,谢谢!\n\n这属于衍生需求,有很多工具可以做,参考:https://www.zhihu.com/question/52240701\n因此暂时不考虑。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n SarcomTDG\n \n\n \n\n \n\n commented\n\n\n Jun 2, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 加上打印日志功能\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 1\n Flywolfs reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 1 reaction\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n wzt0501\n \n\n \n\n \n\n commented\n\n\n Jun 7, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 1.列表页字段和详情页的字段能对应上;\n2.相似的块的字段名称和字段值能不能识别生成,有的只知道区域,但字段名称不一定是固定的\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Forrestz88\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Forrestz88\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n\n所谓自动,就是我们事先在任务当中配置好该怎么修改,之后,执行完任务,采集下来的数据已经是按照我们事先的设定改好了,而不需要我们去手动在CSV或Excel文件中修改列表头,这就是我所谓的自动。\n你提到的现在可以手动修改是指在任务配置过程中就修改好了是吗?如果是在任务配置过程中修改就修改好了,逻辑上就已经是我讲的自动修改的意思了。\n我之前用八爪鱼用的很多。八爪鱼当中,列表头的字段名称是可以按照采集者的想法进行任意修改的。你的视频我没时间看。我是昨天一边做别的事,一边一口气听完的,中文的听了16集。软件没上手练,界面可能不熟悉。因为没听到说可以改列表头的这个说法,所以我以为列表头不能在任务配置文件中改呢。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n\n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n\n\n\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n\n所谓自动,就是我们事先在任务当中配置好该怎么修改,之后,执行完任务,采集下来的数据已经是按照我们事先的设定改好了,而不需要我们去手动在CSV或Excel文件中修改列表头,这就是我所谓的自动。\n你提到的现在可以手动修改是指在任务配置过程中就修改好了是吗?如果是在任务配置过程中修改就修改好了,逻辑上就已经是我讲的自动修改的意思了。\n我之前用八爪鱼用的很多。八爪鱼当中,列表头的字段名称是可以按照采集者的想法进行任意修改的。你的视频我没时间看。我是昨天一边做别的事,一边一口气听完的,中文的听了16集。软件没上手练,界面可能不熟悉。因为没听到说可以改列表头的这个说法,所以我以为列表头不能在任务配置文件中改呢。\n\n表头可以在设计时改。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n jyxzwd\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n •\n\n \n \n \n \n edited by NaiboWang\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 作者您好,我想提一个关于对 采集到的数据处理的一个 小建议\n现在的EasySpider默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式?\n其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的easySpdier默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n谢谢建议,这个程序设置的初衷是为那些不会写代码的朋友准备的,没想到发布后会有很多人想要写入数据库这种程序员级别的操作。\n因为CSV转DB或者写入SQL数据库这个操作已经有很多开源实现,如:https://github.com/simonw/csvs-to-sqlite\n因此这个功能属于边缘功能,大家如果想写入数据库总会找到办法,因此这个功能暂时不会考虑开发。\n我倒希望有朋友能提交Pull Request,帮我一起完善这个软件,很遗憾发布了一个多月还没有有效的PR提交,一个人维护一个项目实在有些力不从心,将来我个人的重点也会主要集中在核心功能的开发上,至于写入数据库这种边缘功能,期待有朋友帮我做。\n谢谢建议~\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 2\n QBH-insist and nunamia reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 2 reactions\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n BanKnight\n \n\n \n\n \n\n commented\n\n\n Jun 23, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 目前发现还是变量这块不太行(如果实现了,麻烦也告知一下)\n例如:\n\n提取页面数据作为循环次数\n提取页面数据作为xpath依据\n提取页面数据作为“切换下拉选项”的依据\n\n再就是,编辑器部分建议优化为可以拖动那种操作,虽然目前也够用,但是第一次上手还是不太符合直觉\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能支持下载网页中的文件么?例如PDF\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能支持下载网页中的文件么?例如PDF\n\n最新版本特性里就写了可以下载pdf……\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 27, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 不好意思,只注意看教学视频了。\n另外还有一个建议,对于采集多个页面文章的情况下,默认会把文本合并到输出的csv文件里。那么能否在提取数据的设置里增加一项用户自定义分隔项,例如我可以把每篇文章的文本用<|endoftext|>分隔。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 27, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n不好意思,只注意看教学视频了。\n另外还有一个建议,对于采集多个页面文章的情况下,默认会把文本合并到输出的csv文件里。那么能否在提取数据的设置里增加一项用户自定义分隔项,例如我可以把每篇文章的文本用<|endoftext|>分隔。\n\n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频:\nhttps://www.bilibili.com/video/BV1qs4y1z7Hc\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频: https://www.bilibili.com/video/BV1qs4y1z7Hc\n\n这样试了不行:\n\n如果在“提取页面文本”元素用js追加文本,会给每个p文本都追加。而我希望给整个页面提取出来的文本追加。\n如果在该页面所有p文本提取的循环结束后用js追加(如图),结果是没有期望结果。\n而且我也不希望追加的eot作为另一个单独的字段。\n这个需求抽象来说就是:\n我需要对某个操作或循环的结果,可以用js来处理并替代它们记录到csv。 例如上述例子:详情页面的每个p元素的文本被采集为text字段,那么这个页面处理的循环结束后,我需要用js处理结果,合并它们并追加一个eot标签字符串,然后将这个修改后的页面文本保存到结果csv。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频: https://www.bilibili.com/video/BV1qs4y1z7Hc\n\n这样试了不行: 如果在“提取页面文本”元素用js追加文本,会给每个p文本都追加。而我希望给整个页面提取出来的文本追加。 如果在该页面所有p文本提取的循环结束后用js追加(如图),结果是没有期望结果。 而且我也不希望追加的eot作为另一个单独的字段。\n这个需求抽象来说就是: 我需要对某个操作或循环的结果,可以用js来处理并替代它们记录到csv。 例如上述例子:详情页面的每个p元素的文本被采集为text字段,那么这个页面处理的循环结束后,我需要用js处理结果,合并它们并追加一个eot标签字符串,然后将这个修改后的页面文本保存到结果csv。\n\n先找到你的EasySpider\\resources\\app\\src\\taskGrid文件夹,把logic_CN.js的第375到383行删掉,即删掉以下内容:\n if (outputNames.indexOf(title) >= 0) { //参数名称已经被添加\n $('#myModal').modal('hide');\n $(\"#tip2\").slideDown(); //提示框\n fadeout = setTimeout(function() {\n $(\"#tip2\").slideUp();\n }, 5000);\n return;\n }\n\n \n \n \n\n \n \n\n \n \n然后把上面自定义操作的名称改成和上面“提取页面文本”操作里一样的字段名称,如参数1_文本,即可使得相同字段里的内容放在一起。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 这个办法生效,就是它还是会在csv首行生成一个同名列。如果后续需要处理,例如使用pandas.read,还需要人工去删除一下。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n这个办法生效,就是它还是会在csv首行生成一个同名列。如果后续需要处理,例如使用pandas.read,还需要人工去删除一下。\n\nOK,这个bug会在下一个版本修复。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n aogg\n \n\n \n\n \n\n commented\n\n\n Jul 3, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 搞个无头模式,最好能docker运行\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 3, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n搞个无头模式,最好能docker运行\n\n无头模式现在就有。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n\n没法后退导致循环断不知道你是怎么设计的,理论上不太可能。\n如果想关闭标签页,添加自定义操作,命令值为:window.close()即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n\n没法后退导致循环断不知道你是怎么设计的,理论上不太可能。\n如果想关闭标签页,添加自定义操作,命令值为:window.close()即可。\n\n我加了window.close()之后,直接把整个浏览器关了。\n我的浏览器,点击链接之后就是在新标签页打开,然后每次打开完第一个链接,提取过数据之后,程序就结束了,我也不知道什么情况\n这是我的程序,前面都没问题,点击详情页,就会打开一个新的标签页,然后里面有个元素是下载PDF的,点了以后就可以下载,然后不管加不加window.close(),下载完都会直接结束程序\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 把你的任务文件发我邮箱:naibowang@foxmail.com,我去帮你看下\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n把你的任务文件发我邮箱:naibowang@foxmail.com,我去帮你看下\n\n好的,谢谢,我的是mac,应该在哪找我的任务文件呢,是这个吗\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 是的\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 8, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的EasySpider默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n导入MySQL数据库功能的版本已发布,欢迎使用。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n rikka5201\n \n\n \n\n \n\n commented\n\n\n Jul 11, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 1.多线程任务\n2.执行任务时增加不显示浏览器窗口的选项\n3.提取数据时加入自定义文本(包括转义字符)\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 11, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n1.多线程任务\n2.执行任务时增加不显示浏览器窗口的选项\n3.提取数据时加入自定义文本(包括转义字符)\n\n这三个功能很早就实现了,请看视频教程列表里的相关内容。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n koushui\n \n\n \n\n \n\n commented\n\n\n Jul 12, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 建议增加 非浏览器模拟访问的 爬取方式,如直接发送Https/http请求,请求里变量变化采集方式\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n xpkyy\n \n\n \n\n \n\n commented\n\n\n Jul 18, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 作者您好,首先感谢您的无私奉献!\n\n使用EasySpider有段时间了,目前感觉在日志记录上不够详尽,希望下一版能提供【保存错误信息】的选项,\n\n \n \n \n\n \n \n\n \n \n详细说明如下:\n现有的日志文件只是记录了操作指令以及错误消息,没有采集到的上下文内容,无法定位是哪一条信息出问题,\n仅能用作开发阶段调试,目前我都是隔一段时间检查控制台,看看有没有错误信息,记录下来采集结束后统一处理,\n由于控制台仅能保存最近3000行左右的内容,远远不够容纳数以万计的内容,一会儿不检查可能就会漏掉错误信息,\n导致采集的信息缺失甚至错误,所以希望有一个【保存错误信息】的选项,就是采集保存的文件里带有所有错误,\n和控制台回显的内容一样就够了,这样就不用人一直在旁边盯着啦。\n因为很多时候测试都OK了,一旦实际运行的时间久了,会遇到各种意想不到的问题,这些问题测试阶段无法预测,\n所以只能利用控制台回显的错误信息+采集到的上下文内容来定位具体信息,并做针对处理,这里举些例子:\n①原本我采集的内容只有检测到对象存在了才进行采集,后来发现对象存在以后,有时加载的内容是错误的,需要等,\n等一瞬或者等几秒甚至十几秒不定,才能加载出正确内容,这需要再对采集到的内容文本增加一个解析判断的操作。\n②有时网页加载完毕,显示某个结果为空【正常就是有空有多条的情况】,过了几秒居然又自己刷新出来几条结果。\n③有时循环采集某个多页列表,明明循环列表的XPATH是一样的,但是总有个别时候会报错找不到某某PATH之类。\n类似种种意想不到的问题,只有发现了才能去解决,所以log记录非常重要,希望作者能抽空完善一下,感激不尽!\n\n \n \n \n\n \n \n\n \n \n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 18, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,首先感谢您的无私奉献!\n\n使用EasySpider有段时间了,目前感觉在日志记录上不够详尽,希望下一版能提供【保存错误信息】的选项,\n\n \n \n \n\n \n \n\n \n \n详细说明如下: 现有的日志文件只是记录了操作指令以及错误消息,没有采集到的上下文内容,无法定位是哪一条信息出问题, 仅能用作开发阶段调试,目前我都是隔一段时间检查控制台,看看有没有错误信息,记录下来采集结束后统一处理, 由于控制台仅能保存最近3000行左右的内容,远远不够容纳数以万计的内容,一会儿不检查可能就会漏掉错误信息, 导致采集的信息缺失甚至错误,所以希望有一个【保存错误信息】的选项,就是采集保存的文件里带有所有错误, 和控制台回显的内容一样就够了,这样就不用人一直在旁边盯着啦。 因为很多时候测试都OK了,一旦实际运行的时间久了,会遇到各种意想不到的问题,这些问题测试阶段无法预测, 所以只能利用控制台回显的错误信息+采集到的上下文内容来定位具体信息,并做针对处理,这里举些例子: ①原本我采集的内容只有检测到对象存在了才进行采集,后来发现对象存在以后,有时加载的内容是错误的,需要等, 等一瞬或者等几秒甚至十几秒不定,才能加载出正确内容,这需要再对采集到的内容文本增加一个解析判断的操作。 ②有时网页加载完毕,显示某个结果为空【正常就是有空有多条的情况】,过了几秒居然又自己刷新出来几条结果。 ③有时循环采集某个多页列表,明明循环列表的XPATH是一样的,但是总有个别时候会报错找不到某某PATH之类。\n类似种种意想不到的问题,只有发现了才能去解决,所以log记录非常重要,希望作者能抽空完善一下,感激不尽!\n\n \n \n \n\n \n \n\n \n \n\n已经更新,可以下载最新源码自行编译或等待下一个版本正式版发布。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 1\n xpkyy reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 1 reaction\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n xpkyy\n \n\n \n\n \n\n commented\n\n\n Jul 19, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 好的,非常感谢!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n nunamia\n \n\n \n\n \n\n commented\n\n\n Jul 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的easySpdier默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n谢谢建议,这个程序设置的初衷是为那些不会写代码的朋友准备的,没想到发布后会有很多人想要写入数据库这种程序员级别的操作。\n因为CSV转DB或者写入SQL数据库这个操作已经有很多开源实现,如:https://github.com/simonw/csvs-to-sqlite\n因此这个功能属于边缘功能,大家如果想写入数据库总会找到办法,因此这个功能暂时不会考虑开发。\n我倒希望有朋友能提交Pull Request,帮我一起完善这个软件,很遗憾发布了一个多月还没有有效的PR提交,一个人维护一个项目实在有些力不从心,将来我个人的重点也会主要集中在核心功能的开发上,至于写入数据库这种边缘功能,期待有朋友帮我做。\n谢谢建议~\n\n还刚下载,对整个项目不熟,后续使用熟练了,有时间就参加一起完善\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n sylcool\n \n\n \n\n \n\n commented\n\n\n Jul 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 可以配置paddleOCR吗,Tesseract特殊字体识别效果不太好。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n可以配置paddleOCR吗,Tesseract特殊字体识别效果不太好。\n\n下个版本会换成ddddocr,paddleOCR需要自行下载代码更换,因为太重量级了无法直接集成。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Nonce-lv\n \n\n \n\n \n\n commented\n\n\n Jul 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 需要增加一个增量爬网的功能。\n比对已经抓取的数量,重复后停止后续操作。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n hzdu\n \n\n \n\n \n\n commented\n\n\n Aug 22, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 我有一个需求是这样的,录入新生的学籍信息,一个个手输非常的麻烦,能不能用EasySpider导入execl文件以后逐行读取每个学籍信息填入表单里,然后循环去执行\n这里还会涉及到一个学校照片上传的操作,能实现最好了\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Aug 22, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n我有一个需求是这样的,录入新生的学籍信息,一个个手输非常的麻烦,能不能用EasySpider导入execl文件以后逐行读取每个学籍信息填入表单里,然后循环去执行 这里还会涉及到一个学校照片上传的操作,能实现最好了\n\n对于批量填写表单功能,使用0.5.0的以下功能配合Excel读取可实现:\n\n\n但因为涉及到了图片上传功能,则需要使用自定义操作中的当前环境下执行代码EXEC功能,配合下面的代码实现:\nhttps://blog.csdn.net/huilan_same/article/details/52439546\n需要注意的是EasySpider定位是一个数据采集软件,因此填写表单这种需求并不是软件的核心。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n HHR-learner\n \n\n \n\n \n\n commented\n\n\n Sep 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 想请问下那种点击按钮下载文件之后的保存路径可以指定吗\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Sep 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n想请问下那种点击按钮下载文件之后的保存路径可以指定吗\n\n用带用户信息的浏览器设计和执行,更改浏览器下载目录即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n \n\n \n\n Repository owner\n\n deleted a comment from \n Nonce-lv \n\n\n Sep 22, 2023\n\n \n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 如果我想让两个类型的元素并行两列输出怎么实现?(在同类型元素识别不了的情况下)\n比如微博发文的作者和发布时间作为两个元素无法实现上述功能\n如果分开提取,会生成两列但也不会一一对应,不在同一个循环提取的对应列会自动填充同一元素\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 我还有一个问题,对于同一个页面,xpath能够定位到所有的元素,为什么还要使用循环去一行一行提取?不能直接提取所有定位到的元素文本吗?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n Sep 24, 2023\n \n\n\n\n \n \n 如果我想让两个类型的元素并行两列输出怎么实现?(在同类型元素识别不了的情况下)\n #191\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n我还有一个问题,对于同一个页面,xpath能够定位到所有的元素,为什么还要使用循环去一行一行提取?不能直接提取所有定位到的元素文本吗?\n\n可以直接提取所有定位到的元素文本,如果不使用循环,则所有的元素文本都会在同一列,一共只有一行。\n如果使用循环,参考地震台网,可以生成结构化的多行数据。\n根据自己的实际情况来决定使用哪种方式即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 请问如果某些页面xpath不能进行定位(应该有反爬),有其他方法可以选择吗?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n Sep 25, 2023\n \n\n\n\n \n \n 请问如果某些页面xpath不能进行定位(应该有反爬),有其他方法可以选择吗?\n #192\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n\n\n\n \n\n\n\n \n \n \n \n Sign up for free\n to join this conversation on GitHub.\n Already have an account?\n Sign in to comment\n\n\n \n\n\n \n\n\n\n \n\n\n\n \n \n\n \n \n Assignees\n \n\n\n \n\n No one assigned\n\n\n\n\n\n\n \n\n \n\n\n \n Labels\n \n\n\n \n None yet\n\n\n\n\n\n \n\n \n \n \n Projects\n \n\n \n\n\n None yet\n\n\n\n \n\n\n \n \n \n \n Milestone\n \n\n No milestone\n\n\n\n\n \n \n \n\n \n \n \n \n \n Development\n \n\n\n\n \n No branches or pull requests\n\n\n\n\n\n \n \n\n\n \n \n\n \n\n \n \n \n \n 20 participants\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n\n\n\n \n\n \n\n\n\n\n \n\n\n\n\n \n \n \n\n\n \n\n\n "}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://github.com/NaiboWang/EasySpider/issues/25","links":"https://github.com/NaiboWang/EasySpider/issues/25","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body/div[1]/div[4]/div[1]/main[1]/turbo-frame[1]/div[1]","allXPaths":["/html/body/div[1]/div[4]/div[1]/main[1]/turbo-frame[1]/div[1]","//div[contains(., '')]","id(\"repo-content-pjax-container\")","//DIV[@class='repository-content ']","/html/body/div[last()-4]/div[last()-2]/div/main/turbo-frame/div"],"exampleValues":[{"num":0,"value":"\n \n\n\n \n \n \n \n \n \n\n \n\n \n\n \n \n \n\n\n\n \n\n \n \n New issue\n \n \n \n \n\n \n \n \n \n\n\n\n\n \n Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.\n \n\n \n Pick a username\n \n \n\n \n Email Address\n \n \n\n Password\n\n \n \n\n\n\n\n Sign up for GitHub\n\n\n By clicking “Sign up for GitHub”, you agree to our terms of service and\n privacy statement. We’ll occasionally send you account related emails.\n\n \n Already on GitHub?\n Sign in\n to your account\n \n\n\n\n \n\n \n Jump to bottom\n \n \n\n \n 记录下待开发的功能,大家也可以来提想要的功能\n #25\n \n \n \n\n \n \n \n \n \n\n Open\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 10 of 11 tasks\n \n\n\n \n\n \n\n\n \n \n \n\n \n NaiboWang opened this issue\nMay 13, 2023\n· 48 comments\n\n\n \n\n\n \n\n \n\n\n\n\n\n \n \n \n \n \n \n \n \n\n Open\n\n \n\n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 10 of 11 tasks\n \n\n\n \n\n \n\n \n \n \n\n \n \n \n 记录下待开发的功能,大家也可以来提想要的功能\n \n #25\n\n\n \n NaiboWang opened this issue\nMay 13, 2023\n· 48 comments\n\n\n \n\n\n \n \n \n \n \n \n \n\n\n\n \n\n \n \n Comments\n\n\n \n \n\n \n \n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n May 13, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 以下功能会在有空的时候集中开发,大家有什么想要的功能也可以提issues留言,也欢迎大家fork之后帮我开发并提交pull request~\n\n 增加隧道IP代理池子教程(功能)。\n 增加一个“自定义操作”,可以执行一些用户制定的JavaScript命令或者系统级别的命令,使得软件更加灵活。\n 条件判断里添加一个“自定义语句判断”,通过用户提供的JavaScript或者系统命令的返回值作为条件判断的判定值\n 截图功能,并通过接入开源OCR模块,自动识别图片中的文字来对付反爬虫字体。\n 提取数据字段增加一项当前页面的URL。\n 处理iframe\n 鼠标移动到元素上\n iframe增加关闭提示框功能\n 切换下拉框\n 记录下cc98用户的开发需求:\n\n\n\n\n (待定)更换selenium底层架构到playwright或Puppeteer\n\n \n \n \n \n \n \n \n \n\n The text was updated successfully, but these errors were encountered:\n \n \n \n \n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n\n\n\n\n\n \n \n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n May 15, 2023\n \n\n\n\n \n \n xpath可以搞多种策略然后让用户选择\n #31\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n \n \n \n \n\n \n \n\n \n\n \nNaiboWang\n\n\n\n\n pinned this issue\n\n\n May 15, 2023\n\n \n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Contributor\n\n\n \n\n \n\n \n \n \n\n \n yfdyh000\n \n\n \n\n \n\n commented\n\n\n May 20, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 刚刚接触,代码上的初步意见:\n未看出Git中放打包的EasySpider.crx,以及分成en和zh两版的意义。\n从代码国际化(i18n)来说,lang=zh应该zh-cn或zh-hans。\n代码和自述里中英混杂,对fork发展也许不太友好。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n May 20, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n刚刚接触,代码上的初步意见: 未看出Git中放打包的EasySpider.crx,以及分成en和zh两版的意义。 从代码国际化(i18n)来说,lang=zh应该zh-cn或zh-hans。 代码和自述里中英混杂,对fork发展也许不太友好。\n\n感谢,都是非常好的建议。\n以上问题大部分原因基本都在于刚开始这个软件是个纯中文软件,只是后来要投稿国际会议WWW 2023所以赶鸭子上架拼凑了一个英文版出来,所以没时间查看其他双语规范的写法。至于en和zh两版是临时想出的解决方案,所有的中英文版本几乎都是独立加载的,没有参考通用写法。\n代码和自述中英混杂是我的习惯问题,因为主要是想中国人用,所以中文放前面,英文只是顺便翻译了一下,不过以后可以考虑分开。\n放出打包的.crx意义确实不大,应该在.gitignore中消掉。\n现阶段我的主要工作还是做科研,等空出时间了会把代码继续重构,符合通用规范。\n再次感谢提出宝贵的建议!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 3\n YannySky, oooing, and LIcopyleft reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 3 reactions\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nyfdyh000\n\n\n\n mentioned this issue\n \n May 21, 2023\n \n\n\n\n \n \n 代码方面建议及问题若干\n #42\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 33 tasks\n \n\n\n\n\n \n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n timodaxia2019\n \n\n \n\n \n\n commented\n\n\n Jun 1, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能不能考虑把采集到的内容直接发布到常用的网站类型中,谢谢!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 2, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能不能考虑把采集到的内容直接发布到常用的网站类型中,谢谢!\n\n这属于衍生需求,有很多工具可以做,参考:https://www.zhihu.com/question/52240701\n因此暂时不考虑。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n SarcomTDG\n \n\n \n\n \n\n commented\n\n\n Jun 2, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 加上打印日志功能\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 1\n Flywolfs reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 1 reaction\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n wzt0501\n \n\n \n\n \n\n commented\n\n\n Jun 7, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 1.列表页字段和详情页的字段能对应上;\n2.相似的块的字段名称和字段值能不能识别生成,有的只知道区域,但字段名称不一定是固定的\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Forrestz88\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Forrestz88\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n\n所谓自动,就是我们事先在任务当中配置好该怎么修改,之后,执行完任务,采集下来的数据已经是按照我们事先的设定改好了,而不需要我们去手动在CSV或Excel文件中修改列表头,这就是我所谓的自动。\n你提到的现在可以手动修改是指在任务配置过程中就修改好了是吗?如果是在任务配置过程中修改就修改好了,逻辑上就已经是我讲的自动修改的意思了。\n我之前用八爪鱼用的很多。八爪鱼当中,列表头的字段名称是可以按照采集者的想法进行任意修改的。你的视频我没时间看。我是昨天一边做别的事,一边一口气听完的,中文的听了16集。软件没上手练,界面可能不熟悉。因为没听到说可以改列表头的这个说法,所以我以为列表头不能在任务配置文件中改呢。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n\n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n\n\n\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n\n所谓自动,就是我们事先在任务当中配置好该怎么修改,之后,执行完任务,采集下来的数据已经是按照我们事先的设定改好了,而不需要我们去手动在CSV或Excel文件中修改列表头,这就是我所谓的自动。\n你提到的现在可以手动修改是指在任务配置过程中就修改好了是吗?如果是在任务配置过程中修改就修改好了,逻辑上就已经是我讲的自动修改的意思了。\n我之前用八爪鱼用的很多。八爪鱼当中,列表头的字段名称是可以按照采集者的想法进行任意修改的。你的视频我没时间看。我是昨天一边做别的事,一边一口气听完的,中文的听了16集。软件没上手练,界面可能不熟悉。因为没听到说可以改列表头的这个说法,所以我以为列表头不能在任务配置文件中改呢。\n\n表头可以在设计时改。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n jyxzwd\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n •\n\n \n \n \n \n edited by NaiboWang\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 作者您好,我想提一个关于对 采集到的数据处理的一个 小建议\n现在的EasySpider默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式?\n其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的easySpdier默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n谢谢建议,这个程序设置的初衷是为那些不会写代码的朋友准备的,没想到发布后会有很多人想要写入数据库这种程序员级别的操作。\n因为CSV转DB或者写入SQL数据库这个操作已经有很多开源实现,如:https://github.com/simonw/csvs-to-sqlite\n因此这个功能属于边缘功能,大家如果想写入数据库总会找到办法,因此这个功能暂时不会考虑开发。\n我倒希望有朋友能提交Pull Request,帮我一起完善这个软件,很遗憾发布了一个多月还没有有效的PR提交,一个人维护一个项目实在有些力不从心,将来我个人的重点也会主要集中在核心功能的开发上,至于写入数据库这种边缘功能,期待有朋友帮我做。\n谢谢建议~\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 2\n QBH-insist and nunamia reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 2 reactions\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n BanKnight\n \n\n \n\n \n\n commented\n\n\n Jun 23, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 目前发现还是变量这块不太行(如果实现了,麻烦也告知一下)\n例如:\n\n提取页面数据作为循环次数\n提取页面数据作为xpath依据\n提取页面数据作为“切换下拉选项”的依据\n\n再就是,编辑器部分建议优化为可以拖动那种操作,虽然目前也够用,但是第一次上手还是不太符合直觉\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能支持下载网页中的文件么?例如PDF\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能支持下载网页中的文件么?例如PDF\n\n最新版本特性里就写了可以下载pdf……\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 27, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 不好意思,只注意看教学视频了。\n另外还有一个建议,对于采集多个页面文章的情况下,默认会把文本合并到输出的csv文件里。那么能否在提取数据的设置里增加一项用户自定义分隔项,例如我可以把每篇文章的文本用<|endoftext|>分隔。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 27, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n不好意思,只注意看教学视频了。\n另外还有一个建议,对于采集多个页面文章的情况下,默认会把文本合并到输出的csv文件里。那么能否在提取数据的设置里增加一项用户自定义分隔项,例如我可以把每篇文章的文本用<|endoftext|>分隔。\n\n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频:\nhttps://www.bilibili.com/video/BV1qs4y1z7Hc\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频: https://www.bilibili.com/video/BV1qs4y1z7Hc\n\n这样试了不行:\n\n如果在“提取页面文本”元素用js追加文本,会给每个p文本都追加。而我希望给整个页面提取出来的文本追加。\n如果在该页面所有p文本提取的循环结束后用js追加(如图),结果是没有期望结果。\n而且我也不希望追加的eot作为另一个单独的字段。\n这个需求抽象来说就是:\n我需要对某个操作或循环的结果,可以用js来处理并替代它们记录到csv。 例如上述例子:详情页面的每个p元素的文本被采集为text字段,那么这个页面处理的循环结束后,我需要用js处理结果,合并它们并追加一个eot标签字符串,然后将这个修改后的页面文本保存到结果csv。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频: https://www.bilibili.com/video/BV1qs4y1z7Hc\n\n这样试了不行: 如果在“提取页面文本”元素用js追加文本,会给每个p文本都追加。而我希望给整个页面提取出来的文本追加。 如果在该页面所有p文本提取的循环结束后用js追加(如图),结果是没有期望结果。 而且我也不希望追加的eot作为另一个单独的字段。\n这个需求抽象来说就是: 我需要对某个操作或循环的结果,可以用js来处理并替代它们记录到csv。 例如上述例子:详情页面的每个p元素的文本被采集为text字段,那么这个页面处理的循环结束后,我需要用js处理结果,合并它们并追加一个eot标签字符串,然后将这个修改后的页面文本保存到结果csv。\n\n先找到你的EasySpider\\resources\\app\\src\\taskGrid文件夹,把logic_CN.js的第375到383行删掉,即删掉以下内容:\n if (outputNames.indexOf(title) >= 0) { //参数名称已经被添加\n $('#myModal').modal('hide');\n $(\"#tip2\").slideDown(); //提示框\n fadeout = setTimeout(function() {\n $(\"#tip2\").slideUp();\n }, 5000);\n return;\n }\n\n \n \n \n\n \n \n\n \n \n然后把上面自定义操作的名称改成和上面“提取页面文本”操作里一样的字段名称,如参数1_文本,即可使得相同字段里的内容放在一起。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 这个办法生效,就是它还是会在csv首行生成一个同名列。如果后续需要处理,例如使用pandas.read,还需要人工去删除一下。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n这个办法生效,就是它还是会在csv首行生成一个同名列。如果后续需要处理,例如使用pandas.read,还需要人工去删除一下。\n\nOK,这个bug会在下一个版本修复。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n aogg\n \n\n \n\n \n\n commented\n\n\n Jul 3, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 搞个无头模式,最好能docker运行\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 3, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n搞个无头模式,最好能docker运行\n\n无头模式现在就有。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n\n没法后退导致循环断不知道你是怎么设计的,理论上不太可能。\n如果想关闭标签页,添加自定义操作,命令值为:window.close()即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n\n没法后退导致循环断不知道你是怎么设计的,理论上不太可能。\n如果想关闭标签页,添加自定义操作,命令值为:window.close()即可。\n\n我加了window.close()之后,直接把整个浏览器关了。\n我的浏览器,点击链接之后就是在新标签页打开,然后每次打开完第一个链接,提取过数据之后,程序就结束了,我也不知道什么情况\n这是我的程序,前面都没问题,点击详情页,就会打开一个新的标签页,然后里面有个元素是下载PDF的,点了以后就可以下载,然后不管加不加window.close(),下载完都会直接结束程序\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 把你的任务文件发我邮箱:naibowang@foxmail.com,我去帮你看下\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n把你的任务文件发我邮箱:naibowang@foxmail.com,我去帮你看下\n\n好的,谢谢,我的是mac,应该在哪找我的任务文件呢,是这个吗\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 是的\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 8, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的EasySpider默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n导入MySQL数据库功能的版本已发布,欢迎使用。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n rikka5201\n \n\n \n\n \n\n commented\n\n\n Jul 11, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 1.多线程任务\n2.执行任务时增加不显示浏览器窗口的选项\n3.提取数据时加入自定义文本(包括转义字符)\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 11, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n1.多线程任务\n2.执行任务时增加不显示浏览器窗口的选项\n3.提取数据时加入自定义文本(包括转义字符)\n\n这三个功能很早就实现了,请看视频教程列表里的相关内容。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n koushui\n \n\n \n\n \n\n commented\n\n\n Jul 12, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 建议增加 非浏览器模拟访问的 爬取方式,如直接发送Https/http请求,请求里变量变化采集方式\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n xpkyy\n \n\n \n\n \n\n commented\n\n\n Jul 18, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 作者您好,首先感谢您的无私奉献!\n\n使用EasySpider有段时间了,目前感觉在日志记录上不够详尽,希望下一版能提供【保存错误信息】的选项,\n\n \n \n \n\n \n \n\n \n \n详细说明如下:\n现有的日志文件只是记录了操作指令以及错误消息,没有采集到的上下文内容,无法定位是哪一条信息出问题,\n仅能用作开发阶段调试,目前我都是隔一段时间检查控制台,看看有没有错误信息,记录下来采集结束后统一处理,\n由于控制台仅能保存最近3000行左右的内容,远远不够容纳数以万计的内容,一会儿不检查可能就会漏掉错误信息,\n导致采集的信息缺失甚至错误,所以希望有一个【保存错误信息】的选项,就是采集保存的文件里带有所有错误,\n和控制台回显的内容一样就够了,这样就不用人一直在旁边盯着啦。\n因为很多时候测试都OK了,一旦实际运行的时间久了,会遇到各种意想不到的问题,这些问题测试阶段无法预测,\n所以只能利用控制台回显的错误信息+采集到的上下文内容来定位具体信息,并做针对处理,这里举些例子:\n①原本我采集的内容只有检测到对象存在了才进行采集,后来发现对象存在以后,有时加载的内容是错误的,需要等,\n等一瞬或者等几秒甚至十几秒不定,才能加载出正确内容,这需要再对采集到的内容文本增加一个解析判断的操作。\n②有时网页加载完毕,显示某个结果为空【正常就是有空有多条的情况】,过了几秒居然又自己刷新出来几条结果。\n③有时循环采集某个多页列表,明明循环列表的XPATH是一样的,但是总有个别时候会报错找不到某某PATH之类。\n类似种种意想不到的问题,只有发现了才能去解决,所以log记录非常重要,希望作者能抽空完善一下,感激不尽!\n\n \n \n \n\n \n \n\n \n \n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 18, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,首先感谢您的无私奉献!\n\n使用EasySpider有段时间了,目前感觉在日志记录上不够详尽,希望下一版能提供【保存错误信息】的选项,\n\n \n \n \n\n \n \n\n \n \n详细说明如下: 现有的日志文件只是记录了操作指令以及错误消息,没有采集到的上下文内容,无法定位是哪一条信息出问题, 仅能用作开发阶段调试,目前我都是隔一段时间检查控制台,看看有没有错误信息,记录下来采集结束后统一处理, 由于控制台仅能保存最近3000行左右的内容,远远不够容纳数以万计的内容,一会儿不检查可能就会漏掉错误信息, 导致采集的信息缺失甚至错误,所以希望有一个【保存错误信息】的选项,就是采集保存的文件里带有所有错误, 和控制台回显的内容一样就够了,这样就不用人一直在旁边盯着啦。 因为很多时候测试都OK了,一旦实际运行的时间久了,会遇到各种意想不到的问题,这些问题测试阶段无法预测, 所以只能利用控制台回显的错误信息+采集到的上下文内容来定位具体信息,并做针对处理,这里举些例子: ①原本我采集的内容只有检测到对象存在了才进行采集,后来发现对象存在以后,有时加载的内容是错误的,需要等, 等一瞬或者等几秒甚至十几秒不定,才能加载出正确内容,这需要再对采集到的内容文本增加一个解析判断的操作。 ②有时网页加载完毕,显示某个结果为空【正常就是有空有多条的情况】,过了几秒居然又自己刷新出来几条结果。 ③有时循环采集某个多页列表,明明循环列表的XPATH是一样的,但是总有个别时候会报错找不到某某PATH之类。\n类似种种意想不到的问题,只有发现了才能去解决,所以log记录非常重要,希望作者能抽空完善一下,感激不尽!\n\n \n \n \n\n \n \n\n \n \n\n已经更新,可以下载最新源码自行编译或等待下一个版本正式版发布。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 1\n xpkyy reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 1 reaction\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n xpkyy\n \n\n \n\n \n\n commented\n\n\n Jul 19, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 好的,非常感谢!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n nunamia\n \n\n \n\n \n\n commented\n\n\n Jul 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的easySpdier默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n谢谢建议,这个程序设置的初衷是为那些不会写代码的朋友准备的,没想到发布后会有很多人想要写入数据库这种程序员级别的操作。\n因为CSV转DB或者写入SQL数据库这个操作已经有很多开源实现,如:https://github.com/simonw/csvs-to-sqlite\n因此这个功能属于边缘功能,大家如果想写入数据库总会找到办法,因此这个功能暂时不会考虑开发。\n我倒希望有朋友能提交Pull Request,帮我一起完善这个软件,很遗憾发布了一个多月还没有有效的PR提交,一个人维护一个项目实在有些力不从心,将来我个人的重点也会主要集中在核心功能的开发上,至于写入数据库这种边缘功能,期待有朋友帮我做。\n谢谢建议~\n\n还刚下载,对整个项目不熟,后续使用熟练了,有时间就参加一起完善\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n sylcool\n \n\n \n\n \n\n commented\n\n\n Jul 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 可以配置paddleOCR吗,Tesseract特殊字体识别效果不太好。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n可以配置paddleOCR吗,Tesseract特殊字体识别效果不太好。\n\n下个版本会换成ddddocr,paddleOCR需要自行下载代码更换,因为太重量级了无法直接集成。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Nonce-lv\n \n\n \n\n \n\n commented\n\n\n Jul 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 需要增加一个增量爬网的功能。\n比对已经抓取的数量,重复后停止后续操作。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n hzdu\n \n\n \n\n \n\n commented\n\n\n Aug 22, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 我有一个需求是这样的,录入新生的学籍信息,一个个手输非常的麻烦,能不能用EasySpider导入execl文件以后逐行读取每个学籍信息填入表单里,然后循环去执行\n这里还会涉及到一个学校照片上传的操作,能实现最好了\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Aug 22, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n我有一个需求是这样的,录入新生的学籍信息,一个个手输非常的麻烦,能不能用EasySpider导入execl文件以后逐行读取每个学籍信息填入表单里,然后循环去执行 这里还会涉及到一个学校照片上传的操作,能实现最好了\n\n对于批量填写表单功能,使用0.5.0的以下功能配合Excel读取可实现:\n\n\n但因为涉及到了图片上传功能,则需要使用自定义操作中的当前环境下执行代码EXEC功能,配合下面的代码实现:\nhttps://blog.csdn.net/huilan_same/article/details/52439546\n需要注意的是EasySpider定位是一个数据采集软件,因此填写表单这种需求并不是软件的核心。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n HHR-learner\n \n\n \n\n \n\n commented\n\n\n Sep 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 想请问下那种点击按钮下载文件之后的保存路径可以指定吗\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Sep 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n想请问下那种点击按钮下载文件之后的保存路径可以指定吗\n\n用带用户信息的浏览器设计和执行,更改浏览器下载目录即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n \n\n \n\n Repository owner\n\n deleted a comment from \n Nonce-lv \n\n\n Sep 22, 2023\n\n \n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 如果我想让两个类型的元素并行两列输出怎么实现?(在同类型元素识别不了的情况下)\n比如微博发文的作者和发布时间作为两个元素无法实现上述功能\n如果分开提取,会生成两列但也不会一一对应,不在同一个循环提取的对应列会自动填充同一元素\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 我还有一个问题,对于同一个页面,xpath能够定位到所有的元素,为什么还要使用循环去一行一行提取?不能直接提取所有定位到的元素文本吗?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n Sep 24, 2023\n \n\n\n\n \n \n 如果我想让两个类型的元素并行两列输出怎么实现?(在同类型元素识别不了的情况下)\n #191\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n我还有一个问题,对于同一个页面,xpath能够定位到所有的元素,为什么还要使用循环去一行一行提取?不能直接提取所有定位到的元素文本吗?\n\n可以直接提取所有定位到的元素文本,如果不使用循环,则所有的元素文本都会在同一列,一共只有一行。\n如果使用循环,参考地震台网,可以生成结构化的多行数据。\n根据自己的实际情况来决定使用哪种方式即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 请问如果某些页面xpath不能进行定位(应该有反爬),有其他方法可以选择吗?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n Sep 25, 2023\n \n\n\n\n \n \n 请问如果某些页面xpath不能进行定位(应该有反爬),有其他方法可以选择吗?\n #192\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n\n\n\n \n\n\n\n \n \n \n \n Sign up for free\n to join this conversation on GitHub.\n Already have an account?\n Sign in to comment\n\n\n \n\n\n \n\n\n\n \n\n\n\n \n \n\n \n \n Assignees\n \n\n\n \n\n No one assigned\n\n\n\n\n\n\n \n\n \n\n\n \n Labels\n \n\n\n \n None yet\n\n\n\n\n\n \n\n \n \n \n Projects\n \n\n \n\n\n None yet\n\n\n\n \n\n\n \n \n \n \n Milestone\n \n\n No milestone\n\n\n\n\n \n \n \n\n \n \n \n \n \n Development\n \n\n\n\n \n No branches or pull requests\n\n\n\n\n\n \n \n\n\n \n \n\n \n\n \n \n \n \n 20 participants\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n\n\n\n \n\n \n\n\n\n\n \n\n\n\n\n \n \n \n\n\n \n\n\n "}],"unique_index":"qs7l3vt4mmlneyhe25","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/64.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/64.json new file mode 100644 index 0000000..87a0d65 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/64.json @@ -0,0 +1 @@ +{"id":64,"name":"胡七刀的主页 - 抖音","url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","create_time":"10/7/2023, 2:25:40 AM","update_time":"10/7/2023, 3:58:34 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0"}],"outputParameters":[{"id":0,"name":"参数1_页面网址","desc":"","type":"text","recordASField":1,"exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":4,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4],"isInLoop":false,"position":1,"parameters":{"history":7,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[2]/div[1]/div[3]/div[3]/div[1]/div[1]/div[3]/div[2]/div[2]/div[2]/ul[1]/li/div[1]/a[1]/p[1]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":7,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":"","loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":2,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":5,"relative":false,"name":"参数1_页面网址","desc":"","extractType":0,"relativeXPath":"/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","allXPaths":["/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","//div[contains(., '按下 「键盘下键」或')]","//DIV[@class='U2AMRJpW']","/html/body/div[last()-6]/div[last()-1]/div/div/div[last()-2]/div"],"exampleValues":[{"num":0,"value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"unique_index":"7rluu38fzb5lnexsqqu","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/65.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/65.json new file mode 100644 index 0000000..efcc736 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/65.json @@ -0,0 +1 @@ +{"id":65,"name":"胡七刀的主页 - 抖音","url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","create_time":"10/7/2023, 2:25:40 AM","update_time":"10/7/2023, 3:58:34 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0"}],"outputParameters":[{"id":0,"name":"参数1_页面网址","desc":"","type":"text","recordASField":1,"exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":4,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4],"isInLoop":false,"position":1,"parameters":{"history":7,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[2]/div[1]/div[3]/div[3]/div[1]/div[1]/div[3]/div[2]/div[2]/div[2]/ul[1]/li/div[1]/a[1]/p[1]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":7,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":"","loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":2,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":5,"relative":false,"name":"参数1_页面网址","desc":"","extractType":0,"relativeXPath":"/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","allXPaths":["/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","//div[contains(., '按下 「键盘下键」或')]","//DIV[@class='U2AMRJpW']","/html/body/div[last()-6]/div[last()-1]/div/div/div[last()-2]/div"],"exampleValues":[{"num":0,"value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"unique_index":"7rluu38fzb5lnexsqqu","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/66.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/66.json new file mode 100644 index 0000000..30c3eb4 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/66.json @@ -0,0 +1 @@ +{"id":66,"name":"胡七刀的主页 - 抖音","url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","create_time":"10/7/2023, 2:25:40 AM","update_time":"10/7/2023, 4:00:47 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0"}],"outputParameters":[{"id":0,"name":"参数1_页面网址","desc":"","type":"text","recordASField":1,"exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":4,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4],"isInLoop":false,"position":1,"parameters":{"history":7,"tabIndex":-1,"useLoop":false,"xpath":"//ul[1]/li/div[1]/a[1]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":7,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":"","loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":2,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":5,"relative":false,"name":"参数1_页面网址","desc":"","extractType":0,"relativeXPath":"/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","allXPaths":["/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","//div[contains(., '按下 「键盘下键」或')]","//DIV[@class='U2AMRJpW']","/html/body/div[last()-6]/div[last()-1]/div/div/div[last()-2]/div"],"exampleValues":[{"num":0,"value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"unique_index":"7rluu38fzb5lnexsqqu","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/67.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/67.json new file mode 100644 index 0000000..ecfc65e --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/67.json @@ -0,0 +1 @@ +{"id":67,"name":"胡七刀的主页 - 抖音","url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","create_time":"10/7/2023, 2:25:40 AM","update_time":"10/7/2023, 4:03:10 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0"}],"outputParameters":[{"id":0,"name":"参数1_页面网址","desc":"","type":"text","recordASField":1,"exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":4,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4],"isInLoop":false,"position":1,"parameters":{"history":7,"tabIndex":-1,"useLoop":false,"xpath":"//ul[1]/li/div[1]/a[1]/p[1]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":7,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":"","loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":2,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":5,"relative":false,"name":"参数1_页面网址","desc":"","extractType":0,"relativeXPath":"/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","allXPaths":["/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","//div[contains(., '按下 「键盘下键」或')]","//DIV[@class='U2AMRJpW']","/html/body/div[last()-6]/div[last()-1]/div/div/div[last()-2]/div"],"exampleValues":[{"num":0,"value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"unique_index":"7rluu38fzb5lnexsqqu","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/68.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/68.json new file mode 100644 index 0000000..df6c2cd --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/68.json @@ -0,0 +1 @@ +{"id":68,"name":"胡七刀的主页 - 抖音","url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","create_time":"10/7/2023, 2:25:40 AM","update_time":"10/7/2023, 4:04:04 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0"}],"outputParameters":[{"id":0,"name":"参数1_页面网址","desc":"","type":"text","recordASField":1,"exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":4,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4],"isInLoop":false,"position":1,"parameters":{"history":7,"tabIndex":-1,"useLoop":false,"xpath":"//ul[1]/li/div[1]/a[1]/p[1]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":7,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":1,"maxWaitTime":10,"paras":[],"allXPaths":"","loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":2,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":5,"relative":false,"name":"参数1_页面网址","desc":"","extractType":0,"relativeXPath":"/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","allXPaths":["/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","//div[contains(., '按下 「键盘下键」或')]","//DIV[@class='U2AMRJpW']","/html/body/div[last()-6]/div[last()-1]/div/div/div[last()-2]/div"],"exampleValues":[{"num":0,"value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"unique_index":"7rluu38fzb5lnexsqqu","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/69.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/69.json new file mode 100644 index 0000000..1691b1f --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/69.json @@ -0,0 +1 @@ +{"id":69,"name":"360安全浏览器2023最新版下载-全面保护上网安全就选360安全浏览器-华军软件园","url":"https://www.onlinedown.net/soft/66801.htm","links":"https://www.onlinedown.net/soft/66801.htm","create_time":"10/10/2023, 11:49:12 AM","update_time":"10/10/2023, 11:49:12 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.onlinedown.net/soft/66801.htm","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.onlinedown.net/soft/66801.htm","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.onlinedown.net/soft/66801.htm"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.onlinedown.net/soft/66801.htm","links":"https://www.onlinedown.net/soft/66801.htm","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"lj-btn\")]","iframe":false,"wait":4,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/section[1]/div[1]/div[1]/div[3]/div[2]/div[1]/div[1]/a[1]/b[1]","//b[contains(., '立即下载')]","//B[@class='lj-btn']","/html/body/section/div/div[last()-4]/div[last()-1]/div/div[last()-1]/div/a[last()-1]/b"]}},{"id":3,"index":3,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"down-list\")]/div[2]/p[1]/a[1]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/section[1]/div[1]/div[2]/div[1]/div[4]/div[1]/div[1]/div[2]/p[1]/a[1]","//a[contains(., '通用网络下载')]","//A[@class='qrcode_show rest']","/html/body/section/div/div[last()-3]/div[last()-1]/div[last()-4]/div/div[last()-1]/div[last()-1]/p/a[last()-1]"]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/70.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/70.json new file mode 100644 index 0000000..38a1b47 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/70.json @@ -0,0 +1 @@ +{"id":70,"name":"360安全浏览器2023最新版下载-全面保护上网安全就选360安全浏览器-华军软件园","url":"https://www.onlinedown.net/soft/66801.htm","links":"https://www.onlinedown.net/soft/66801.htm","create_time":"10/10/2023, 11:49:12 AM","update_time":"10/10/2023, 11:50:21 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.onlinedown.net/soft/66801.htm","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.onlinedown.net/soft/66801.htm","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.onlinedown.net/soft/66801.htm"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.onlinedown.net/soft/66801.htm","links":"https://www.onlinedown.net/soft/66801.htm","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"lj-btn\")]","iframe":false,"wait":4,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/section[1]/div[1]/div[1]/div[3]/div[2]/div[1]/div[1]/a[1]/b[1]","//b[contains(., '立即下载')]","//B[@class='lj-btn']","/html/body/section/div/div[last()-4]/div[last()-1]/div/div[last()-1]/div/a[last()-1]/b"]}},{"id":3,"index":3,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"down-list\")]/div[2]/p[1]/a[1]","iframe":false,"wait":20,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/section[1]/div[1]/div[2]/div[1]/div[4]/div[1]/div[1]/div[2]/p[1]/a[1]","//a[contains(., '通用网络下载')]","//A[@class='qrcode_show rest']","/html/body/section/div/div[last()-3]/div[last()-1]/div[last()-4]/div/div[last()-1]/div[last()-1]/p/a[last()-1]"]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/71.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/71.json new file mode 100644 index 0000000..15af7b9 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/71.json @@ -0,0 +1 @@ +{"id":71,"name":"360安全浏览器2023最新版下载-全面保护上网安全就选360安全浏览器-华军软件园","url":"https://www.onlinedown.net/soft/66801.htm","links":"https://www.onlinedown.net/soft/66801.htm","create_time":"10/10/2023, 11:49:12 AM","update_time":"10/10/2023, 11:57:09 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.onlinedown.net/soft/66801.htm","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.onlinedown.net/soft/66801.htm","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.onlinedown.net/soft/66801.htm"}],"outputParameters":[{"id":0,"name":"自定义操作","desc":"自定义操作返回的数据","type":"text","recordASField":0,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,4,2,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.onlinedown.net/soft/66801.htm","links":"https://www.onlinedown.net/soft/66801.htm","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":3,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"lj-btn\")]","iframe":false,"wait":4,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/section[1]/div[1]/div[1]/div[3]/div[2]/div[1]/div[1]/a[1]/b[1]","//b[contains(., '立即下载')]","//B[@class='lj-btn']","/html/body/section/div/div[last()-4]/div[last()-1]/div/div[last()-1]/div/a[last()-1]/b"]}},{"id":4,"index":3,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":3,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"down-list\")]/div[2]/p[1]/a[1]","iframe":false,"wait":20,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/section[1]/div[1]/div[2]/div[1]/div[4]/div[1]/div[1]/div[2]/p[1]/a[1]","//a[contains(., '通用网络下载')]","//A[@class='qrcode_show rest']","/html/body/section/div/div[last()-3]/div[last()-1]/div[last()-4]/div/div[last()-1]/div[last()-1]/p/a[last()-1]"]}},{"id":2,"index":4,"parentId":0,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"codeMode":"5","code":"self.browser.command_executor._commands[\"send_command\"] = (\"POST\", '/session/$sessionId/chromium/send_command')\nself.paramss = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': \"F:\\\"}}\nself.browser.execute(\"send_command\", self.paramss)","waitTime":0,"recordASField":0,"paraType":"text"}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/72.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/72.json new file mode 100644 index 0000000..251e336 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/72.json @@ -0,0 +1 @@ +{"id":72,"name":"360安全浏览器2023最新版下载-全面保护上网安全就选360安全浏览器-华军软件园","url":"https://www.onlinedown.net/soft/66801.htm","links":"https://www.onlinedown.net/soft/66801.htm","create_time":"10/10/2023, 11:49:12 AM","update_time":"10/10/2023, 12:04:02 PM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.onlinedown.net/soft/66801.htm","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.onlinedown.net/soft/66801.htm","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.onlinedown.net/soft/66801.htm"}],"outputParameters":[{"id":0,"name":"自定义操作","desc":"自定义操作返回的数据","type":"text","recordASField":0,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,4,2,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.onlinedown.net/soft/66801.htm","links":"https://www.onlinedown.net/soft/66801.htm","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":3,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"lj-btn\")]","iframe":false,"wait":4,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/section[1]/div[1]/div[1]/div[3]/div[2]/div[1]/div[1]/a[1]/b[1]","//b[contains(., '立即下载')]","//B[@class='lj-btn']","/html/body/section/div/div[last()-4]/div[last()-1]/div/div[last()-1]/div/a[last()-1]/b"]}},{"id":4,"index":3,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":3,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"down-list\")]/div[2]/p[1]/a[1]","iframe":false,"wait":20,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/section[1]/div[1]/div[2]/div[1]/div[4]/div[1]/div[1]/div[2]/p[1]/a[1]","//a[contains(., '通用网络下载')]","//A[@class='qrcode_show rest']","/html/body/section/div/div[last()-3]/div[last()-1]/div[last()-4]/div/div[last()-1]/div[last()-1]/p/a[last()-1]"]}},{"id":2,"index":4,"parentId":0,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"codeMode":"5","code":"self.browser.command_executor._commands[\"send_command\"] = (\"POST\", \"/session/$sessionId/chromium/send_command\")\nself.paramss = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': 'F:\\'}}\nself.browser.execute(\"send_command\", self.paramss)","waitTime":0,"recordASField":0,"paraType":"text"}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/73.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/73.json new file mode 100644 index 0000000..04c99c5 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/73.json @@ -0,0 +1 @@ +{"id":73,"name":"360安全浏览器2023最新版下载-全面保护上网安全就选360安全浏览器-华军软件园","url":"https://www.onlinedown.net/soft/66801.htm","links":"https://www.onlinedown.net/soft/66801.htm","create_time":"10/10/2023, 11:49:12 AM","update_time":"10/10/2023, 12:05:32 PM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.onlinedown.net/soft/66801.htm","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.onlinedown.net/soft/66801.htm","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.onlinedown.net/soft/66801.htm"}],"outputParameters":[{"id":0,"name":"自定义操作","desc":"自定义操作返回的数据","type":"text","recordASField":0,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,4,2,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.onlinedown.net/soft/66801.htm","links":"https://www.onlinedown.net/soft/66801.htm","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":3,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"lj-btn\")]","iframe":false,"wait":4,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/section[1]/div[1]/div[1]/div[3]/div[2]/div[1]/div[1]/a[1]/b[1]","//b[contains(., '立即下载')]","//B[@class='lj-btn']","/html/body/section/div/div[last()-4]/div[last()-1]/div/div[last()-1]/div/a[last()-1]/b"]}},{"id":4,"index":3,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":3,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"down-list\")]/div[2]/p[1]/a[1]","iframe":false,"wait":20,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/section[1]/div[1]/div[2]/div[1]/div[4]/div[1]/div[1]/div[2]/p[1]/a[1]","//a[contains(., '通用网络下载')]","//A[@class='qrcode_show rest']","/html/body/section/div/div[last()-3]/div[last()-1]/div[last()-4]/div/div[last()-1]/div[last()-1]/p/a[last()-1]"]}},{"id":2,"index":4,"parentId":0,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"codeMode":"5","code":"self.browser.command_executor._commands[\"send_command\"] = (\"POST\", \"/session/$sessionId/chromium/send_command\")\nself.paramss = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': 'F:\\\\'}}\nself.browser.execute(\"send_command\", self.paramss)","waitTime":0,"recordASField":0,"paraType":"text"}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/tasks/222.json b/.temp_to_pub/EasySpider_windows_x64/tasks/222.json new file mode 100644 index 0000000..68052d7 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/tasks/222.json @@ -0,0 +1 @@ +{"id":222,"name":"胡七刀的主页 - 抖音","url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","create_time":"","update_time":"10/7/2023, 2:22:43 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"outputParameters":[{"id":0,"name":"参数4_文本","desc":"","type":"text","recordASField":1,"exampleValue":"给大家报个平安已经抵达昆明,不管是想看救援还是旅行。都别急,容我慢慢剪辑。新疆之旅大家期不期待?首个开Ranger环游新疆的博主。 \n#胡七刀 #七刀新疆之旅 #旅行 #新疆 #自驾游"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,5],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":8,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":-1,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":3,"relative":false,"name":"参数2_outerHTML","desc":"","extractType":0,"relativeXPath":"/html/body/div[2]/div[1]/div[3]/div[4]/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/xg-video-container[1]/video[1]","allXPaths":["/html/body/div[2]/div[1]/div[3]/div[4]/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/xg-video-container[1]/video[1]","//video[contains(., '')]","/html/body/div[last()-9]/div[last()-1]/div/div/div/div[last()-1]/div[last()-2]/div/div[last()-9]/div/div[last()-2]/div[last()-1]/div/xg-video-container/video"],"exampleValues":[{"num":0,"value":""}],"unique_index":"qfw4q8y9cglnexkmll","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":-1,"index":3,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":2,"pathList":"//*[contains(@class, \"EZC0YBrG\")]/li[1]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[2]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[3]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[4]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[5]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[6]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[7]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[8]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[9]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[10]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[11]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[12]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[13]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[14]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[15]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[16]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[17]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[18]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[19]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[20]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[21]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[22]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[23]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[24]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[25]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[26]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[27]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[28]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[29]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[30]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[31]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[32]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[33]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[34]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[35]/div[1]/a[1]/p[1]\n//*[contains(@class, \"EZC0YBrG\")]/li[36]/div[1]/a[1]/p[1]","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":-1,"index":4,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"缅东苗瓦迪救援行动-01 间隔一个月,准备慢慢给大家更新了。目前总救援人数19人, \n#胡七刀 #胡七刀特种志愿救援队 #七扇门 #救援 #反诈"}],"unique_index":"sjdb25f7godlnexli08","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":2}},{"id":2,"index":5,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[6,7],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[2]/div[1]/div[3]/div[3]/div[1]/div[1]/div[3]/div[2]/div[2]/div[2]/ul[1]/li/div[1]/a[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":6,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":"","loopType":1}},{"id":4,"index":7,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":5,"relative":false,"name":"参数4_文本","desc":"","relativeXPath":"//body","allXPaths":["/html/body/div[2]/div[1]/div[3]/div[3]/div[1]/div[1]/div[3]/div[2]/div[2]/div[2]/ul[1]/li[7]/div[1]/a[1]/p[1]","//p[contains(., '给大家报个平安已经抵')]","//P[@class='__0w4MvO']","/html/body/div[last()-6]/div[last()-1]/div/div/div/div/div/div/div/div/ul/li[last()-47]/div/a/p"],"exampleValues":[{"num":0,"value":"给大家报个平安已经抵达昆明,不管是想看救援还是旅行。都别急,容我慢慢剪辑。新疆之旅大家期不期待?首个开Ranger环游新疆的博主。 \n#胡七刀 #七刀新疆之旅 #旅行 #新疆 #自驾游"}],"unique_index":"0vfpiu48hdwjlnexm9hz","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/tasks/223.json b/.temp_to_pub/EasySpider_windows_x64/tasks/223.json new file mode 100644 index 0000000..5e4c651 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/tasks/223.json @@ -0,0 +1 @@ +{"id":223,"name":"胡七刀的主页 - 抖音","url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","create_time":"10/7/2023, 2:25:40 AM","update_time":"10/7/2023, 4:04:04 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0"}],"outputParameters":[{"id":0,"name":"参数1_页面网址","desc":"","type":"text","recordASField":1,"exampleValue":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":4,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","links":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3,4],"isInLoop":false,"position":1,"parameters":{"history":7,"tabIndex":-1,"useLoop":false,"xpath":"//ul[1]/li/div[1]/a[1]/p[1]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":7,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":1,"maxWaitTime":10,"paras":[],"allXPaths":"","loopType":1}},{"id":4,"index":4,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":2,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":5,"relative":false,"name":"参数1_页面网址","desc":"","extractType":0,"relativeXPath":"/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","allXPaths":["/html/body/div[2]/div[1]/div[3]/div[4]/div[3]/div[1]","//div[contains(., '按下 「键盘下键」或')]","//DIV[@class='U2AMRJpW']","/html/body/div[last()-6]/div[last()-1]/div/div/div[last()-2]/div"],"exampleValues":[{"num":0,"value":"https://www.douyin.com/user/MS4wLjABAAAAKiFgskn1GeaftZC_aHjHn4J4rMOCd0QDEBaLYnV7Et0?modal_id=7286064245169458489"}],"unique_index":"7rluu38fzb5lnexsqqu","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/tasks/224.json b/.temp_to_pub/EasySpider_windows_x64/tasks/224.json new file mode 100644 index 0000000..327d24d --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/tasks/224.json @@ -0,0 +1 @@ +{"id":224,"name":"记录下待开发的功能,大家也可以来提想要的功能 · Issue #25 · NaiboWang/EasySpider · GitHub","url":"https://github.com/NaiboWang/EasySpider/issues/25","links":"https://github.com/NaiboWang/EasySpider/issues/25","create_time":"10/7/2023, 2:44:52 AM","update_time":"10/7/2023, 2:44:52 AM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"csv","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://github.com/NaiboWang/EasySpider/issues/25","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://github.com/NaiboWang/EasySpider/issues/25","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://github.com/NaiboWang/EasySpider/issues/25"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"\n \n\n\n \n \n \n \n \n \n\n \n\n \n\n \n \n \n\n\n\n \n\n \n \n New issue\n \n \n \n \n\n \n \n \n \n\n\n\n\n \n Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.\n \n\n \n Pick a username\n \n \n\n \n Email Address\n \n \n\n Password\n\n \n \n\n\n\n\n Sign up for GitHub\n\n\n By clicking “Sign up for GitHub”, you agree to our terms of service and\n privacy statement. We’ll occasionally send you account related emails.\n\n \n Already on GitHub?\n Sign in\n to your account\n \n\n\n\n \n\n \n Jump to bottom\n \n \n\n \n 记录下待开发的功能,大家也可以来提想要的功能\n #25\n \n \n \n\n \n \n \n \n \n\n Open\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 10 of 11 tasks\n \n\n\n \n\n \n\n\n \n \n \n\n \n NaiboWang opened this issue\nMay 13, 2023\n· 48 comments\n\n\n \n\n\n \n\n \n\n\n\n\n\n \n \n \n \n \n \n \n \n\n Open\n\n \n\n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 10 of 11 tasks\n \n\n\n \n\n \n\n \n \n \n\n \n \n \n 记录下待开发的功能,大家也可以来提想要的功能\n \n #25\n\n\n \n NaiboWang opened this issue\nMay 13, 2023\n· 48 comments\n\n\n \n\n\n \n \n \n \n \n \n \n\n\n\n \n\n \n \n Comments\n\n\n \n \n\n \n \n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n May 13, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 以下功能会在有空的时候集中开发,大家有什么想要的功能也可以提issues留言,也欢迎大家fork之后帮我开发并提交pull request~\n\n 增加隧道IP代理池子教程(功能)。\n 增加一个“自定义操作”,可以执行一些用户制定的JavaScript命令或者系统级别的命令,使得软件更加灵活。\n 条件判断里添加一个“自定义语句判断”,通过用户提供的JavaScript或者系统命令的返回值作为条件判断的判定值\n 截图功能,并通过接入开源OCR模块,自动识别图片中的文字来对付反爬虫字体。\n 提取数据字段增加一项当前页面的URL。\n 处理iframe\n 鼠标移动到元素上\n iframe增加关闭提示框功能\n 切换下拉框\n 记录下cc98用户的开发需求:\n\n\n\n\n (待定)更换selenium底层架构到playwright或Puppeteer\n\n \n \n \n \n \n \n \n \n\n The text was updated successfully, but these errors were encountered:\n \n \n \n \n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n\n\n\n\n\n \n \n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n May 15, 2023\n \n\n\n\n \n \n xpath可以搞多种策略然后让用户选择\n #31\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n \n \n \n \n\n \n \n\n \n\n \nNaiboWang\n\n\n\n\n pinned this issue\n\n\n May 15, 2023\n\n \n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Contributor\n\n\n \n\n \n\n \n \n \n\n \n yfdyh000\n \n\n \n\n \n\n commented\n\n\n May 20, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 刚刚接触,代码上的初步意见:\n未看出Git中放打包的EasySpider.crx,以及分成en和zh两版的意义。\n从代码国际化(i18n)来说,lang=zh应该zh-cn或zh-hans。\n代码和自述里中英混杂,对fork发展也许不太友好。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n May 20, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n刚刚接触,代码上的初步意见: 未看出Git中放打包的EasySpider.crx,以及分成en和zh两版的意义。 从代码国际化(i18n)来说,lang=zh应该zh-cn或zh-hans。 代码和自述里中英混杂,对fork发展也许不太友好。\n\n感谢,都是非常好的建议。\n以上问题大部分原因基本都在于刚开始这个软件是个纯中文软件,只是后来要投稿国际会议WWW 2023所以赶鸭子上架拼凑了一个英文版出来,所以没时间查看其他双语规范的写法。至于en和zh两版是临时想出的解决方案,所有的中英文版本几乎都是独立加载的,没有参考通用写法。\n代码和自述中英混杂是我的习惯问题,因为主要是想中国人用,所以中文放前面,英文只是顺便翻译了一下,不过以后可以考虑分开。\n放出打包的.crx意义确实不大,应该在.gitignore中消掉。\n现阶段我的主要工作还是做科研,等空出时间了会把代码继续重构,符合通用规范。\n再次感谢提出宝贵的建议!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 3\n YannySky, oooing, and LIcopyleft reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 3 reactions\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nyfdyh000\n\n\n\n mentioned this issue\n \n May 21, 2023\n \n\n\n\n \n \n 代码方面建议及问题若干\n #42\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 33 tasks\n \n\n\n\n\n \n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n timodaxia2019\n \n\n \n\n \n\n commented\n\n\n Jun 1, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能不能考虑把采集到的内容直接发布到常用的网站类型中,谢谢!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 2, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能不能考虑把采集到的内容直接发布到常用的网站类型中,谢谢!\n\n这属于衍生需求,有很多工具可以做,参考:https://www.zhihu.com/question/52240701\n因此暂时不考虑。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n SarcomTDG\n \n\n \n\n \n\n commented\n\n\n Jun 2, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 加上打印日志功能\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 1\n Flywolfs reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 1 reaction\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n wzt0501\n \n\n \n\n \n\n commented\n\n\n Jun 7, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 1.列表页字段和详情页的字段能对应上;\n2.相似的块的字段名称和字段值能不能识别生成,有的只知道区域,但字段名称不一定是固定的\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Forrestz88\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Forrestz88\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n\n所谓自动,就是我们事先在任务当中配置好该怎么修改,之后,执行完任务,采集下来的数据已经是按照我们事先的设定改好了,而不需要我们去手动在CSV或Excel文件中修改列表头,这就是我所谓的自动。\n你提到的现在可以手动修改是指在任务配置过程中就修改好了是吗?如果是在任务配置过程中修改就修改好了,逻辑上就已经是我讲的自动修改的意思了。\n我之前用八爪鱼用的很多。八爪鱼当中,列表头的字段名称是可以按照采集者的想法进行任意修改的。你的视频我没时间看。我是昨天一边做别的事,一边一口气听完的,中文的听了16集。软件没上手练,界面可能不熟悉。因为没听到说可以改列表头的这个说法,所以我以为列表头不能在任务配置文件中改呢。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n\n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n\n\n\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n\n所谓自动,就是我们事先在任务当中配置好该怎么修改,之后,执行完任务,采集下来的数据已经是按照我们事先的设定改好了,而不需要我们去手动在CSV或Excel文件中修改列表头,这就是我所谓的自动。\n你提到的现在可以手动修改是指在任务配置过程中就修改好了是吗?如果是在任务配置过程中修改就修改好了,逻辑上就已经是我讲的自动修改的意思了。\n我之前用八爪鱼用的很多。八爪鱼当中,列表头的字段名称是可以按照采集者的想法进行任意修改的。你的视频我没时间看。我是昨天一边做别的事,一边一口气听完的,中文的听了16集。软件没上手练,界面可能不熟悉。因为没听到说可以改列表头的这个说法,所以我以为列表头不能在任务配置文件中改呢。\n\n表头可以在设计时改。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n jyxzwd\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n •\n\n \n \n \n \n edited by NaiboWang\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 作者您好,我想提一个关于对 采集到的数据处理的一个 小建议\n现在的EasySpider默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式?\n其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的easySpdier默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n谢谢建议,这个程序设置的初衷是为那些不会写代码的朋友准备的,没想到发布后会有很多人想要写入数据库这种程序员级别的操作。\n因为CSV转DB或者写入SQL数据库这个操作已经有很多开源实现,如:https://github.com/simonw/csvs-to-sqlite\n因此这个功能属于边缘功能,大家如果想写入数据库总会找到办法,因此这个功能暂时不会考虑开发。\n我倒希望有朋友能提交Pull Request,帮我一起完善这个软件,很遗憾发布了一个多月还没有有效的PR提交,一个人维护一个项目实在有些力不从心,将来我个人的重点也会主要集中在核心功能的开发上,至于写入数据库这种边缘功能,期待有朋友帮我做。\n谢谢建议~\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 2\n QBH-insist and nunamia reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 2 reactions\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n BanKnight\n \n\n \n\n \n\n commented\n\n\n Jun 23, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 目前发现还是变量这块不太行(如果实现了,麻烦也告知一下)\n例如:\n\n提取页面数据作为循环次数\n提取页面数据作为xpath依据\n提取页面数据作为“切换下拉选项”的依据\n\n再就是,编辑器部分建议优化为可以拖动那种操作,虽然目前也够用,但是第一次上手还是不太符合直觉\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能支持下载网页中的文件么?例如PDF\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能支持下载网页中的文件么?例如PDF\n\n最新版本特性里就写了可以下载pdf……\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 27, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 不好意思,只注意看教学视频了。\n另外还有一个建议,对于采集多个页面文章的情况下,默认会把文本合并到输出的csv文件里。那么能否在提取数据的设置里增加一项用户自定义分隔项,例如我可以把每篇文章的文本用<|endoftext|>分隔。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 27, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n不好意思,只注意看教学视频了。\n另外还有一个建议,对于采集多个页面文章的情况下,默认会把文本合并到输出的csv文件里。那么能否在提取数据的设置里增加一项用户自定义分隔项,例如我可以把每篇文章的文本用<|endoftext|>分隔。\n\n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频:\nhttps://www.bilibili.com/video/BV1qs4y1z7Hc\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频: https://www.bilibili.com/video/BV1qs4y1z7Hc\n\n这样试了不行:\n\n如果在“提取页面文本”元素用js追加文本,会给每个p文本都追加。而我希望给整个页面提取出来的文本追加。\n如果在该页面所有p文本提取的循环结束后用js追加(如图),结果是没有期望结果。\n而且我也不希望追加的eot作为另一个单独的字段。\n这个需求抽象来说就是:\n我需要对某个操作或循环的结果,可以用js来处理并替代它们记录到csv。 例如上述例子:详情页面的每个p元素的文本被采集为text字段,那么这个页面处理的循环结束后,我需要用js处理结果,合并它们并追加一个eot标签字符串,然后将这个修改后的页面文本保存到结果csv。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频: https://www.bilibili.com/video/BV1qs4y1z7Hc\n\n这样试了不行: 如果在“提取页面文本”元素用js追加文本,会给每个p文本都追加。而我希望给整个页面提取出来的文本追加。 如果在该页面所有p文本提取的循环结束后用js追加(如图),结果是没有期望结果。 而且我也不希望追加的eot作为另一个单独的字段。\n这个需求抽象来说就是: 我需要对某个操作或循环的结果,可以用js来处理并替代它们记录到csv。 例如上述例子:详情页面的每个p元素的文本被采集为text字段,那么这个页面处理的循环结束后,我需要用js处理结果,合并它们并追加一个eot标签字符串,然后将这个修改后的页面文本保存到结果csv。\n\n先找到你的EasySpider\\resources\\app\\src\\taskGrid文件夹,把logic_CN.js的第375到383行删掉,即删掉以下内容:\n if (outputNames.indexOf(title) >= 0) { //参数名称已经被添加\n $('#myModal').modal('hide');\n $(\"#tip2\").slideDown(); //提示框\n fadeout = setTimeout(function() {\n $(\"#tip2\").slideUp();\n }, 5000);\n return;\n }\n\n \n \n \n\n \n \n\n \n \n然后把上面自定义操作的名称改成和上面“提取页面文本”操作里一样的字段名称,如参数1_文本,即可使得相同字段里的内容放在一起。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 这个办法生效,就是它还是会在csv首行生成一个同名列。如果后续需要处理,例如使用pandas.read,还需要人工去删除一下。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n这个办法生效,就是它还是会在csv首行生成一个同名列。如果后续需要处理,例如使用pandas.read,还需要人工去删除一下。\n\nOK,这个bug会在下一个版本修复。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n aogg\n \n\n \n\n \n\n commented\n\n\n Jul 3, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 搞个无头模式,最好能docker运行\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 3, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n搞个无头模式,最好能docker运行\n\n无头模式现在就有。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n\n没法后退导致循环断不知道你是怎么设计的,理论上不太可能。\n如果想关闭标签页,添加自定义操作,命令值为:window.close()即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n\n没法后退导致循环断不知道你是怎么设计的,理论上不太可能。\n如果想关闭标签页,添加自定义操作,命令值为:window.close()即可。\n\n我加了window.close()之后,直接把整个浏览器关了。\n我的浏览器,点击链接之后就是在新标签页打开,然后每次打开完第一个链接,提取过数据之后,程序就结束了,我也不知道什么情况\n这是我的程序,前面都没问题,点击详情页,就会打开一个新的标签页,然后里面有个元素是下载PDF的,点了以后就可以下载,然后不管加不加window.close(),下载完都会直接结束程序\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 把你的任务文件发我邮箱:naibowang@foxmail.com,我去帮你看下\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n把你的任务文件发我邮箱:naibowang@foxmail.com,我去帮你看下\n\n好的,谢谢,我的是mac,应该在哪找我的任务文件呢,是这个吗\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 是的\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 8, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的EasySpider默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n导入MySQL数据库功能的版本已发布,欢迎使用。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n rikka5201\n \n\n \n\n \n\n commented\n\n\n Jul 11, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 1.多线程任务\n2.执行任务时增加不显示浏览器窗口的选项\n3.提取数据时加入自定义文本(包括转义字符)\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 11, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n1.多线程任务\n2.执行任务时增加不显示浏览器窗口的选项\n3.提取数据时加入自定义文本(包括转义字符)\n\n这三个功能很早就实现了,请看视频教程列表里的相关内容。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n koushui\n \n\n \n\n \n\n commented\n\n\n Jul 12, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 建议增加 非浏览器模拟访问的 爬取方式,如直接发送Https/http请求,请求里变量变化采集方式\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n xpkyy\n \n\n \n\n \n\n commented\n\n\n Jul 18, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 作者您好,首先感谢您的无私奉献!\n\n使用EasySpider有段时间了,目前感觉在日志记录上不够详尽,希望下一版能提供【保存错误信息】的选项,\n\n \n \n \n\n \n \n\n \n \n详细说明如下:\n现有的日志文件只是记录了操作指令以及错误消息,没有采集到的上下文内容,无法定位是哪一条信息出问题,\n仅能用作开发阶段调试,目前我都是隔一段时间检查控制台,看看有没有错误信息,记录下来采集结束后统一处理,\n由于控制台仅能保存最近3000行左右的内容,远远不够容纳数以万计的内容,一会儿不检查可能就会漏掉错误信息,\n导致采集的信息缺失甚至错误,所以希望有一个【保存错误信息】的选项,就是采集保存的文件里带有所有错误,\n和控制台回显的内容一样就够了,这样就不用人一直在旁边盯着啦。\n因为很多时候测试都OK了,一旦实际运行的时间久了,会遇到各种意想不到的问题,这些问题测试阶段无法预测,\n所以只能利用控制台回显的错误信息+采集到的上下文内容来定位具体信息,并做针对处理,这里举些例子:\n①原本我采集的内容只有检测到对象存在了才进行采集,后来发现对象存在以后,有时加载的内容是错误的,需要等,\n等一瞬或者等几秒甚至十几秒不定,才能加载出正确内容,这需要再对采集到的内容文本增加一个解析判断的操作。\n②有时网页加载完毕,显示某个结果为空【正常就是有空有多条的情况】,过了几秒居然又自己刷新出来几条结果。\n③有时循环采集某个多页列表,明明循环列表的XPATH是一样的,但是总有个别时候会报错找不到某某PATH之类。\n类似种种意想不到的问题,只有发现了才能去解决,所以log记录非常重要,希望作者能抽空完善一下,感激不尽!\n\n \n \n \n\n \n \n\n \n \n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 18, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,首先感谢您的无私奉献!\n\n使用EasySpider有段时间了,目前感觉在日志记录上不够详尽,希望下一版能提供【保存错误信息】的选项,\n\n \n \n \n\n \n \n\n \n \n详细说明如下: 现有的日志文件只是记录了操作指令以及错误消息,没有采集到的上下文内容,无法定位是哪一条信息出问题, 仅能用作开发阶段调试,目前我都是隔一段时间检查控制台,看看有没有错误信息,记录下来采集结束后统一处理, 由于控制台仅能保存最近3000行左右的内容,远远不够容纳数以万计的内容,一会儿不检查可能就会漏掉错误信息, 导致采集的信息缺失甚至错误,所以希望有一个【保存错误信息】的选项,就是采集保存的文件里带有所有错误, 和控制台回显的内容一样就够了,这样就不用人一直在旁边盯着啦。 因为很多时候测试都OK了,一旦实际运行的时间久了,会遇到各种意想不到的问题,这些问题测试阶段无法预测, 所以只能利用控制台回显的错误信息+采集到的上下文内容来定位具体信息,并做针对处理,这里举些例子: ①原本我采集的内容只有检测到对象存在了才进行采集,后来发现对象存在以后,有时加载的内容是错误的,需要等, 等一瞬或者等几秒甚至十几秒不定,才能加载出正确内容,这需要再对采集到的内容文本增加一个解析判断的操作。 ②有时网页加载完毕,显示某个结果为空【正常就是有空有多条的情况】,过了几秒居然又自己刷新出来几条结果。 ③有时循环采集某个多页列表,明明循环列表的XPATH是一样的,但是总有个别时候会报错找不到某某PATH之类。\n类似种种意想不到的问题,只有发现了才能去解决,所以log记录非常重要,希望作者能抽空完善一下,感激不尽!\n\n \n \n \n\n \n \n\n \n \n\n已经更新,可以下载最新源码自行编译或等待下一个版本正式版发布。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 1\n xpkyy reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 1 reaction\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n xpkyy\n \n\n \n\n \n\n commented\n\n\n Jul 19, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 好的,非常感谢!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n nunamia\n \n\n \n\n \n\n commented\n\n\n Jul 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的easySpdier默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n谢谢建议,这个程序设置的初衷是为那些不会写代码的朋友准备的,没想到发布后会有很多人想要写入数据库这种程序员级别的操作。\n因为CSV转DB或者写入SQL数据库这个操作已经有很多开源实现,如:https://github.com/simonw/csvs-to-sqlite\n因此这个功能属于边缘功能,大家如果想写入数据库总会找到办法,因此这个功能暂时不会考虑开发。\n我倒希望有朋友能提交Pull Request,帮我一起完善这个软件,很遗憾发布了一个多月还没有有效的PR提交,一个人维护一个项目实在有些力不从心,将来我个人的重点也会主要集中在核心功能的开发上,至于写入数据库这种边缘功能,期待有朋友帮我做。\n谢谢建议~\n\n还刚下载,对整个项目不熟,后续使用熟练了,有时间就参加一起完善\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n sylcool\n \n\n \n\n \n\n commented\n\n\n Jul 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 可以配置paddleOCR吗,Tesseract特殊字体识别效果不太好。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n可以配置paddleOCR吗,Tesseract特殊字体识别效果不太好。\n\n下个版本会换成ddddocr,paddleOCR需要自行下载代码更换,因为太重量级了无法直接集成。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Nonce-lv\n \n\n \n\n \n\n commented\n\n\n Jul 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 需要增加一个增量爬网的功能。\n比对已经抓取的数量,重复后停止后续操作。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n hzdu\n \n\n \n\n \n\n commented\n\n\n Aug 22, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 我有一个需求是这样的,录入新生的学籍信息,一个个手输非常的麻烦,能不能用EasySpider导入execl文件以后逐行读取每个学籍信息填入表单里,然后循环去执行\n这里还会涉及到一个学校照片上传的操作,能实现最好了\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Aug 22, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n我有一个需求是这样的,录入新生的学籍信息,一个个手输非常的麻烦,能不能用EasySpider导入execl文件以后逐行读取每个学籍信息填入表单里,然后循环去执行 这里还会涉及到一个学校照片上传的操作,能实现最好了\n\n对于批量填写表单功能,使用0.5.0的以下功能配合Excel读取可实现:\n\n\n但因为涉及到了图片上传功能,则需要使用自定义操作中的当前环境下执行代码EXEC功能,配合下面的代码实现:\nhttps://blog.csdn.net/huilan_same/article/details/52439546\n需要注意的是EasySpider定位是一个数据采集软件,因此填写表单这种需求并不是软件的核心。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n HHR-learner\n \n\n \n\n \n\n commented\n\n\n Sep 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 想请问下那种点击按钮下载文件之后的保存路径可以指定吗\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Sep 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n想请问下那种点击按钮下载文件之后的保存路径可以指定吗\n\n用带用户信息的浏览器设计和执行,更改浏览器下载目录即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n \n\n \n\n Repository owner\n\n deleted a comment from \n Nonce-lv \n\n\n Sep 22, 2023\n\n \n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 如果我想让两个类型的元素并行两列输出怎么实现?(在同类型元素识别不了的情况下)\n比如微博发文的作者和发布时间作为两个元素无法实现上述功能\n如果分开提取,会生成两列但也不会一一对应,不在同一个循环提取的对应列会自动填充同一元素\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 我还有一个问题,对于同一个页面,xpath能够定位到所有的元素,为什么还要使用循环去一行一行提取?不能直接提取所有定位到的元素文本吗?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n Sep 24, 2023\n \n\n\n\n \n \n 如果我想让两个类型的元素并行两列输出怎么实现?(在同类型元素识别不了的情况下)\n #191\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n我还有一个问题,对于同一个页面,xpath能够定位到所有的元素,为什么还要使用循环去一行一行提取?不能直接提取所有定位到的元素文本吗?\n\n可以直接提取所有定位到的元素文本,如果不使用循环,则所有的元素文本都会在同一列,一共只有一行。\n如果使用循环,参考地震台网,可以生成结构化的多行数据。\n根据自己的实际情况来决定使用哪种方式即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 请问如果某些页面xpath不能进行定位(应该有反爬),有其他方法可以选择吗?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n Sep 25, 2023\n \n\n\n\n \n \n 请问如果某些页面xpath不能进行定位(应该有反爬),有其他方法可以选择吗?\n #192\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n\n\n\n \n\n\n\n \n \n \n \n Sign up for free\n to join this conversation on GitHub.\n Already have an account?\n Sign in to comment\n\n\n \n\n\n \n\n\n\n \n\n\n\n \n \n\n \n \n Assignees\n \n\n\n \n\n No one assigned\n\n\n\n\n\n\n \n\n \n\n\n \n Labels\n \n\n\n \n None yet\n\n\n\n\n\n \n\n \n \n \n Projects\n \n\n \n\n\n None yet\n\n\n\n \n\n\n \n \n \n \n Milestone\n \n\n No milestone\n\n\n\n\n \n \n \n\n \n \n \n \n \n Development\n \n\n\n\n \n No branches or pull requests\n\n\n\n\n\n \n \n\n\n \n \n\n \n\n \n \n \n \n 20 participants\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n\n\n\n \n\n \n\n\n\n\n \n\n\n\n\n \n \n \n\n\n \n\n\n "}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://github.com/NaiboWang/EasySpider/issues/25","links":"https://github.com/NaiboWang/EasySpider/issues/25","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body/div[1]/div[4]/div[1]/main[1]/turbo-frame[1]/div[1]","allXPaths":["/html/body/div[1]/div[4]/div[1]/main[1]/turbo-frame[1]/div[1]","//div[contains(., '')]","id(\"repo-content-pjax-container\")","//DIV[@class='repository-content ']","/html/body/div[last()-4]/div[last()-2]/div/main/turbo-frame/div"],"exampleValues":[{"num":0,"value":"\n \n\n\n \n \n \n \n \n \n\n \n\n \n\n \n \n \n\n\n\n \n\n \n \n New issue\n \n \n \n \n\n \n \n \n \n\n\n\n\n \n Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.\n \n\n \n Pick a username\n \n \n\n \n Email Address\n \n \n\n Password\n\n \n \n\n\n\n\n Sign up for GitHub\n\n\n By clicking “Sign up for GitHub”, you agree to our terms of service and\n privacy statement. We’ll occasionally send you account related emails.\n\n \n Already on GitHub?\n Sign in\n to your account\n \n\n\n\n \n\n \n Jump to bottom\n \n \n\n \n 记录下待开发的功能,大家也可以来提想要的功能\n #25\n \n \n \n\n \n \n \n \n \n\n Open\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 10 of 11 tasks\n \n\n\n \n\n \n\n\n \n \n \n\n \n NaiboWang opened this issue\nMay 13, 2023\n· 48 comments\n\n\n \n\n\n \n\n \n\n\n\n\n\n \n \n \n \n \n \n \n \n\n Open\n\n \n\n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 10 of 11 tasks\n \n\n\n \n\n \n\n \n \n \n\n \n \n \n 记录下待开发的功能,大家也可以来提想要的功能\n \n #25\n\n\n \n NaiboWang opened this issue\nMay 13, 2023\n· 48 comments\n\n\n \n\n\n \n \n \n \n \n \n \n\n\n\n \n\n \n \n Comments\n\n\n \n \n\n \n \n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n May 13, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 以下功能会在有空的时候集中开发,大家有什么想要的功能也可以提issues留言,也欢迎大家fork之后帮我开发并提交pull request~\n\n 增加隧道IP代理池子教程(功能)。\n 增加一个“自定义操作”,可以执行一些用户制定的JavaScript命令或者系统级别的命令,使得软件更加灵活。\n 条件判断里添加一个“自定义语句判断”,通过用户提供的JavaScript或者系统命令的返回值作为条件判断的判定值\n 截图功能,并通过接入开源OCR模块,自动识别图片中的文字来对付反爬虫字体。\n 提取数据字段增加一项当前页面的URL。\n 处理iframe\n 鼠标移动到元素上\n iframe增加关闭提示框功能\n 切换下拉框\n 记录下cc98用户的开发需求:\n\n\n\n\n (待定)更换selenium底层架构到playwright或Puppeteer\n\n \n \n \n \n \n \n \n \n\n The text was updated successfully, but these errors were encountered:\n \n \n \n \n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n\n\n\n\n\n \n \n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n May 15, 2023\n \n\n\n\n \n \n xpath可以搞多种策略然后让用户选择\n #31\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n \n \n \n \n\n \n \n\n \n\n \nNaiboWang\n\n\n\n\n pinned this issue\n\n\n May 15, 2023\n\n \n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Contributor\n\n\n \n\n \n\n \n \n \n\n \n yfdyh000\n \n\n \n\n \n\n commented\n\n\n May 20, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 刚刚接触,代码上的初步意见:\n未看出Git中放打包的EasySpider.crx,以及分成en和zh两版的意义。\n从代码国际化(i18n)来说,lang=zh应该zh-cn或zh-hans。\n代码和自述里中英混杂,对fork发展也许不太友好。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n May 20, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n刚刚接触,代码上的初步意见: 未看出Git中放打包的EasySpider.crx,以及分成en和zh两版的意义。 从代码国际化(i18n)来说,lang=zh应该zh-cn或zh-hans。 代码和自述里中英混杂,对fork发展也许不太友好。\n\n感谢,都是非常好的建议。\n以上问题大部分原因基本都在于刚开始这个软件是个纯中文软件,只是后来要投稿国际会议WWW 2023所以赶鸭子上架拼凑了一个英文版出来,所以没时间查看其他双语规范的写法。至于en和zh两版是临时想出的解决方案,所有的中英文版本几乎都是独立加载的,没有参考通用写法。\n代码和自述中英混杂是我的习惯问题,因为主要是想中国人用,所以中文放前面,英文只是顺便翻译了一下,不过以后可以考虑分开。\n放出打包的.crx意义确实不大,应该在.gitignore中消掉。\n现阶段我的主要工作还是做科研,等空出时间了会把代码继续重构,符合通用规范。\n再次感谢提出宝贵的建议!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 3\n YannySky, oooing, and LIcopyleft reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 3 reactions\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nyfdyh000\n\n\n\n mentioned this issue\n \n May 21, 2023\n \n\n\n\n \n \n 代码方面建议及问题若干\n #42\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n \n \n \n \n \n\n \n \n\n \n \n \n \n\n\n 33 tasks\n \n\n\n\n\n \n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n timodaxia2019\n \n\n \n\n \n\n commented\n\n\n Jun 1, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能不能考虑把采集到的内容直接发布到常用的网站类型中,谢谢!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 2, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能不能考虑把采集到的内容直接发布到常用的网站类型中,谢谢!\n\n这属于衍生需求,有很多工具可以做,参考:https://www.zhihu.com/question/52240701\n因此暂时不考虑。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n SarcomTDG\n \n\n \n\n \n\n commented\n\n\n Jun 2, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 加上打印日志功能\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 1\n Flywolfs reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 1 reaction\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n wzt0501\n \n\n \n\n \n\n commented\n\n\n Jun 7, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 1.列表页字段和详情页的字段能对应上;\n2.相似的块的字段名称和字段值能不能识别生成,有的只知道区域,但字段名称不一定是固定的\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Forrestz88\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Forrestz88\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n\n所谓自动,就是我们事先在任务当中配置好该怎么修改,之后,执行完任务,采集下来的数据已经是按照我们事先的设定改好了,而不需要我们去手动在CSV或Excel文件中修改列表头,这就是我所谓的自动。\n你提到的现在可以手动修改是指在任务配置过程中就修改好了是吗?如果是在任务配置过程中修改就修改好了,逻辑上就已经是我讲的自动修改的意思了。\n我之前用八爪鱼用的很多。八爪鱼当中,列表头的字段名称是可以按照采集者的想法进行任意修改的。你的视频我没时间看。我是昨天一边做别的事,一边一口气听完的,中文的听了16集。软件没上手练,界面可能不熟悉。因为没听到说可以改列表头的这个说法,所以我以为列表头不能在任务配置文件中改呢。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n\n采集下来数据的CSV文件的列表头(对应于任务信息当中输出参数的参数名称)的各个字段名称,能否根据采集者的意愿在任务中做配置,从而自动修改?\n\n\n\n\n\n能不能说详细一点什么叫做自动修改,现在各个字段的名称是可以手动修改的。\n\n所谓自动,就是我们事先在任务当中配置好该怎么修改,之后,执行完任务,采集下来的数据已经是按照我们事先的设定改好了,而不需要我们去手动在CSV或Excel文件中修改列表头,这就是我所谓的自动。\n你提到的现在可以手动修改是指在任务配置过程中就修改好了是吗?如果是在任务配置过程中修改就修改好了,逻辑上就已经是我讲的自动修改的意思了。\n我之前用八爪鱼用的很多。八爪鱼当中,列表头的字段名称是可以按照采集者的想法进行任意修改的。你的视频我没时间看。我是昨天一边做别的事,一边一口气听完的,中文的听了16集。软件没上手练,界面可能不熟悉。因为没听到说可以改列表头的这个说法,所以我以为列表头不能在任务配置文件中改呢。\n\n表头可以在设计时改。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n jyxzwd\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n •\n\n \n \n \n \n edited by NaiboWang\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 作者您好,我想提一个关于对 采集到的数据处理的一个 小建议\n现在的EasySpider默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式?\n其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的easySpdier默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n谢谢建议,这个程序设置的初衷是为那些不会写代码的朋友准备的,没想到发布后会有很多人想要写入数据库这种程序员级别的操作。\n因为CSV转DB或者写入SQL数据库这个操作已经有很多开源实现,如:https://github.com/simonw/csvs-to-sqlite\n因此这个功能属于边缘功能,大家如果想写入数据库总会找到办法,因此这个功能暂时不会考虑开发。\n我倒希望有朋友能提交Pull Request,帮我一起完善这个软件,很遗憾发布了一个多月还没有有效的PR提交,一个人维护一个项目实在有些力不从心,将来我个人的重点也会主要集中在核心功能的开发上,至于写入数据库这种边缘功能,期待有朋友帮我做。\n谢谢建议~\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 2\n QBH-insist and nunamia reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 2 reactions\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n BanKnight\n \n\n \n\n \n\n commented\n\n\n Jun 23, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n 目前发现还是变量这块不太行(如果实现了,麻烦也告知一下)\n例如:\n\n提取页面数据作为循环次数\n提取页面数据作为xpath依据\n提取页面数据作为“切换下拉选项”的依据\n\n再就是,编辑器部分建议优化为可以拖动那种操作,虽然目前也够用,但是第一次上手还是不太符合直觉\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能支持下载网页中的文件么?例如PDF\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能支持下载网页中的文件么?例如PDF\n\n最新版本特性里就写了可以下载pdf……\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 27, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 不好意思,只注意看教学视频了。\n另外还有一个建议,对于采集多个页面文章的情况下,默认会把文本合并到输出的csv文件里。那么能否在提取数据的设置里增加一项用户自定义分隔项,例如我可以把每篇文章的文本用<|endoftext|>分隔。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 27, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n不好意思,只注意看教学视频了。\n另外还有一个建议,对于采集多个页面文章的情况下,默认会把文本合并到输出的csv文件里。那么能否在提取数据的设置里增加一项用户自定义分隔项,例如我可以把每篇文章的文本用<|endoftext|>分隔。\n\n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频:\nhttps://www.bilibili.com/video/BV1qs4y1z7Hc\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频: https://www.bilibili.com/video/BV1qs4y1z7Hc\n\n这样试了不行:\n\n如果在“提取页面文本”元素用js追加文本,会给每个p文本都追加。而我希望给整个页面提取出来的文本追加。\n如果在该页面所有p文本提取的循环结束后用js追加(如图),结果是没有期望结果。\n而且我也不希望追加的eot作为另一个单独的字段。\n这个需求抽象来说就是:\n我需要对某个操作或循环的结果,可以用js来处理并替代它们记录到csv。 例如上述例子:详情页面的每个p元素的文本被采集为text字段,那么这个页面处理的循环结束后,我需要用js处理结果,合并它们并追加一个eot标签字符串,然后将这个修改后的页面文本保存到结果csv。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n •\n\n \n \n \n \n edited\n \n \n \n \n\n \n \n \n \n \n \n \n\n \n \n \n\n\n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n这个在提取数据操作前用JS操作一下就行了,可以参考下面这个视频: https://www.bilibili.com/video/BV1qs4y1z7Hc\n\n这样试了不行: 如果在“提取页面文本”元素用js追加文本,会给每个p文本都追加。而我希望给整个页面提取出来的文本追加。 如果在该页面所有p文本提取的循环结束后用js追加(如图),结果是没有期望结果。 而且我也不希望追加的eot作为另一个单独的字段。\n这个需求抽象来说就是: 我需要对某个操作或循环的结果,可以用js来处理并替代它们记录到csv。 例如上述例子:详情页面的每个p元素的文本被采集为text字段,那么这个页面处理的循环结束后,我需要用js处理结果,合并它们并追加一个eot标签字符串,然后将这个修改后的页面文本保存到结果csv。\n\n先找到你的EasySpider\\resources\\app\\src\\taskGrid文件夹,把logic_CN.js的第375到383行删掉,即删掉以下内容:\n if (outputNames.indexOf(title) >= 0) { //参数名称已经被添加\n $('#myModal').modal('hide');\n $(\"#tip2\").slideDown(); //提示框\n fadeout = setTimeout(function() {\n $(\"#tip2\").slideUp();\n }, 5000);\n return;\n }\n\n \n \n \n\n \n \n\n \n \n然后把上面自定义操作的名称改成和上面“提取页面文本”操作里一样的字段名称,如参数1_文本,即可使得相同字段里的内容放在一起。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n liujuncn\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 这个办法生效,就是它还是会在csv首行生成一个同名列。如果后续需要处理,例如使用pandas.read,还需要人工去删除一下。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jun 28, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n这个办法生效,就是它还是会在csv首行生成一个同名列。如果后续需要处理,例如使用pandas.read,还需要人工去删除一下。\n\nOK,这个bug会在下一个版本修复。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n aogg\n \n\n \n\n \n\n commented\n\n\n Jul 3, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 搞个无头模式,最好能docker运行\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 3, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n搞个无头模式,最好能docker运行\n\n无头模式现在就有。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n\n没法后退导致循环断不知道你是怎么设计的,理论上不太可能。\n如果想关闭标签页,添加自定义操作,命令值为:window.close()即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n能不能加个关闭当前标签页,我点开链接都是新的标签页,导致没法后退,循环立马就断了\n\n没法后退导致循环断不知道你是怎么设计的,理论上不太可能。\n如果想关闭标签页,添加自定义操作,命令值为:window.close()即可。\n\n我加了window.close()之后,直接把整个浏览器关了。\n我的浏览器,点击链接之后就是在新标签页打开,然后每次打开完第一个链接,提取过数据之后,程序就结束了,我也不知道什么情况\n这是我的程序,前面都没问题,点击详情页,就会打开一个新的标签页,然后里面有个元素是下载PDF的,点了以后就可以下载,然后不管加不加window.close(),下载完都会直接结束程序\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 把你的任务文件发我邮箱:naibowang@foxmail.com,我去帮你看下\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n handeserve\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n把你的任务文件发我邮箱:naibowang@foxmail.com,我去帮你看下\n\n好的,谢谢,我的是mac,应该在哪找我的任务文件呢,是这个吗\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 5, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 是的\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 8, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的EasySpider默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n导入MySQL数据库功能的版本已发布,欢迎使用。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n rikka5201\n \n\n \n\n \n\n commented\n\n\n Jul 11, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 1.多线程任务\n2.执行任务时增加不显示浏览器窗口的选项\n3.提取数据时加入自定义文本(包括转义字符)\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 11, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n1.多线程任务\n2.执行任务时增加不显示浏览器窗口的选项\n3.提取数据时加入自定义文本(包括转义字符)\n\n这三个功能很早就实现了,请看视频教程列表里的相关内容。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n koushui\n \n\n \n\n \n\n commented\n\n\n Jul 12, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 建议增加 非浏览器模拟访问的 爬取方式,如直接发送Https/http请求,请求里变量变化采集方式\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n xpkyy\n \n\n \n\n \n\n commented\n\n\n Jul 18, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 作者您好,首先感谢您的无私奉献!\n\n使用EasySpider有段时间了,目前感觉在日志记录上不够详尽,希望下一版能提供【保存错误信息】的选项,\n\n \n \n \n\n \n \n\n \n \n详细说明如下:\n现有的日志文件只是记录了操作指令以及错误消息,没有采集到的上下文内容,无法定位是哪一条信息出问题,\n仅能用作开发阶段调试,目前我都是隔一段时间检查控制台,看看有没有错误信息,记录下来采集结束后统一处理,\n由于控制台仅能保存最近3000行左右的内容,远远不够容纳数以万计的内容,一会儿不检查可能就会漏掉错误信息,\n导致采集的信息缺失甚至错误,所以希望有一个【保存错误信息】的选项,就是采集保存的文件里带有所有错误,\n和控制台回显的内容一样就够了,这样就不用人一直在旁边盯着啦。\n因为很多时候测试都OK了,一旦实际运行的时间久了,会遇到各种意想不到的问题,这些问题测试阶段无法预测,\n所以只能利用控制台回显的错误信息+采集到的上下文内容来定位具体信息,并做针对处理,这里举些例子:\n①原本我采集的内容只有检测到对象存在了才进行采集,后来发现对象存在以后,有时加载的内容是错误的,需要等,\n等一瞬或者等几秒甚至十几秒不定,才能加载出正确内容,这需要再对采集到的内容文本增加一个解析判断的操作。\n②有时网页加载完毕,显示某个结果为空【正常就是有空有多条的情况】,过了几秒居然又自己刷新出来几条结果。\n③有时循环采集某个多页列表,明明循环列表的XPATH是一样的,但是总有个别时候会报错找不到某某PATH之类。\n类似种种意想不到的问题,只有发现了才能去解决,所以log记录非常重要,希望作者能抽空完善一下,感激不尽!\n\n \n \n \n\n \n \n\n \n \n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 18, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n作者您好,首先感谢您的无私奉献!\n\n使用EasySpider有段时间了,目前感觉在日志记录上不够详尽,希望下一版能提供【保存错误信息】的选项,\n\n \n \n \n\n \n \n\n \n \n详细说明如下: 现有的日志文件只是记录了操作指令以及错误消息,没有采集到的上下文内容,无法定位是哪一条信息出问题, 仅能用作开发阶段调试,目前我都是隔一段时间检查控制台,看看有没有错误信息,记录下来采集结束后统一处理, 由于控制台仅能保存最近3000行左右的内容,远远不够容纳数以万计的内容,一会儿不检查可能就会漏掉错误信息, 导致采集的信息缺失甚至错误,所以希望有一个【保存错误信息】的选项,就是采集保存的文件里带有所有错误, 和控制台回显的内容一样就够了,这样就不用人一直在旁边盯着啦。 因为很多时候测试都OK了,一旦实际运行的时间久了,会遇到各种意想不到的问题,这些问题测试阶段无法预测, 所以只能利用控制台回显的错误信息+采集到的上下文内容来定位具体信息,并做针对处理,这里举些例子: ①原本我采集的内容只有检测到对象存在了才进行采集,后来发现对象存在以后,有时加载的内容是错误的,需要等, 等一瞬或者等几秒甚至十几秒不定,才能加载出正确内容,这需要再对采集到的内容文本增加一个解析判断的操作。 ②有时网页加载完毕,显示某个结果为空【正常就是有空有多条的情况】,过了几秒居然又自己刷新出来几条结果。 ③有时循环采集某个多页列表,明明循环列表的XPATH是一样的,但是总有个别时候会报错找不到某某PATH之类。\n类似种种意想不到的问题,只有发现了才能去解决,所以log记录非常重要,希望作者能抽空完善一下,感激不尽!\n\n \n \n \n\n \n \n\n \n \n\n已经更新,可以下载最新源码自行编译或等待下一个版本正式版发布。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n 👍\n 1\n xpkyy reacted with thumbs up emoji\n \n \n \n All reactions\n \n \n\n \n \n 👍\n 1 reaction\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n xpkyy\n \n\n \n\n \n\n commented\n\n\n Jul 19, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 好的,非常感谢!\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n nunamia\n \n\n \n\n \n\n commented\n\n\n Jul 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n\n作者您好,我想提一个关于对 采集到的数据处理的一个 小建议 现在的easySpdier默认是将采集到的数据以 csv 的格式储存在文件里,但是大多数时候我们也会有将数据持久到数据库或者别的文件格式的需求,能否提供自定义的持久化方式? 其实持久化到数据库这个操作也可以通过 现在已经提供的 插入js代码来调用其他api 来“间接”实现这个功能(这是我目前能想到的方法)\n\n谢谢建议,这个程序设置的初衷是为那些不会写代码的朋友准备的,没想到发布后会有很多人想要写入数据库这种程序员级别的操作。\n因为CSV转DB或者写入SQL数据库这个操作已经有很多开源实现,如:https://github.com/simonw/csvs-to-sqlite\n因此这个功能属于边缘功能,大家如果想写入数据库总会找到办法,因此这个功能暂时不会考虑开发。\n我倒希望有朋友能提交Pull Request,帮我一起完善这个软件,很遗憾发布了一个多月还没有有效的PR提交,一个人维护一个项目实在有些力不从心,将来我个人的重点也会主要集中在核心功能的开发上,至于写入数据库这种边缘功能,期待有朋友帮我做。\n谢谢建议~\n\n还刚下载,对整个项目不熟,后续使用熟练了,有时间就参加一起完善\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n sylcool\n \n\n \n\n \n\n commented\n\n\n Jul 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 可以配置paddleOCR吗,Tesseract特殊字体识别效果不太好。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Jul 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n可以配置paddleOCR吗,Tesseract特殊字体识别效果不太好。\n\n下个版本会换成ddddocr,paddleOCR需要自行下载代码更换,因为太重量级了无法直接集成。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n Nonce-lv\n \n\n \n\n \n\n commented\n\n\n Jul 26, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 需要增加一个增量爬网的功能。\n比对已经抓取的数量,重复后停止后续操作。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n hzdu\n \n\n \n\n \n\n commented\n\n\n Aug 22, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 我有一个需求是这样的,录入新生的学籍信息,一个个手输非常的麻烦,能不能用EasySpider导入execl文件以后逐行读取每个学籍信息填入表单里,然后循环去执行\n这里还会涉及到一个学校照片上传的操作,能实现最好了\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Aug 22, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n我有一个需求是这样的,录入新生的学籍信息,一个个手输非常的麻烦,能不能用EasySpider导入execl文件以后逐行读取每个学籍信息填入表单里,然后循环去执行 这里还会涉及到一个学校照片上传的操作,能实现最好了\n\n对于批量填写表单功能,使用0.5.0的以下功能配合Excel读取可实现:\n\n\n但因为涉及到了图片上传功能,则需要使用自定义操作中的当前环境下执行代码EXEC功能,配合下面的代码实现:\nhttps://blog.csdn.net/huilan_same/article/details/52439546\n需要注意的是EasySpider定位是一个数据采集软件,因此填写表单这种需求并不是软件的核心。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n HHR-learner\n \n\n \n\n \n\n commented\n\n\n Sep 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 想请问下那种点击按钮下载文件之后的保存路径可以指定吗\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Sep 21, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n想请问下那种点击按钮下载文件之后的保存路径可以指定吗\n\n用带用户信息的浏览器设计和执行,更改浏览器下载目录即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n \n\n \n\n Repository owner\n\n deleted a comment from \n Nonce-lv \n\n\n Sep 22, 2023\n\n \n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 如果我想让两个类型的元素并行两列输出怎么实现?(在同类型元素识别不了的情况下)\n比如微博发文的作者和发布时间作为两个元素无法实现上述功能\n如果分开提取,会生成两列但也不会一一对应,不在同一个循环提取的对应列会自动填充同一元素\n\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 我还有一个问题,对于同一个页面,xpath能够定位到所有的元素,为什么还要使用循环去一行一行提取?不能直接提取所有定位到的元素文本吗?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n Sep 24, 2023\n \n\n\n\n \n \n 如果我想让两个类型的元素并行两列输出怎么实现?(在同类型元素识别不了的情况下)\n #191\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n \n Owner\n\n\n \n\n Author\n\n\n \n\n \n \n \n\n \n NaiboWang\n \n\n \n\n \n\n commented\n\n\n Sep 24, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n \n我还有一个问题,对于同一个页面,xpath能够定位到所有的元素,为什么还要使用循环去一行一行提取?不能直接提取所有定位到的元素文本吗?\n\n可以直接提取所有定位到的元素文本,如果不使用循环,则所有的元素文本都会在同一列,一共只有一行。\n如果使用循环,参考地震台网,可以生成结构化的多行数据。\n根据自己的实际情况来决定使用哪种方式即可。\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n\n\n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n\n \n \n\n \n \n \n Copy link\n\n \n \n \n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n\n \n steelor\n \n\n \n\n \n\n commented\n\n\n Sep 25, 2023\n\n\n \n \n\n \n\n\n\n \n\n \n\n \n \n \n 请问如果某些页面xpath不能进行定位(应该有反爬),有其他方法可以选择吗?\n \n \n \n\n\n\n\n \n\n \n \n \n \n \n \n \n \n \n All reactions\n \n \n\n \n \n \n \n \n\n \n \n \n\n \n \n \n \n \n \n\n \n \n \n \n\n Sorry, something went wrong.\n \n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n \n\n \n\n \n \n \nNaiboWang\n\n\n\n mentioned this issue\n \n Sep 25, 2023\n \n\n\n\n \n \n 请问如果某些页面xpath不能进行定位(应该有反爬),有其他方法可以选择吗?\n #192\n \n\n\n \n \n \n \n\n Closed\n\n \n\n\n\n\n\n \n\n\n\n\n\n\n\n\n \n \n \n \n \n\n\n\n \n\n\n\n \n \n \n \n Sign up for free\n to join this conversation on GitHub.\n Already have an account?\n Sign in to comment\n\n\n \n\n\n \n\n\n\n \n\n\n\n \n \n\n \n \n Assignees\n \n\n\n \n\n No one assigned\n\n\n\n\n\n\n \n\n \n\n\n \n Labels\n \n\n\n \n None yet\n\n\n\n\n\n \n\n \n \n \n Projects\n \n\n \n\n\n None yet\n\n\n\n \n\n\n \n \n \n \n Milestone\n \n\n No milestone\n\n\n\n\n \n \n \n\n \n \n \n \n \n Development\n \n\n\n\n \n No branches or pull requests\n\n\n\n\n\n \n \n\n\n \n \n\n \n\n \n \n \n \n 20 participants\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n\n\n\n \n\n \n\n\n\n\n \n\n\n\n\n \n \n \n\n\n \n\n\n "}],"unique_index":"qs7l3vt4mmlneyhe25","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/tasks/225.json b/.temp_to_pub/EasySpider_windows_x64/tasks/225.json new file mode 100644 index 0000000..41c57d1 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/tasks/225.json @@ -0,0 +1 @@ +{"id":225,"name":"360安全浏览器2023最新版下载-全面保护上网安全就选360安全浏览器-华军软件园","url":"https://www.onlinedown.net/soft/66801.htm","links":"https://www.onlinedown.net/soft/66801.htm","create_time":"10/10/2023, 11:49:12 AM","update_time":"10/10/2023, 12:05:32 PM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.onlinedown.net/soft/66801.htm","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.onlinedown.net/soft/66801.htm","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.onlinedown.net/soft/66801.htm"}],"outputParameters":[{"id":0,"name":"自定义操作","desc":"自定义操作返回的数据","type":"text","recordASField":0,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,4,2,3],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.onlinedown.net/soft/66801.htm","links":"https://www.onlinedown.net/soft/66801.htm","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":3,"index":2,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"lj-btn\")]","iframe":false,"wait":4,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/section[1]/div[1]/div[1]/div[3]/div[2]/div[1]/div[1]/a[1]/b[1]","//b[contains(., '立即下载')]","//B[@class='lj-btn']","/html/body/section/div/div[last()-4]/div[last()-1]/div/div[last()-1]/div/a[last()-1]/b"]}},{"id":4,"index":3,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":3,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"down-list\")]/div[2]/p[1]/a[1]","iframe":false,"wait":20,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/section[1]/div[1]/div[2]/div[1]/div[4]/div[1]/div[1]/div[2]/p[1]/a[1]","//a[contains(., '通用网络下载')]","//A[@class='qrcode_show rest']","/html/body/section/div/div[last()-3]/div[last()-1]/div[last()-4]/div/div[last()-1]/div[last()-1]/p/a[last()-1]"]}},{"id":2,"index":4,"parentId":0,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"codeMode":"5","code":"self.browser.command_executor._commands[\"send_command\"] = (\"POST\", \"/session/$sessionId/chromium/send_command\")\nself.paramss = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': 'F:\\\\'}}\nself.browser.execute(\"send_command\", self.paramss)","waitTime":0,"recordASField":0,"paraType":"text"}}]} \ No newline at end of file diff --git a/ExecuteStage/.vscode/launch.json b/ExecuteStage/.vscode/launch.json index d251c63..8f341e8 100644 --- a/ExecuteStage/.vscode/launch.json +++ b/ExecuteStage/.vscode/launch.json @@ -12,7 +12,7 @@ "justMyCode": false, // "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"] // "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"] - "args": ["--id", "[14]", "--headless", "0", "--user_data", "0", "--keyboard", "0"] + "args": ["--id", "[73]", "--headless", "1", "--user_data", "0", "--keyboard", "0"] } ] } \ No newline at end of file diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index 6e50fef..57c8ddb 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -1955,7 +1955,11 @@ if __name__ == '__main__': "plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}], "download.directory_upgrade": True, "download.extensions_to_open": "applications/pdf", - "plugins.always_open_pdf_externally": True # 总是在外部程序中打开PDF + "plugins.always_open_pdf_externally": True, # 总是在外部程序中打开PDF + "safebrowsing_for_trusted_sources_enabled": False, + "safebrowsing.enabled": False, + 'safebrowsing.disable_download_protection': True, + 'profile.default_content_settings.popups': 0, }) option.add_experimental_option("prefs", { # 设置文件下载路径 @@ -1964,7 +1968,12 @@ if __name__ == '__main__': "plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}], "download.directory_upgrade": True, "download.extensions_to_open": "applications/pdf", - "plugins.always_open_pdf_externally": True # 总是在外部程序中打开PDF + "plugins.always_open_pdf_externally": True, # 总是在外部程序中打开PDF + "safebrowsing_for_trusted_sources_enabled": False, + "safebrowsing.enabled": False, + 'safebrowsing.enabled': False, + 'safebrowsing.disable_download_protection': True, + 'profile.default_content_settings.popups': 0, }) try: if service["environment"] == 1: