From bcdf6fb413791d11cbe7aca075e2a40c24477d4f Mon Sep 17 00:00:00 2001 From: naibo Date: Sun, 9 Jul 2023 05:43:32 +0800 Subject: [PATCH] V0.3.5 --- .../execution_instances/0.json | 1 + .../execution_instances/1.json | 1 + .../EasySpider_windows_x64/tasks/119.json | 2 +- ExecuteStage/easyspider_executestage.py | 21 +++++++------------ 4 files changed, 10 insertions(+), 15 deletions(-) create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/0.json create mode 100644 .temp_to_pub/EasySpider_windows_x64/execution_instances/1.json diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/0.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/0.json new file mode 100644 index 0000000..fccdd52 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/0.json @@ -0,0 +1 @@ +{"id":0,"name":"Error 403 (Forbidden)!!1","url":"https://www.youtube.com/@indymogul/videos","links":"https://www.youtube.com/@indymogul/videos","create_time":"","update_time":"7/9/2023, 5:19:09 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.youtube.com/@indymogul/videos","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.youtube.com/@indymogul/videos","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.youtube.com/@indymogul/videos"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"RED KOMODO vs Canon C70 - Which One For You?"},{"id":1,"name":"自定义参数_1","desc":"","type":"text","recordASField":1,"exampleValue":"自定义值"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":3,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.youtube.com/@indymogul/videos","links":"https://www.youtube.com/@indymogul/videos","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"video-title\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/ytd-app[1]/div[1]/ytd-page-manager[1]/ytd-browse[1]/ytd-two-column-browse-results-renderer[1]/div[1]/ytd-rich-grid-renderer[1]/div[6]/ytd-rich-grid-row[1]/div[1]/ytd-rich-item-renderer[1]/div[1]/ytd-rich-grid-media[1]/div[1]/div[2]/div[1]/h3[1]/a[1]/yt-formatted-string[1]","//yt-formatted-string[contains(., 'RED KOMODO')]","id(\"video-title\")","//YT-FORMATTED-STRING[@class='style-scope ytd-rich-grid-media']","/html/body/ytd-app/div[last()-1]/ytd-page-manager/ytd-browse/ytd-two-column-browse-results-renderer/div[last()-1]/ytd-rich-grid-renderer/div/ytd-rich-grid-row[last()-6]/div/ytd-rich-item-renderer[last()-3]/div/ytd-rich-grid-media/div[last()-1]/div[last()-1]/div[last()-1]/h3/a/yt-formatted-string"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":2,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"//*[@id=\"video-title-link\"]","allXPaths":"","exampleValues":[{"num":0,"value":"RED KOMODO vs Canon C70 - Which One For You?"}],"unique_index":"c7rh7qqmz6iljuhcxqr","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":false,"name":"自定义参数_1","desc":"","extractType":0,"relativeXPath":"//*[@id=\"video-title\"]","recordASField":1,"allXPaths":[],"exampleValues":[{"num":0,"value":"自定义值"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"paraType":"text"}],"loopType":1}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/execution_instances/1.json b/.temp_to_pub/EasySpider_windows_x64/execution_instances/1.json new file mode 100644 index 0000000..4376a87 --- /dev/null +++ b/.temp_to_pub/EasySpider_windows_x64/execution_instances/1.json @@ -0,0 +1 @@ +{"id":1,"name":"Error 403 (Forbidden)!!1","url":"https://www.youtube.com/@indymogul/videos","links":"https://www.youtube.com/@indymogul/videos","create_time":"7/9/2023, 5:37:25 AM","update_time":"7/9/2023, 5:39:38 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"csv","saveName":"current_time","containJudge":false,"desc":"https://www.youtube.com/@indymogul/videos","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.youtube.com/@indymogul/videos","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.youtube.com/@indymogul/videos"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"RED KOMODO vs Canon C70 - Which One For You?"},{"id":1,"name":"自定义参数_2","desc":"","type":"text","recordASField":1,"exampleValue":"自定义值"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.youtube.com/@indymogul/videos","links":"https://www.youtube.com/@indymogul/videos","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"video-title\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/ytd-app[1]/div[1]/ytd-page-manager[1]/ytd-browse[1]/ytd-two-column-browse-results-renderer[1]/div[1]/ytd-rich-grid-renderer[1]/div[6]/ytd-rich-grid-row[1]/div[1]/ytd-rich-item-renderer[1]/div[1]/ytd-rich-grid-media[1]/div[1]/div[2]/div[1]/h3[1]/a[1]/yt-formatted-string[1]","//yt-formatted-string[contains(., 'RED KOMODO')]","id(\"video-title\")","//YT-FORMATTED-STRING[@class='style-scope ytd-rich-grid-media']","/html/body/ytd-app/div[last()-1]/ytd-page-manager/ytd-browse/ytd-two-column-browse-results-renderer/div[last()-1]/ytd-rich-grid-renderer/div/ytd-rich-grid-row[last()-6]/div/ytd-rich-item-renderer[last()-3]/div/ytd-rich-grid-media/div[last()-1]/div[last()-1]/div[last()-1]/h3/a/yt-formatted-string"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"//*[@id=\"video-title\"]/@href","allXPaths":"","exampleValues":[{"num":0,"value":"RED KOMODO vs Canon C70 - Which One For You?"}],"unique_index":"3443502pcccljuizl0j","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":false,"name":"自定义参数_2","desc":"","extractType":0,"relativeXPath":"//*[@id=\"video-title-link\"]/@href","recordASField":1,"allXPaths":[],"exampleValues":[{"num":0,"value":"自定义值"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"paraType":"text"}],"loopType":1}}]} \ No newline at end of file diff --git a/.temp_to_pub/EasySpider_windows_x64/tasks/119.json b/.temp_to_pub/EasySpider_windows_x64/tasks/119.json index a9068e4..8ad2f49 100644 --- a/.temp_to_pub/EasySpider_windows_x64/tasks/119.json +++ b/.temp_to_pub/EasySpider_windows_x64/tasks/119.json @@ -1 +1 @@ -{"id":119,"name":"Error 403 (Forbidden)!!1","url":"https://www.youtube.com/@indymogul/videos","links":"https://www.youtube.com/@indymogul/videos","create_time":"","update_time":"7/9/2023, 5:14:25 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.youtube.com/@indymogul/videos","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.youtube.com/@indymogul/videos","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.youtube.com/@indymogul/videos"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"RED KOMODO vs Canon C70 - Which One For You?"},{"id":1,"name":"自定义参数_1","desc":"","type":"text","recordASField":1,"exampleValue":"自定义值"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":3,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.youtube.com/@indymogul/videos","links":"https://www.youtube.com/@indymogul/videos","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"video-title\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/ytd-app[1]/div[1]/ytd-page-manager[1]/ytd-browse[1]/ytd-two-column-browse-results-renderer[1]/div[1]/ytd-rich-grid-renderer[1]/div[6]/ytd-rich-grid-row[1]/div[1]/ytd-rich-item-renderer[1]/div[1]/ytd-rich-grid-media[1]/div[1]/div[2]/div[1]/h3[1]/a[1]/yt-formatted-string[1]","//yt-formatted-string[contains(., 'RED KOMODO')]","id(\"video-title\")","//YT-FORMATTED-STRING[@class='style-scope ytd-rich-grid-media']","/html/body/ytd-app/div[last()-1]/ytd-page-manager/ytd-browse/ytd-two-column-browse-results-renderer/div[last()-1]/ytd-rich-grid-renderer/div/ytd-rich-grid-row[last()-6]/div/ytd-rich-item-renderer[last()-3]/div/ytd-rich-grid-media/div[last()-1]/div[last()-1]/div[last()-1]/h3/a/yt-formatted-string"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"//*[@id=\"video-title-link\"]/@href","allXPaths":"","exampleValues":[{"num":0,"value":"RED KOMODO vs Canon C70 - Which One For You?"}],"unique_index":"c7rh7qqmz6iljuhcxqr","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":false,"name":"自定义参数_1","desc":"","extractType":0,"relativeXPath":"//*[@id=\"video-title\"]","recordASField":1,"allXPaths":[],"exampleValues":[{"num":0,"value":"自定义值"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"paraType":"text"}],"loopType":1}}]} \ No newline at end of file +{"id":119,"name":"Error 403 (Forbidden)!!1","url":"https://www.youtube.com/@indymogul/videos","links":"https://www.youtube.com/@indymogul/videos","create_time":"7/9/2023, 5:37:25 AM","update_time":"7/9/2023, 5:39:38 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"csv","saveName":"current_time","containJudge":false,"desc":"https://www.youtube.com/@indymogul/videos","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.youtube.com/@indymogul/videos","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.youtube.com/@indymogul/videos"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"RED KOMODO vs Canon C70 - Which One For You?"},{"id":1,"name":"自定义参数_2","desc":"","type":"text","recordASField":1,"exampleValue":"自定义值"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.youtube.com/@indymogul/videos","links":"https://www.youtube.com/@indymogul/videos","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"video-title\"]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/ytd-app[1]/div[1]/ytd-page-manager[1]/ytd-browse[1]/ytd-two-column-browse-results-renderer[1]/div[1]/ytd-rich-grid-renderer[1]/div[6]/ytd-rich-grid-row[1]/div[1]/ytd-rich-item-renderer[1]/div[1]/ytd-rich-grid-media[1]/div[1]/div[2]/div[1]/h3[1]/a[1]/yt-formatted-string[1]","//yt-formatted-string[contains(., 'RED KOMODO')]","id(\"video-title\")","//YT-FORMATTED-STRING[@class='style-scope ytd-rich-grid-media']","/html/body/ytd-app/div[last()-1]/ytd-page-manager/ytd-browse/ytd-two-column-browse-results-renderer/div[last()-1]/ytd-rich-grid-renderer/div/ytd-rich-grid-row[last()-6]/div/ytd-rich-item-renderer[last()-3]/div/ytd-rich-grid-media/div[last()-1]/div[last()-1]/div[last()-1]/h3/a/yt-formatted-string"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"//*[@id=\"video-title\"]/@href","allXPaths":"","exampleValues":[{"num":0,"value":"RED KOMODO vs Canon C70 - Which One For You?"}],"unique_index":"3443502pcccljuizl0j","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":false,"name":"自定义参数_2","desc":"","extractType":0,"relativeXPath":"//*[@id=\"video-title-link\"]/@href","recordASField":1,"allXPaths":[],"exampleValues":[{"num":0,"value":"自定义值"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"paraType":"text"}],"loopType":1}}]} \ No newline at end of file diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index d7f25cd..bfee78d 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -1172,22 +1172,15 @@ class BrowserThread(Thread): continue # p["relativeXPath"] = p["relativeXPath"].lower() # p["relativeXPath"] = lowercase_tags_in_xpath(p["relativeXPath"]) - if p["nodeType"] == 2: - if p["relativeXPath"].find("/@href") >= 0: - xpath = p["relativeXPath"] - else: - xpath = p["relativeXPath"] + "/@href" + # 已经有text()或@href了,不需要再加 + if p["relativeXPath"].find("/@href") >= 0 or p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0: + xpath = p["relativeXPath"] + elif p["nodeType"] == 2: + xpath = p["relativeXPath"] + "/@href" elif p["contentType"] == 1: - # 已经有text()了,不需要再加 - if p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0: - xpath = p["relativeXPath"] - else: - xpath = p["relativeXPath"] + "/text()" + xpath = p["relativeXPath"] + "/text()" elif p["contentType"] == 0: - if p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0: - xpath = p["relativeXPath"] - else: - xpath = p["relativeXPath"] + "//text()" + xpath = p["relativeXPath"] + "//text()" if p["relative"]: # if p["relativeXPath"] == "": # content = [loopElementHTML]