mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-21 12:15:16 +08:00
XPath bug fix
This commit is contained in:
parent
76b9b10dc7
commit
22cb45061c
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1 @@
|
|||||||
|
{"id":10,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/8/2023, 7:20:32 PM","update_time":"7/8/2023, 7:23:02 PM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"mysql","saveName":"地震Info","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_页面标题","desc":"","type":"text","recordASField":1,"exampleValue":"手机"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div/a","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":1,"contentType":0,"relative":true,"name":"参数1_页面标题","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"手机"}],"unique_index":"vaoyqp5ljeljt5coby","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1 @@
|
|||||||
|
{"id":8,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/8/2023, 6:27:01 AM","update_time":"7/8/2023, 6:27:01 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"手机"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://shouji.jd.com/"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div/a","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":1,"contentType":0,"relative":true,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"手机"}],"unique_index":"vaoyqp5ljeljt5coby","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数2_链接地址","desc":"","relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://shouji.jd.com/"}],"unique_index":"vaoyqp5ljeljt5coby","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
|
@ -0,0 +1 @@
|
|||||||
|
{"id":9,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/8/2023, 6:27:01 AM","update_time":"7/8/2023, 7:21:19 PM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"mysql","saveName":"地震Info","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"手机"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://shouji.jd.com/"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div/a","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":1,"contentType":0,"relative":true,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"手机"}],"unique_index":"vaoyqp5ljeljt5coby","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数2_链接地址","desc":"","relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://shouji.jd.com/"}],"unique_index":"vaoyqp5ljeljt5coby","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
|
@ -1 +1 @@
|
|||||||
{"id":107,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/8/2023, 6:27:01 AM","update_time":"7/8/2023, 6:27:01 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"手机"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://shouji.jd.com/"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div/a","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":1,"contentType":0,"relative":true,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"手机"}],"unique_index":"vaoyqp5ljeljt5coby","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数2_链接地址","desc":"","relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://shouji.jd.com/"}],"unique_index":"vaoyqp5ljeljt5coby","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
|
{"id":107,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/8/2023, 6:27:01 AM","update_time":"7/8/2023, 7:22:40 PM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"地震Info","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"手机"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://shouji.jd.com/"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div/a","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":1,"contentType":0,"relative":true,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"手机"}],"unique_index":"vaoyqp5ljeljt5coby","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数2_链接地址","desc":"","relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://shouji.jd.com/"}],"unique_index":"vaoyqp5ljeljt5coby","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
|
1
.temp_to_pub/EasySpider_windows_x64/tasks/116.json
Normal file
1
.temp_to_pub/EasySpider_windows_x64/tasks/116.json
Normal file
File diff suppressed because one or more lines are too long
1
.temp_to_pub/EasySpider_windows_x64/tasks/117.json
Normal file
1
.temp_to_pub/EasySpider_windows_x64/tasks/117.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"id":117,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/8/2023, 7:20:32 PM","update_time":"7/8/2023, 7:23:02 PM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"mysql","saveName":"地震Info","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_页面标题","desc":"","type":"text","recordASField":1,"exampleValue":"手机"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div/a","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":1,"contentType":0,"relative":true,"name":"参数1_页面标题","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"手机"}],"unique_index":"vaoyqp5ljeljt5coby","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
|
5604
.temp_to_pub/EasySpider_windows_x64/tasks/118.json
Normal file
5604
.temp_to_pub/EasySpider_windows_x64/tasks/118.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -37,7 +37,10 @@ def compress_folder_to_7z_split(folder_path, output_file):
|
|||||||
os.remove(split_file)
|
os.remove(split_file)
|
||||||
|
|
||||||
# 压缩文件夹
|
# 压缩文件夹
|
||||||
|
try:
|
||||||
subprocess.call(["7z", "a", "-v95m", output_file, folder_path])
|
subprocess.call(["7z", "a", "-v95m", output_file, folder_path])
|
||||||
|
except:
|
||||||
|
subprocess.call(["7za", "a", "-v95m", output_file, folder_path])
|
||||||
|
|
||||||
easyspider_version = "0.3.5"
|
easyspider_version = "0.3.5"
|
||||||
|
|
||||||
|
@ -144,11 +144,11 @@ let app = new Vue({
|
|||||||
"beforeJS": "",
|
"beforeJS": "",
|
||||||
"beforeJSWaitTime": 0,
|
"beforeJSWaitTime": 0,
|
||||||
"JS": "",
|
"JS": "",
|
||||||
"paraType": "text",
|
|
||||||
"JSWaitTime": 0,
|
"JSWaitTime": 0,
|
||||||
"afterJS": "",
|
"afterJS": "",
|
||||||
"afterJSWaitTime": 0,
|
"afterJSWaitTime": 0,
|
||||||
"downloadPic": 0
|
"downloadPic": 0,
|
||||||
|
"paraType": "text",
|
||||||
});
|
});
|
||||||
this.paraIndex = this.nowNode["parameters"]["paras"].length - 1;
|
this.paraIndex = this.nowNode["parameters"]["paras"].length - 1;
|
||||||
setTimeout(function(){$("#app > div.elements > div.toolkitcontain > table.toolkittb4 > tbody > tr:last-child")[0].scrollIntoView(false); //滚动到底部
|
setTimeout(function(){$("#app > div.elements > div.toolkitcontain > table.toolkittb4 > tbody > tr:last-child")[0].scrollIntoView(false); //滚动到底部
|
||||||
|
@ -42,7 +42,7 @@ from PIL import Image
|
|||||||
# import uuid
|
# import uuid
|
||||||
from threading import Thread, Event
|
from threading import Thread, Event
|
||||||
from myChrome import MyChrome, MyUCChrome
|
from myChrome import MyChrome, MyUCChrome
|
||||||
from utils import check_pause, download_image, get_output_code, isnull, myMySQL, new_line, write_to_csv, write_to_excel
|
from utils import check_pause, download_image, get_output_code, isnull, lowercase_tags_in_xpath, myMySQL, new_line, write_to_csv, write_to_excel
|
||||||
desired_capabilities = DesiredCapabilities.CHROME
|
desired_capabilities = DesiredCapabilities.CHROME
|
||||||
desired_capabilities["pageLoadStrategy"] = "none"
|
desired_capabilities["pageLoadStrategy"] = "none"
|
||||||
|
|
||||||
@ -174,6 +174,11 @@ class BrowserThread(Thread):
|
|||||||
iframe = node["parameters"]["iframe"]
|
iframe = node["parameters"]["iframe"]
|
||||||
except:
|
except:
|
||||||
node["parameters"]["iframe"] = False
|
node["parameters"]["iframe"] = False
|
||||||
|
try:
|
||||||
|
node["parameters"]["xpath"] = lowercase_tags_in_xpath(
|
||||||
|
node["parameters"]["xpath"])
|
||||||
|
except:
|
||||||
|
pass
|
||||||
if node["option"] == 1: # 打开网页操作
|
if node["option"] == 1: # 打开网页操作
|
||||||
try:
|
try:
|
||||||
cookies = node["parameters"]["cookies"]
|
cookies = node["parameters"]["cookies"]
|
||||||
@ -186,6 +191,10 @@ class BrowserThread(Thread):
|
|||||||
iframe = para["iframe"]
|
iframe = para["iframe"]
|
||||||
except:
|
except:
|
||||||
para["iframe"] = False
|
para["iframe"] = False
|
||||||
|
try:
|
||||||
|
para["relativeXPath"] = lowercase_tags_in_xpath(para["relativeXPath"])
|
||||||
|
except:
|
||||||
|
pass
|
||||||
if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2:
|
if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2:
|
||||||
para["optimizable"] = True
|
para["optimizable"] = True
|
||||||
else:
|
else:
|
||||||
@ -1158,12 +1167,23 @@ class BrowserThread(Thread):
|
|||||||
if self.browser.iframe_env != p["iframe"]:
|
if self.browser.iframe_env != p["iframe"]:
|
||||||
p["optimizable"] = False
|
p["optimizable"] = False
|
||||||
continue
|
continue
|
||||||
p["relativeXPath"] = p["relativeXPath"].lower()
|
# p["relativeXPath"] = p["relativeXPath"].lower()
|
||||||
|
# p["relativeXPath"] = lowercase_tags_in_xpath(p["relativeXPath"])
|
||||||
if p["nodeType"] == 2:
|
if p["nodeType"] == 2:
|
||||||
|
if p["relativeXPath"].find("/@href") >= 0:
|
||||||
|
xpath = p["relativeXPath"]
|
||||||
|
else:
|
||||||
xpath = p["relativeXPath"] + "/@href"
|
xpath = p["relativeXPath"] + "/@href"
|
||||||
elif p["contentType"] == 1:
|
elif p["contentType"] == 1:
|
||||||
|
# 已经有text()了,不需要再加
|
||||||
|
if p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0:
|
||||||
|
xpath = p["relativeXPath"]
|
||||||
|
else:
|
||||||
xpath = p["relativeXPath"] + "/text()"
|
xpath = p["relativeXPath"] + "/text()"
|
||||||
elif p["contentType"] == 0:
|
elif p["contentType"] == 0:
|
||||||
|
if p["relativeXPath"].find("/text()") >= 0 or p["relativeXPath"].find("::text()") >= 0:
|
||||||
|
xpath = p["relativeXPath"]
|
||||||
|
else:
|
||||||
xpath = p["relativeXPath"] + "//text()"
|
xpath = p["relativeXPath"] + "//text()"
|
||||||
if p["relative"]:
|
if p["relative"]:
|
||||||
# if p["relativeXPath"] == "":
|
# if p["relativeXPath"] == "":
|
||||||
@ -1214,7 +1234,8 @@ class BrowserThread(Thread):
|
|||||||
content = ""
|
content = ""
|
||||||
if not (p["contentType"] == 5 or p["contentType"] == 6): # 如果不是页面标题或URL,去找元素
|
if not (p["contentType"] == 5 or p["contentType"] == 6): # 如果不是页面标题或URL,去找元素
|
||||||
try:
|
try:
|
||||||
p["relativeXPath"] = p["relativeXPath"].lower()
|
# p["relativeXPath"] = p["relativeXPath"].lower()
|
||||||
|
# p["relativeXPath"] = lowercase_tags_in_xpath(p["relativeXPath"])
|
||||||
if p["relative"]: # 是否相对xpath
|
if p["relative"]: # 是否相对xpath
|
||||||
if p["relativeXPath"] == "": # 相对xpath有时候就是元素本身,不需要二次查找
|
if p["relativeXPath"] == "": # 相对xpath有时候就是元素本身,不需要二次查找
|
||||||
element = loopElement
|
element = loopElement
|
||||||
|
@ -1,35 +1,44 @@
|
|||||||
from lxml import etree
|
# from lxml import etree
|
||||||
|
|
||||||
# 解析HTML
|
# # 解析HTML
|
||||||
html = """
|
# html = """
|
||||||
<div>
|
# <div>
|
||||||
123
|
# 123
|
||||||
<ul class="list">
|
# <ul class="list">
|
||||||
<li class="item-0">first item</li>
|
# <li class="item-0">first item</li>
|
||||||
<li class="item-1"><a href="link2.html">second item</a></li>
|
# <li class="item-1"><a href="link2.html">second item</a></li>
|
||||||
</ul>
|
# </ul>
|
||||||
456
|
# 456
|
||||||
<div></div>
|
# <div></div>
|
||||||
789
|
# 789
|
||||||
</div>
|
# </div>
|
||||||
"""
|
# """
|
||||||
html = etree.HTML(html)
|
# html = etree.HTML(html)
|
||||||
element = html.xpath("*")
|
# element = html.xpath("*")
|
||||||
direct_text = "/html/body/" + html[0][0].tag + "/text()"
|
# direct_text = "/html/body/" + html[0][0].tag + "/text()"
|
||||||
all_text = "/html/body/" + html[0][0].tag + "//text()"
|
# all_text = "/html/body/" + html[0][0].tag + "//text()"
|
||||||
# 使用XPath选择元素
|
# # 使用XPath选择元素
|
||||||
results = html.xpath(direct_text)
|
# results = html.xpath(direct_text)
|
||||||
# print(results)
|
# # print(results)
|
||||||
# 拼接所有文本内容并去掉两边的空白
|
# # 拼接所有文本内容并去掉两边的空白
|
||||||
text = ' '.join(result.strip() for result in results if result.strip())
|
# text = ' '.join(result.strip() for result in results if result.strip())
|
||||||
|
|
||||||
# 输出结果
|
# # 输出结果
|
||||||
print(text)
|
# print(text)
|
||||||
|
|
||||||
results = html.xpath(all_text)
|
# results = html.xpath(all_text)
|
||||||
# print(results)
|
# # print(results)
|
||||||
# 拼接所有文本内容并去掉两边的空白
|
# # 拼接所有文本内容并去掉两边的空白
|
||||||
text = ' '.join(result.strip() for result in results if result.strip())
|
# text = ' '.join(result.strip() for result in results if result.strip())
|
||||||
|
|
||||||
# 输出结果
|
# # 输出结果
|
||||||
print(text)
|
# print(text)
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
def lowercase_xpath_tags(xpath):
|
||||||
|
return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath)
|
||||||
|
|
||||||
|
print(lowercase_xpath_tags('//DIV[@id="J_recommendGoods"]/DIV[2]/UL'))
|
||||||
|
print("//strong//span[contains(@class,'page-item_M4MDr')]/..//following-sibling::a[1]")
|
||||||
|
print("")
|
@ -4,6 +4,7 @@ import csv
|
|||||||
import datetime
|
import datetime
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
import keyboard
|
import keyboard
|
||||||
@ -11,6 +12,7 @@ from openpyxl import Workbook, load_workbook
|
|||||||
import requests
|
import requests
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
import pymysql
|
import pymysql
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
def is_valid_url(url):
|
def is_valid_url(url):
|
||||||
try:
|
try:
|
||||||
@ -19,6 +21,8 @@ def is_valid_url(url):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def lowercase_tags_in_xpath(xpath):
|
||||||
|
return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath)
|
||||||
|
|
||||||
def check_pause(key, event):
|
def check_pause(key, event):
|
||||||
while True:
|
while True:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user