diff --git a/ElectronJS/tasks/83.json b/ElectronJS/tasks/83.json new file mode 100644 index 0000000..e77aefa --- /dev/null +++ b/ElectronJS/tasks/83.json @@ -0,0 +1 @@ +{"id":83,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/27/2023, 7:33:15 PM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"/手机/数码"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":8,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"},{"num":1,"value":"/家用电器"},{"num":2,"value":"/电脑/办公"},{"num":3,"value":"/家纺/家居/厨具"},{"num":4,"value":"/家具/家装/灯具/工业品"},{"num":5,"value":"/内衣/男装/女装/童装"},{"num":6,"value":"/箱包/钟表/珠宝/女鞋"},{"num":7,"value":"/运动/户外/男鞋"},{"num":8,"value":"/汽车用品/车载电器"},{"num":9,"value":"/母婴/洗护喂养"},{"num":10,"value":"/玩具乐器/宠物生活"},{"num":11,"value":"/家庭清洁/个人护理/计生情趣"},{"num":12,"value":"/图书/童书/文学"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]} \ No newline at end of file diff --git a/ExecuteStage/.gitignore b/ExecuteStage/.gitignore index 918977c..b9eb9e4 100644 --- a/ExecuteStage/.gitignore +++ b/ExecuteStage/.gitignore @@ -1,3 +1,4 @@ +EasySpider/ node_modules/ *.csv .idea/ diff --git a/ExecuteStage/.vscode/launch.json b/ExecuteStage/.vscode/launch.json index 0482ad9..f4001aa 100644 --- a/ExecuteStage/.vscode/launch.json +++ b/ExecuteStage/.vscode/launch.json @@ -2,7 +2,7 @@ // Use IntelliSense to learn about possible attributes. // Hover to view descriptions of existing attributes. // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", + "version": "0.3.1", "configurations": [ { "name": "Python: EasySpider", @@ -12,7 +12,7 @@ "console": "integratedTerminal", "justMyCode": true, // "args": ["--id", "38", "--read_type", "local", "--headless", "1"] - "args": ["--id", "[21, 22]", "--headless", "0", "--user_data", "1"] + "args": ["--id", "[5]", "--headless", "0", "--user_data", "0"] } ] } \ No newline at end of file diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index 02dd2c4..a3009ee 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -867,7 +867,14 @@ class BrowserThread(Thread): except Exception as e: content = "OCR Error" print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable PATH (need to restart EasySpider after you put in PATH): https://tesseract-ocr.github.io/tessdoc/Installation.html") - print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://blog.csdn.net/u010454030/article/details/80515501\nhttps://www.bilibili.com/video/BV1xz4y1b72D/") + if sys.platform == "win32": + print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://blog.csdn.net/u010454030/article/details/80515501\nhttps://www.bilibili.com/video/BV1xz4y1b72D/") + elif sys.platform == "darwin": + print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://zhuanlan.zhihu.com/p/146044810") + elif sys.platform == "linux": + print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://zhuanlan.zhihu.com/p/420259031") + else: + print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://blog.csdn.net/u010454030/article/details/80515501\nhttps://www.bilibili.com/video/BV1xz4y1b72D/") elif p["contentType"] == 9: content = self.execute_code(2, p["JS"], p["JSWaitTime"], element) elif p["contentType"] == 10: # 下拉框选中的值