mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-22 18:45:40 +08:00
Linux OCR Hint
This commit is contained in:
parent
b746556d71
commit
5853d2da00
1
ElectronJS/tasks/83.json
Normal file
1
ElectronJS/tasks/83.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"id":83,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/27/2023, 7:33:15 PM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"string","exampleValue":"/手机/数码"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":8,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"},{"num":1,"value":"/家用电器"},{"num":2,"value":"/电脑/办公"},{"num":3,"value":"/家纺/家居/厨具"},{"num":4,"value":"/家具/家装/灯具/工业品"},{"num":5,"value":"/内衣/男装/女装/童装"},{"num":6,"value":"/箱包/钟表/珠宝/女鞋"},{"num":7,"value":"/运动/户外/男鞋"},{"num":8,"value":"/汽车用品/车载电器"},{"num":9,"value":"/母婴/洗护喂养"},{"num":10,"value":"/玩具乐器/宠物生活"},{"num":11,"value":"/家庭清洁/个人护理/计生情趣"},{"num":12,"value":"/图书/童书/文学"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
|
1
ExecuteStage/.gitignore
vendored
1
ExecuteStage/.gitignore
vendored
@ -1,3 +1,4 @@
|
|||||||
|
EasySpider/
|
||||||
node_modules/
|
node_modules/
|
||||||
*.csv
|
*.csv
|
||||||
.idea/
|
.idea/
|
||||||
|
4
ExecuteStage/.vscode/launch.json
vendored
4
ExecuteStage/.vscode/launch.json
vendored
@ -2,7 +2,7 @@
|
|||||||
// Use IntelliSense to learn about possible attributes.
|
// Use IntelliSense to learn about possible attributes.
|
||||||
// Hover to view descriptions of existing attributes.
|
// Hover to view descriptions of existing attributes.
|
||||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
"version": "0.2.0",
|
"version": "0.3.1",
|
||||||
"configurations": [
|
"configurations": [
|
||||||
{
|
{
|
||||||
"name": "Python: EasySpider",
|
"name": "Python: EasySpider",
|
||||||
@ -12,7 +12,7 @@
|
|||||||
"console": "integratedTerminal",
|
"console": "integratedTerminal",
|
||||||
"justMyCode": true,
|
"justMyCode": true,
|
||||||
// "args": ["--id", "38", "--read_type", "local", "--headless", "1"]
|
// "args": ["--id", "38", "--read_type", "local", "--headless", "1"]
|
||||||
"args": ["--id", "[21, 22]", "--headless", "0", "--user_data", "1"]
|
"args": ["--id", "[5]", "--headless", "0", "--user_data", "0"]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
@ -867,6 +867,13 @@ class BrowserThread(Thread):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
content = "OCR Error"
|
content = "OCR Error"
|
||||||
print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable PATH (need to restart EasySpider after you put in PATH): https://tesseract-ocr.github.io/tessdoc/Installation.html")
|
print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable PATH (need to restart EasySpider after you put in PATH): https://tesseract-ocr.github.io/tessdoc/Installation.html")
|
||||||
|
if sys.platform == "win32":
|
||||||
|
print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://blog.csdn.net/u010454030/article/details/80515501\nhttps://www.bilibili.com/video/BV1xz4y1b72D/")
|
||||||
|
elif sys.platform == "darwin":
|
||||||
|
print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://zhuanlan.zhihu.com/p/146044810")
|
||||||
|
elif sys.platform == "linux":
|
||||||
|
print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://zhuanlan.zhihu.com/p/420259031")
|
||||||
|
else:
|
||||||
print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://blog.csdn.net/u010454030/article/details/80515501\nhttps://www.bilibili.com/video/BV1xz4y1b72D/")
|
print("要使用OCR识别功能,你需要安装Tesseract-OCR并将其添加到环境变量PATH中(添加后需重启EasySpider):https://blog.csdn.net/u010454030/article/details/80515501\nhttps://www.bilibili.com/video/BV1xz4y1b72D/")
|
||||||
elif p["contentType"] == 9:
|
elif p["contentType"] == 9:
|
||||||
content = self.execute_code(2, p["JS"], p["JSWaitTime"], element)
|
content = self.execute_code(2, p["JS"], p["JSWaitTime"], element)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user