mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-20 08:04:59 +08:00
Merge pull request #360 from touero/master
Fix fetching node data before the if-checks in the preprocess event loop
This commit is contained in:
commit
c272e5da86
@ -234,24 +234,20 @@ class BrowserThread(Thread):
|
|||||||
|
|
||||||
# 检测如果没有复杂的操作,优化提取数据流程
|
# 检测如果没有复杂的操作,优化提取数据流程
|
||||||
def preprocess(self):
|
def preprocess(self):
|
||||||
for node in self.procedure:
|
for index_node, node in enumerate(self.procedure):
|
||||||
try:
|
parameters = node["parameters"]
|
||||||
iframe = node["parameters"]["iframe"]
|
iframe = parameters.get('iframe')
|
||||||
except:
|
parameters["iframe"] = False if not iframe else ...
|
||||||
node["parameters"]["iframe"] = False
|
if parameters.get("xpath"):
|
||||||
|
parameters["xpath"] = lowercase_tags_in_xpath(parameters["xpath"])
|
||||||
|
|
||||||
|
if parameters.get("waitElementIframeIndex"):
|
||||||
|
parameters["waitElementIframeIndex"] = int(parameters["waitElementIframeIndex"])
|
||||||
|
else:
|
||||||
|
parameters["waitElement"] = ""
|
||||||
|
parameters["waitElementTime"] = 10
|
||||||
|
parameters["waitElementIframeIndex"] = 0
|
||||||
|
|
||||||
try:
|
|
||||||
node["parameters"]["xpath"] = lowercase_tags_in_xpath(
|
|
||||||
node["parameters"]["xpath"])
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
node["parameters"]["waitElementIframeIndex"] = int(
|
|
||||||
node["parameters"]["waitElementIframeIndex"])
|
|
||||||
except:
|
|
||||||
node["parameters"]["waitElement"] = ""
|
|
||||||
node["parameters"]["waitElementTime"] = 10
|
|
||||||
node["parameters"]["waitElementIframeIndex"] = 0
|
|
||||||
if node["option"] == 1: # 打开网页操作
|
if node["option"] == 1: # 打开网页操作
|
||||||
try:
|
try:
|
||||||
cookies = node["parameters"]["cookies"]
|
cookies = node["parameters"]["cookies"]
|
||||||
@ -409,6 +405,7 @@ class BrowserThread(Thread):
|
|||||||
"nodeType": param["nodeType"],
|
"nodeType": param["nodeType"],
|
||||||
"default": param["default"],
|
"default": param["default"],
|
||||||
})
|
})
|
||||||
|
self.procedure[index_node]["parameters"] = parameters
|
||||||
self.print_and_log("预处理完成|Preprocess completed")
|
self.print_and_log("预处理完成|Preprocess completed")
|
||||||
|
|
||||||
def readFromExcel(self):
|
def readFromExcel(self):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user