mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-16 16:26:56 +08:00
MacOS Test
This commit is contained in:
parent
e4037e221d
commit
5376aa37b0
File diff suppressed because it is too large
Load Diff
@ -37,19 +37,21 @@ class MyChrome(webdriver.Chrome):
|
||||
except Exception as e:
|
||||
print(e)
|
||||
find_element = False
|
||||
# 遍历所有的 iframe 并点击里面的元素
|
||||
# 遍历所有的 iframe 并查找里面的元素
|
||||
for iframe in iframes:
|
||||
# 切换到 iframe
|
||||
super().switch_to.default_content()
|
||||
super().switch_to.frame(iframe)
|
||||
self.iframe_env = True
|
||||
try:
|
||||
# 在 iframe 中查找并点击元素
|
||||
# 在 iframe 中查找元素
|
||||
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
||||
element = super().find_element(by=by, value=value)
|
||||
find_element = True
|
||||
except:
|
||||
print("No such element found in the iframe")
|
||||
except NoSuchElementException as e:
|
||||
print(f"No such element found in the iframe: {str(e)}")
|
||||
except Exception as e:
|
||||
print(f"Exception: {str(e)}")
|
||||
# 完成操作后切回主文档
|
||||
# super().switch_to.default_content()
|
||||
if find_element:
|
||||
@ -68,14 +70,14 @@ class MyChrome(webdriver.Chrome):
|
||||
# 获取所有的 iframe
|
||||
iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
|
||||
find_element = False
|
||||
# 遍历所有的 iframe 并点击里面的元素
|
||||
# 遍历所有的 iframe 并找到里面的元素
|
||||
for iframe in iframes:
|
||||
# 切换到 iframe
|
||||
try:
|
||||
super().switch_to.default_content()
|
||||
super().switch_to.frame(iframe)
|
||||
self.iframe_env = True
|
||||
# 在 iframe 中查找并点击元素
|
||||
# 在 iframe 中查找元素
|
||||
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
||||
elements = super().find_elements(by=by, value=value)
|
||||
if len(elements) > 0:
|
||||
@ -84,8 +86,10 @@ class MyChrome(webdriver.Chrome):
|
||||
# super().switch_to.default_content()
|
||||
if find_element:
|
||||
return elements
|
||||
except:
|
||||
print("No such element found in the iframe")
|
||||
except NoSuchElementException as e:
|
||||
print(f"No such element found in the iframe: {str(e)}")
|
||||
except Exception as e:
|
||||
print(f"Exception: {str(e)}")
|
||||
if not find_element:
|
||||
raise NoSuchElementException
|
||||
else:
|
||||
@ -117,19 +121,21 @@ if sys.platform != "darwin":
|
||||
except Exception as e:
|
||||
print(e)
|
||||
find_element = False
|
||||
# 遍历所有的 iframe 并点击里面的元素
|
||||
# 遍历所有的 iframe 并找到里面的元素
|
||||
for iframe in iframes:
|
||||
# 切换到 iframe
|
||||
super().switch_to.default_content()
|
||||
super().switch_to.frame(iframe)
|
||||
self.iframe_env = True
|
||||
try:
|
||||
# 在 iframe 中查找并点击元素
|
||||
# 在 iframe 中查找元素
|
||||
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
||||
element = super().find_element(by=by, value=value)
|
||||
find_element = True
|
||||
except:
|
||||
print("No such element found in the iframe")
|
||||
except NoSuchElementException as e:
|
||||
print(f"No such element found in the iframe: {str(e)}")
|
||||
except Exception as e:
|
||||
print(f"Exception: {str(e)}")
|
||||
# 完成操作后切回主文档
|
||||
# super().switch_to.default_content()
|
||||
if find_element:
|
||||
@ -148,14 +154,14 @@ if sys.platform != "darwin":
|
||||
# 获取所有的 iframe
|
||||
iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
|
||||
find_element = False
|
||||
# 遍历所有的 iframe 并点击里面的元素
|
||||
# 遍历所有的 iframe 并查找里面的元素
|
||||
for iframe in iframes:
|
||||
# 切换到 iframe
|
||||
try:
|
||||
super().switch_to.default_content()
|
||||
super().switch_to.frame(iframe)
|
||||
self.iframe_env = True
|
||||
# 在 iframe 中查找并点击元素
|
||||
# 在 iframe 中查找元素
|
||||
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
||||
elements = super().find_elements(by=by, value=value)
|
||||
if len(elements) > 0:
|
||||
@ -164,8 +170,10 @@ if sys.platform != "darwin":
|
||||
# super().switch_to.default_content()
|
||||
if find_element:
|
||||
return elements
|
||||
except:
|
||||
print("No such element found in the iframe")
|
||||
except NoSuchElementException as e:
|
||||
print(f"No such element found in the iframe: {str(e)}")
|
||||
except Exception as e:
|
||||
print(f"Exception: {str(e)}")
|
||||
if not find_element:
|
||||
raise NoSuchElementException
|
||||
else:
|
||||
|
@ -31,7 +31,7 @@ def lowercase_tags_in_xpath(xpath):
|
||||
def on_press_creator(press_time, event):
|
||||
def on_press(key):
|
||||
try:
|
||||
if key.char == 'p':
|
||||
if key.char == press_time["pause_key"]:
|
||||
if press_time["is_pressed"] == False: # 没按下p键时,记录按下p键的时间
|
||||
press_time["duration"] = time.time()
|
||||
press_time["is_pressed"] = True
|
||||
@ -39,14 +39,14 @@ def on_press_creator(press_time, event):
|
||||
duration = time.time() - press_time["duration"]
|
||||
if duration > 2:
|
||||
if event._flag == False:
|
||||
print("任务执行中,长按p键暂停执行。")
|
||||
print("Task is running, long press 'p' to pause.")
|
||||
print("任务执行中,长按" + press_time["pause_key"] + "键暂停执行。")
|
||||
print("Task is running, long press '" + press_time["pause_key"] + "' to pause.")
|
||||
# 设置Event的值为True,使得线程b可以继续执行
|
||||
event.set()
|
||||
else:
|
||||
# 设置Event的值为False,使得线程b暂停执行
|
||||
print("任务已暂停,长按p键继续执行...")
|
||||
print("Task paused, long press 'p' to continue...")
|
||||
print("任务已暂停,长按" + press_time["pause_key"] + "键继续执行...")
|
||||
print("Task paused, long press '" + press_time["pause_key"] + "' to continue...")
|
||||
event.clear()
|
||||
press_time["duration"] = time.time()
|
||||
press_time["is_pressed"] = False
|
||||
@ -176,26 +176,36 @@ def write_to_csv(file_name, data, record):
|
||||
f_csv.writerow(to_write)
|
||||
f.close()
|
||||
|
||||
|
||||
def eval_repl(matchobj):
|
||||
print(matchobj.group(1))
|
||||
return str(eval(matchobj.group(1), globals(), locals()))
|
||||
|
||||
|
||||
|
||||
def replace_field_values(orginal_text, outputParameters, browser=None):
|
||||
pattern = r'Field\["([^"]+)"\]'
|
||||
try:
|
||||
replaced_text = re.sub(
|
||||
pattern, lambda match: outputParameters.get(match.group(1), ''), orginal_text)
|
||||
if replaced_text.find("EVAL") != -1: # 如果返回值中包含EVAL
|
||||
if re.search(r'eval\(', replaced_text, re.IGNORECASE): # 如果返回值中包含EVAL
|
||||
replaced_text = replaced_text.replace("self.", "browser.")
|
||||
replaced_text = re.sub(r'EVAL\("(.*?)"\)', lambda match: str(eval(match.group(1))), replaced_text)
|
||||
except:
|
||||
pattern = re.compile(r'(?i)eval\("(.+?)"\)')
|
||||
# 循环替换所有匹配到的eval语句
|
||||
while True:
|
||||
match = pattern.search(replaced_text)
|
||||
if not match:
|
||||
break
|
||||
# 执行eval并将其结果转换为字符串形式
|
||||
eval_replaced_text = str(eval(match.group(1)))
|
||||
# 替换eval语句
|
||||
replaced_text = replaced_text.replace(match.group(0), eval_replaced_text)
|
||||
except Exception as e:
|
||||
print("eval替换失败,请检查eval语句是否正确。| Failed to replace eval, please check if the eval statement is correct.")
|
||||
replaced_text = orginal_text
|
||||
return replaced_text
|
||||
|
||||
|
||||
def readCode(code):
|
||||
if code.startswith("outside:"):
|
||||
file_name = os.path.join(os.path.abspath("./"), code[8:])
|
||||
with open(file_name, 'r', encoding='utf-8-sig') as file_obj:
|
||||
code = file_obj.read()
|
||||
return code
|
||||
|
||||
def write_to_json(file_name, data, types, record, keys):
|
||||
keys = list(keys)
|
||||
# Prepare empty list for data
|
||||
|
57
.temp_to_pub/EasySpider_MacOS_all_arch/myCode.py
Normal file
57
.temp_to_pub/EasySpider_MacOS_all_arch/myCode.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""
|
||||
这是一个示例代码文件,可以直接在这里写Python代码,然后在程序中的exec操作中调用。如果此文件名称为myCode.py,请将此文件放置在EasySpider程序目录下(和Data/文件夹同级),那么在程序中的exec操作中可以直接写outside:myCode.py来调用此文件中的代码,示例:
|
||||
|
||||
1. 用self.browser表示当前操作的浏览器,可直接用selenium的API进行操作,如self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END)即可滚动到页面最下方。
|
||||
2. 自定义一个全局变量:self.myVar = 1
|
||||
3. 操纵上面定义的全局变量:self.myVar = self.myVar + 1
|
||||
4. 打印上面定义的全局变量:print(self.myVar)
|
||||
5. 将自定义变量的值赋值为某个字段提取的值:self.myVar = self.outputParameters["字段名"]
|
||||
6. 修改某个字段提取的值:self.outputParameters["字段名"] = "新值"
|
||||
|
||||
对于更加复杂的操作,请直接下载源代码并编译执行。
|
||||
|
||||
This is a sample code snippet file. You can directly write Python code here, and then call it in the program using an `exec` operation. If this file is named myCode.py, please place this file under the EasySpider program directory (at the same level as the Data/ folder). Then, in the program's `exec` operation, you can directly write outside:myCode.py to invoke the code from this file. Examples:
|
||||
|
||||
1. Use self.browser to refer to the current browser being operated on. You can directly utilize the selenium API to perform actions. For instance, self.browser.find_element(By.CSS_SELECTOR, "body").send_keys(Keys.END) will scroll to the bottom of the page.
|
||||
2. Define a global variable: self.myVar = 1
|
||||
3. Manipulate the above-defined global variable: self.myVar = self.myVar + 1
|
||||
4. Print the above-defined global variable: print(self.myVar)
|
||||
5. Assign a value to the custom variable from a value extracted for some field: self.myVar = self.outputParameters["field name"]
|
||||
6. Modify the value extracted for some field: self.outputParameters["field name"] = "new value"
|
||||
|
||||
For more complex operations, please download the source code and compile it for execution.
|
||||
"""
|
||||
|
||||
# 请在下面编写你的代码,不要有代码缩进!!! | Please write your code below, do not indent the code!!!
|
||||
|
||||
# 导包 | Import packages
|
||||
from selenium.common.exceptions import ElementClickInterceptedException
|
||||
|
||||
# 定义一个函数 | Define a function
|
||||
def test(n = 0):
|
||||
for i in range(0, n):
|
||||
if i % 2 == 0:
|
||||
print(i)
|
||||
return "test"
|
||||
|
||||
# 异常捕获 | Exception capture
|
||||
try:
|
||||
# 使用XPath定位元素并点击浏览器中元素 | Use XPath to locate the element and click the element in the browser
|
||||
element = self.browser.find_element(By.XPATH, "//*[contains(@class, 'LeftSide_menu_list__qXCeM')]/div[1]/a[1]") # 这里请忽略IDE的报错,因为代码是嵌入到程序中的,IDE无法识别self变量和By变量是正常的 | Please ignore any IDE error on this line: the code is embedded into the program, so it is normal for the IDE not to recognize the self and By variables
|
||||
element.click()
|
||||
print("点击成功|Click success")
|
||||
except ElementClickInterceptedException:
|
||||
# 如果元素被遮挡,点击失败 | If the element is blocked, the click fails
|
||||
print("元素被遮挡,无法点击|The element is blocked and cannot be clicked")
|
||||
except Exception as e:
|
||||
# 打印其他异常 | Print any other exception
|
||||
print("发生了一个异常|An exception occurred", e)
|
||||
finally:
|
||||
# 测试函数 | Test function
|
||||
self.a = 1
|
||||
print("a = ", self.a)
|
||||
self.a = self.a + 1
|
||||
print("a = ", self.a)
|
||||
print("All parameters:", self.outputParameters)
|
||||
print(test(3))
|
||||
print("执行完毕|Execution completed")
|
Binary file not shown.
Binary file not shown.
@ -1 +1 @@
|
||||
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"sys_version":"x64","mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\ElectronJS\\user_data"}
|
||||
{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","debug":false,"copyright":1,"sys_version":"x64","mysql_config_path":"./mysql_config.json","absolute_user_data_folder":"/Users/naibo/Documents/EasySpider/ElectronJS/user_data"}
|
10
ElectronJS/package-lock.json
generated
10
ElectronJS/package-lock.json
generated
@ -15,6 +15,7 @@
|
||||
"formidable": "^3.5.0",
|
||||
"http": "^0.0.1-security",
|
||||
"multer": "^1.4.5-lts.1",
|
||||
"node-abi": "^3.52.0",
|
||||
"node-window-manager": "^2.2.4",
|
||||
"selenium-webdriver": "^4.16.0",
|
||||
"ws": "^8.12.0",
|
||||
@ -3914,9 +3915,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/node-abi": {
|
||||
"version": "3.45.0",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"version": "3.52.0",
|
||||
"resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.52.0.tgz",
|
||||
"integrity": "sha512-JJ98b02z16ILv7859irtXn4oUaFWADtvkzy2c0IAatNVX2Mc9Yoh8z6hZInn3QwvMEYhHuQloYi+TTQy67SIdQ==",
|
||||
"dependencies": {
|
||||
"semver": "^7.3.5"
|
||||
},
|
||||
@ -4814,7 +4815,6 @@
|
||||
},
|
||||
"node_modules/semver": {
|
||||
"version": "7.5.3",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"lru-cache": "^6.0.0"
|
||||
@ -4834,7 +4834,6 @@
|
||||
},
|
||||
"node_modules/semver/node_modules/lru-cache": {
|
||||
"version": "6.0.0",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"yallist": "^4.0.0"
|
||||
@ -5665,7 +5664,6 @@
|
||||
},
|
||||
"node_modules/yallist": {
|
||||
"version": "4.0.0",
|
||||
"dev": true,
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/yargs": {
|
||||
|
@ -37,6 +37,7 @@
|
||||
"formidable": "^3.5.0",
|
||||
"http": "^0.0.1-security",
|
||||
"multer": "^1.4.5-lts.1",
|
||||
"node-abi": "^3.52.0",
|
||||
"node-window-manager": "^2.2.4",
|
||||
"selenium-webdriver": "^4.16.0",
|
||||
"ws": "^8.12.0",
|
||||
@ -79,4 +80,4 @@
|
||||
"publishers": []
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
1
ElectronJS/tasks/229.json
Normal file
1
ElectronJS/tasks/229.json
Normal file
@ -0,0 +1 @@
|
||||
{"id":229,"name":"知乎 - 有问题,就会有答案","url":"https://www.zhihu.com","links":"https://www.zhihu.com","create_time":"07/12/2023, 03:26:24","update_time":"07/12/2023, 03:43:34","version":"0.6.0","saveThreshold":10,"quitWaitTime":6,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"pauseKey":"t","containJudge":false,"desc":"https://www.zhihu.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.zhihu.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.zhihu.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.zhihu.com","links":"https://www.zhihu.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环采集数据","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","wai
tElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/main[1]/div[1]/div[2]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/h2[1]/div[1]","//div[contains(., '死刑执行前可以谎称肚')]","/html/body/div[last()-7]/div/main/div/div/div[last()-1]/div/div/div/div/div/div[last()-12]/div/div/div/div/h2/div"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"死刑执行前可以谎称肚子痛,想排泄粪便,籍此拖延时间吗?"}],"unique_index":"onlvi030w9jlpu5tjzb","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
|
@ -48,7 +48,7 @@ def copy_folder(source_folder, destination_folder):
|
||||
|
||||
|
||||
def get_chrome_version():
|
||||
version = "115"
|
||||
version = "120"
|
||||
if sys.platform == "win32":
|
||||
version_re = re.compile(r"^[1-9]\d*\.\d*.\d*")
|
||||
try:
|
||||
|
3
ExecuteStage/.vscode/launch.json
vendored
3
ExecuteStage/.vscode/launch.json
vendored
@ -12,7 +12,8 @@
|
||||
"justMyCode": false,
|
||||
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
|
||||
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
||||
"args": ["--ids", "[52]", "--headless", "0", "--user_data", "1", "--keyboard", "1"]
|
||||
// "args": ["--ids", "[1]", "--headless", "0", "--user_data", "1", "--keyboard", "1"]
|
||||
"args": "--ids '[3]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
|
||||
}
|
||||
]
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user