mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-22 14:03:52 +08:00
IFrame Alpha
This commit is contained in:
parent
e54e25172a
commit
11cda1bc1f
34
ElectronJS/src/taskGrid/test_pages/iframe.html
Normal file
34
ElectronJS/src/taskGrid/test_pages/iframe.html
Normal file
@ -0,0 +1,34 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Dynamic Iframe</title>
|
||||
<script>
|
||||
window.onload = function() {
|
||||
var urlParams = new URLSearchParams(window.location.search);
|
||||
var address = urlParams.get('address');
|
||||
if (address) {
|
||||
document.getElementById('myIframe').src = address;
|
||||
}
|
||||
}
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<h1>IFRAME TEST</h1>
|
||||
<table border="1">
|
||||
<tr>
|
||||
<th>Header 1</th>
|
||||
<th>Header 2</th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Row 1, Cell 1</td>
|
||||
<td>Row 1, Cell 2</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Row 2, Cell 1</td>
|
||||
<td>Row 2, Cell 2</td>
|
||||
</tr>
|
||||
</table>
|
||||
<iframe id="myIframe" src="https://www.easyspider.cn" style="width: 100%; min-height: 600px;"></iframe>
|
||||
<iframe src="https://www.easyspider.cn" style="width: 100%; min-height: 1200px;"></iframe>
|
||||
</body>
|
||||
</html>
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/124.json
Normal file
1
ElectronJS/tasks/124.json
Normal file
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/125.json
Normal file
1
ElectronJS/tasks/125.json
Normal file
File diff suppressed because one or more lines are too long
2
ExecuteStage/.vscode/launch.json
vendored
2
ExecuteStage/.vscode/launch.json
vendored
@ -10,7 +10,7 @@
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal",
|
||||
"justMyCode": true,
|
||||
"args": ["--id", "[18]", "--read_type", "remote", "--headless", "0"]
|
||||
"args": ["--id", "[29]", "--read_type", "remote", "--headless", "0"]
|
||||
// "args": ["--id", "[2]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
||||
// "args": ["--id", "[44]", "--headless", "0", "--user_data", "1"]
|
||||
}
|
||||
|
@ -123,11 +123,11 @@ class BrowserThread(Thread):
|
||||
except:
|
||||
para["iframe"] = False
|
||||
if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2:
|
||||
# iframe中提取数据的绝对寻址操作不可优化
|
||||
if para["relative"] == False and para["iframe"] == True:
|
||||
para["optimizable"] = False
|
||||
else:
|
||||
para["optimizable"] = True
|
||||
# # iframe中提取数据的绝对寻址操作不可优化
|
||||
# if para["relative"] == False and para["iframe"] == True:
|
||||
# para["optimizable"] = False
|
||||
# else:
|
||||
para["optimizable"] = True
|
||||
else:
|
||||
para["optimizable"] = False
|
||||
|
||||
@ -465,8 +465,8 @@ class BrowserThread(Thread):
|
||||
'return history.length') # 记录本次循环内的history的length
|
||||
self.history["index"] = thisHistoryLength
|
||||
self.history["handle"] = thisHandle
|
||||
if node["parameters"]["iframe"]:
|
||||
self.browser.switch_to.default_content() # 循环前切换到主文档
|
||||
# if node["parameters"]["iframe"]:
|
||||
# self.browser.switch_to.default_content() # 循环前切换到主文档
|
||||
if int(node["parameters"]["loopType"]) == 0: # 单个元素循环
|
||||
# 无跳转标签页操作
|
||||
count = 0 # 执行次数
|
||||
@ -651,8 +651,8 @@ class BrowserThread(Thread):
|
||||
self.executeNode(i, code, node["parameters"]["xpath"], 0)
|
||||
self.history["index"] = thisHistoryLength
|
||||
self.history["handle"] = self.browser.current_window_handle
|
||||
if node["parameters"]["iframe"]:
|
||||
self.browser.switch_to.default_content()
|
||||
# if node["parameters"]["iframe"]:
|
||||
# self.browser.switch_to.default_content()
|
||||
self.scrollDown(node["parameters"])
|
||||
|
||||
# 打开网页事件
|
||||
@ -840,8 +840,8 @@ class BrowserThread(Thread):
|
||||
self.recordLog("Cannot find element:" +
|
||||
path + ", please try to set the wait time before executing this operation")
|
||||
print("找不到要点击的元素:" + path + ",请尝试在执行此操作前设置等待时间")
|
||||
if para["iframe"]:
|
||||
self.browser.switch_to.default_content()
|
||||
# if para["iframe"]:
|
||||
# self.browser.switch_to.default_content()
|
||||
waitTime = float(para["wait"]) + 0.01 # 点击之后等待
|
||||
try:
|
||||
waitType = int(para["waitType"])
|
||||
@ -1034,6 +1034,10 @@ class BrowserThread(Thread):
|
||||
for p in para["paras"]:
|
||||
if p["optimizable"]:
|
||||
try:
|
||||
# 只有当前环境不变变化才可以快速提取数据
|
||||
if self.browser.iframe_env != p["iframe"]:
|
||||
p["optimizable"] = False
|
||||
continue
|
||||
p["relativeXPath"] = p["relativeXPath"].lower()
|
||||
if p["nodeType"] == 2:
|
||||
xpath = p["relativeXPath"] + "/@href"
|
||||
|
@ -19,7 +19,15 @@ desired_capabilities["pageLoadStrategy"] = "none"
|
||||
|
||||
|
||||
class MyChrome(webdriver.Chrome):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.iframe_env = False # 现在的环境是root还是iframe
|
||||
super().__init__(*args, **kwargs) # 调用父类的 __init__
|
||||
|
||||
def find_element(self, by=By.ID, value=None, iframe=False):
|
||||
if self.iframe_env:
|
||||
super().switch_to.default_content()
|
||||
self.iframe_env = False
|
||||
# 在这里改变查找元素的行为
|
||||
if iframe:
|
||||
# 获取所有的 iframe
|
||||
@ -32,6 +40,7 @@ class MyChrome(webdriver.Chrome):
|
||||
for iframe in iframes:
|
||||
# 切换到 iframe
|
||||
super().switch_to.frame(iframe)
|
||||
self.iframe_env = True
|
||||
try:
|
||||
# 在 iframe 中查找并点击元素
|
||||
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
||||
@ -49,6 +58,9 @@ class MyChrome(webdriver.Chrome):
|
||||
return super().find_element(by=by, value=value)
|
||||
|
||||
def find_elements(self, by=By.ID, value=None, iframe=False):
|
||||
if self.iframe_env:
|
||||
super().switch_to.default_content()
|
||||
self.iframe_env = False
|
||||
# 在这里改变查找元素的行为
|
||||
if iframe:
|
||||
# 获取所有的 iframe
|
||||
@ -58,6 +70,7 @@ class MyChrome(webdriver.Chrome):
|
||||
for iframe in iframes:
|
||||
# 切换到 iframe
|
||||
super().switch_to.frame(iframe)
|
||||
self.iframe_env = True
|
||||
try:
|
||||
# 在 iframe 中查找并点击元素
|
||||
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
||||
|
Loading…
x
Reference in New Issue
Block a user