mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-22 21:10:28 +08:00
IFrame Alpha
This commit is contained in:
parent
e54e25172a
commit
11cda1bc1f
34
ElectronJS/src/taskGrid/test_pages/iframe.html
Normal file
34
ElectronJS/src/taskGrid/test_pages/iframe.html
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Dynamic Iframe</title>
|
||||||
|
<script>
|
||||||
|
window.onload = function() {
|
||||||
|
var urlParams = new URLSearchParams(window.location.search);
|
||||||
|
var address = urlParams.get('address');
|
||||||
|
if (address) {
|
||||||
|
document.getElementById('myIframe').src = address;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</script>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>IFRAME TEST</h1>
|
||||||
|
<table border="1">
|
||||||
|
<tr>
|
||||||
|
<th>Header 1</th>
|
||||||
|
<th>Header 2</th>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Row 1, Cell 1</td>
|
||||||
|
<td>Row 1, Cell 2</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Row 2, Cell 1</td>
|
||||||
|
<td>Row 2, Cell 2</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
<iframe id="myIframe" src="https://www.easyspider.cn" style="width: 100%; min-height: 600px;"></iframe>
|
||||||
|
<iframe src="https://www.easyspider.cn" style="width: 100%; min-height: 1200px;"></iframe>
|
||||||
|
</body>
|
||||||
|
</html>
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/124.json
Normal file
1
ElectronJS/tasks/124.json
Normal file
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/125.json
Normal file
1
ElectronJS/tasks/125.json
Normal file
File diff suppressed because one or more lines are too long
2
ExecuteStage/.vscode/launch.json
vendored
2
ExecuteStage/.vscode/launch.json
vendored
@ -10,7 +10,7 @@
|
|||||||
"program": "${file}",
|
"program": "${file}",
|
||||||
"console": "integratedTerminal",
|
"console": "integratedTerminal",
|
||||||
"justMyCode": true,
|
"justMyCode": true,
|
||||||
"args": ["--id", "[18]", "--read_type", "remote", "--headless", "0"]
|
"args": ["--id", "[29]", "--read_type", "remote", "--headless", "0"]
|
||||||
// "args": ["--id", "[2]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
// "args": ["--id", "[2]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
||||||
// "args": ["--id", "[44]", "--headless", "0", "--user_data", "1"]
|
// "args": ["--id", "[44]", "--headless", "0", "--user_data", "1"]
|
||||||
}
|
}
|
||||||
|
@ -123,11 +123,11 @@ class BrowserThread(Thread):
|
|||||||
except:
|
except:
|
||||||
para["iframe"] = False
|
para["iframe"] = False
|
||||||
if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2:
|
if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2:
|
||||||
# iframe中提取数据的绝对寻址操作不可优化
|
# # iframe中提取数据的绝对寻址操作不可优化
|
||||||
if para["relative"] == False and para["iframe"] == True:
|
# if para["relative"] == False and para["iframe"] == True:
|
||||||
para["optimizable"] = False
|
# para["optimizable"] = False
|
||||||
else:
|
# else:
|
||||||
para["optimizable"] = True
|
para["optimizable"] = True
|
||||||
else:
|
else:
|
||||||
para["optimizable"] = False
|
para["optimizable"] = False
|
||||||
|
|
||||||
@ -465,8 +465,8 @@ class BrowserThread(Thread):
|
|||||||
'return history.length') # 记录本次循环内的history的length
|
'return history.length') # 记录本次循环内的history的length
|
||||||
self.history["index"] = thisHistoryLength
|
self.history["index"] = thisHistoryLength
|
||||||
self.history["handle"] = thisHandle
|
self.history["handle"] = thisHandle
|
||||||
if node["parameters"]["iframe"]:
|
# if node["parameters"]["iframe"]:
|
||||||
self.browser.switch_to.default_content() # 循环前切换到主文档
|
# self.browser.switch_to.default_content() # 循环前切换到主文档
|
||||||
if int(node["parameters"]["loopType"]) == 0: # 单个元素循环
|
if int(node["parameters"]["loopType"]) == 0: # 单个元素循环
|
||||||
# 无跳转标签页操作
|
# 无跳转标签页操作
|
||||||
count = 0 # 执行次数
|
count = 0 # 执行次数
|
||||||
@ -651,8 +651,8 @@ class BrowserThread(Thread):
|
|||||||
self.executeNode(i, code, node["parameters"]["xpath"], 0)
|
self.executeNode(i, code, node["parameters"]["xpath"], 0)
|
||||||
self.history["index"] = thisHistoryLength
|
self.history["index"] = thisHistoryLength
|
||||||
self.history["handle"] = self.browser.current_window_handle
|
self.history["handle"] = self.browser.current_window_handle
|
||||||
if node["parameters"]["iframe"]:
|
# if node["parameters"]["iframe"]:
|
||||||
self.browser.switch_to.default_content()
|
# self.browser.switch_to.default_content()
|
||||||
self.scrollDown(node["parameters"])
|
self.scrollDown(node["parameters"])
|
||||||
|
|
||||||
# 打开网页事件
|
# 打开网页事件
|
||||||
@ -840,8 +840,8 @@ class BrowserThread(Thread):
|
|||||||
self.recordLog("Cannot find element:" +
|
self.recordLog("Cannot find element:" +
|
||||||
path + ", please try to set the wait time before executing this operation")
|
path + ", please try to set the wait time before executing this operation")
|
||||||
print("找不到要点击的元素:" + path + ",请尝试在执行此操作前设置等待时间")
|
print("找不到要点击的元素:" + path + ",请尝试在执行此操作前设置等待时间")
|
||||||
if para["iframe"]:
|
# if para["iframe"]:
|
||||||
self.browser.switch_to.default_content()
|
# self.browser.switch_to.default_content()
|
||||||
waitTime = float(para["wait"]) + 0.01 # 点击之后等待
|
waitTime = float(para["wait"]) + 0.01 # 点击之后等待
|
||||||
try:
|
try:
|
||||||
waitType = int(para["waitType"])
|
waitType = int(para["waitType"])
|
||||||
@ -1034,6 +1034,10 @@ class BrowserThread(Thread):
|
|||||||
for p in para["paras"]:
|
for p in para["paras"]:
|
||||||
if p["optimizable"]:
|
if p["optimizable"]:
|
||||||
try:
|
try:
|
||||||
|
# 只有当前环境没有发生变化时才可以快速提取数据
|
||||||
|
if self.browser.iframe_env != p["iframe"]:
|
||||||
|
p["optimizable"] = False
|
||||||
|
continue
|
||||||
p["relativeXPath"] = p["relativeXPath"].lower()
|
p["relativeXPath"] = p["relativeXPath"].lower()
|
||||||
if p["nodeType"] == 2:
|
if p["nodeType"] == 2:
|
||||||
xpath = p["relativeXPath"] + "/@href"
|
xpath = p["relativeXPath"] + "/@href"
|
||||||
|
@ -19,7 +19,15 @@ desired_capabilities["pageLoadStrategy"] = "none"
|
|||||||
|
|
||||||
|
|
||||||
class MyChrome(webdriver.Chrome):
|
class MyChrome(webdriver.Chrome):
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
self.iframe_env = False # 现在的环境是root还是iframe
|
||||||
|
super().__init__(*args, **kwargs) # 调用父类的 __init__
|
||||||
|
|
||||||
def find_element(self, by=By.ID, value=None, iframe=False):
|
def find_element(self, by=By.ID, value=None, iframe=False):
|
||||||
|
if self.iframe_env:
|
||||||
|
super().switch_to.default_content()
|
||||||
|
self.iframe_env = False
|
||||||
# 在这里改变查找元素的行为
|
# 在这里改变查找元素的行为
|
||||||
if iframe:
|
if iframe:
|
||||||
# 获取所有的 iframe
|
# 获取所有的 iframe
|
||||||
@ -32,6 +40,7 @@ class MyChrome(webdriver.Chrome):
|
|||||||
for iframe in iframes:
|
for iframe in iframes:
|
||||||
# 切换到 iframe
|
# 切换到 iframe
|
||||||
super().switch_to.frame(iframe)
|
super().switch_to.frame(iframe)
|
||||||
|
self.iframe_env = True
|
||||||
try:
|
try:
|
||||||
# 在 iframe 中查找并点击元素
|
# 在 iframe 中查找并点击元素
|
||||||
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
||||||
@ -49,6 +58,9 @@ class MyChrome(webdriver.Chrome):
|
|||||||
return super().find_element(by=by, value=value)
|
return super().find_element(by=by, value=value)
|
||||||
|
|
||||||
def find_elements(self, by=By.ID, value=None, iframe=False):
|
def find_elements(self, by=By.ID, value=None, iframe=False):
|
||||||
|
if self.iframe_env:
|
||||||
|
super().switch_to.default_content()
|
||||||
|
self.iframe_env = False
|
||||||
# 在这里改变查找元素的行为
|
# 在这里改变查找元素的行为
|
||||||
if iframe:
|
if iframe:
|
||||||
# 获取所有的 iframe
|
# 获取所有的 iframe
|
||||||
@ -58,6 +70,7 @@ class MyChrome(webdriver.Chrome):
|
|||||||
for iframe in iframes:
|
for iframe in iframes:
|
||||||
# 切换到 iframe
|
# 切换到 iframe
|
||||||
super().switch_to.frame(iframe)
|
super().switch_to.frame(iframe)
|
||||||
|
self.iframe_env = True
|
||||||
try:
|
try:
|
||||||
# 在 iframe 中查找并点击元素
|
# 在 iframe 中查找并点击元素
|
||||||
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
||||||
|
Loading…
x
Reference in New Issue
Block a user