IFrame Alpha

This commit is contained in:
NaiboWang-Alienware 2023-07-04 21:20:01 +08:00
parent e54e25172a
commit 11cda1bc1f
8 changed files with 67 additions and 392 deletions

View File

@ -0,0 +1,34 @@
<!DOCTYPE html>
<html>
<head>
<title>Dynamic Iframe</title>
<script>
// On page load, point the first iframe (#myIframe) at the URL supplied in the
// `address` query parameter, e.g. `?address=https://example.com`.
// When the parameter is missing, malformed, or uses a disallowed scheme, the
// iframe keeps the default `src` declared in the markup.
window.onload = function() {
    var urlParams = new URLSearchParams(window.location.search);
    var address = urlParams.get('address');
    if (!address) {
        return;
    }

    // Resolve relative addresses against the current page, exactly as the
    // browser would when assigning a relative value to `iframe.src`.
    var target;
    try {
        target = new URL(address, window.location.href);
    } catch (e) {
        return; // not a parseable URL: keep the default src
    }

    // The address comes from the query string, i.e. from whoever built the
    // link. A `javascript:` or `data:` URL assigned to `iframe.src` would run
    // attacker-controlled script, so only web schemes are accepted, plus the
    // page's own scheme so local `file://` testing keeps working.
    var allowedProtocols = ['http:', 'https:', window.location.protocol];
    if (allowedProtocols.indexOf(target.protocol) === -1) {
        return;
    }

    var frame = document.getElementById('myIframe');
    if (frame) {
        frame.src = target.href;
    }
};
</script>
</head>
<body>
<h1>IFRAME TEST</h1>
<table border="1">
<tr>
<th>Header 1</th>
<th>Header 2</th>
</tr>
<tr>
<td>Row 1, Cell 1</td>
<td>Row 1, Cell 2</td>
</tr>
<tr>
<td>Row 2, Cell 1</td>
<td>Row 2, Cell 2</td>
</tr>
</table>
<iframe id="myIframe" src="https://www.easyspider.cn" style="width: 100%; min-height: 600px;"></iframe>
<iframe src="https://www.easyspider.cn" style="width: 100%; min-height: 1200px;"></iframe>
</body>
</html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -10,7 +10,7 @@
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": true,
"args": ["--id", "[18]", "--read_type", "remote", "--headless", "0"]
"args": ["--id", "[29]", "--read_type", "remote", "--headless", "0"]
// "args": ["--id", "[2]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
// "args": ["--id", "[44]", "--headless", "0", "--user_data", "1"]
}

View File

@ -123,11 +123,11 @@ class BrowserThread(Thread):
except:
para["iframe"] = False
if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2:
# iframe中提取数据的绝对寻址操作不可优化
if para["relative"] == False and para["iframe"] == True:
para["optimizable"] = False
else:
para["optimizable"] = True
# # iframe中提取数据的绝对寻址操作不可优化
# if para["relative"] == False and para["iframe"] == True:
# para["optimizable"] = False
# else:
para["optimizable"] = True
else:
para["optimizable"] = False
@ -465,8 +465,8 @@ class BrowserThread(Thread):
'return history.length') # 记录本次循环内的history的length
self.history["index"] = thisHistoryLength
self.history["handle"] = thisHandle
if node["parameters"]["iframe"]:
self.browser.switch_to.default_content() # 循环前切换到主文档
# if node["parameters"]["iframe"]:
# self.browser.switch_to.default_content() # 循环前切换到主文档
if int(node["parameters"]["loopType"]) == 0: # 单个元素循环
# 无跳转标签页操作
count = 0 # 执行次数
@ -651,8 +651,8 @@ class BrowserThread(Thread):
self.executeNode(i, code, node["parameters"]["xpath"], 0)
self.history["index"] = thisHistoryLength
self.history["handle"] = self.browser.current_window_handle
if node["parameters"]["iframe"]:
self.browser.switch_to.default_content()
# if node["parameters"]["iframe"]:
# self.browser.switch_to.default_content()
self.scrollDown(node["parameters"])
# 打开网页事件
@ -840,8 +840,8 @@ class BrowserThread(Thread):
self.recordLog("Cannot find element:" +
path + ", please try to set the wait time before executing this operation")
print("找不到要点击的元素:" + path + ",请尝试在执行此操作前设置等待时间")
if para["iframe"]:
self.browser.switch_to.default_content()
# if para["iframe"]:
# self.browser.switch_to.default_content()
waitTime = float(para["wait"]) + 0.01 # 点击之后等待
try:
waitType = int(para["waitType"])
@ -1034,6 +1034,10 @@ class BrowserThread(Thread):
for p in para["paras"]:
if p["optimizable"]:
try:
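# Fast extraction is only possible when the browser's current context (root document vs. iframe) matches the one this parameter expects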
if self.browser.iframe_env != p["iframe"]:
p["optimizable"] = False
continue
p["relativeXPath"] = p["relativeXPath"].lower()
if p["nodeType"] == 2:
xpath = p["relativeXPath"] + "/@href"

View File

@ -19,7 +19,15 @@ desired_capabilities["pageLoadStrategy"] = "none"
class MyChrome(webdriver.Chrome):
def __init__(self, *args, **kwargs):
self.iframe_env = False # 现在的环境是root还是iframe
super().__init__(*args, **kwargs) # 调用父类的 __init__
def find_element(self, by=By.ID, value=None, iframe=False):
if self.iframe_env:
super().switch_to.default_content()
self.iframe_env = False
# 在这里改变查找元素的行为
if iframe:
# 获取所有的 iframe
@ -32,6 +40,7 @@ class MyChrome(webdriver.Chrome):
for iframe in iframes:
# 切换到 iframe
super().switch_to.frame(iframe)
self.iframe_env = True
try:
# 在 iframe 中查找并点击元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
@ -49,6 +58,9 @@ class MyChrome(webdriver.Chrome):
return super().find_element(by=by, value=value)
def find_elements(self, by=By.ID, value=None, iframe=False):
if self.iframe_env:
super().switch_to.default_content()
self.iframe_env = False
# 在这里改变查找元素的行为
if iframe:
# 获取所有的 iframe
@ -58,6 +70,7 @@ class MyChrome(webdriver.Chrome):
for iframe in iframes:
# 切换到 iframe
super().switch_to.frame(iframe)
self.iframe_env = True
try:
# 在 iframe 中查找并点击元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素