IFrame Alpha

This commit is contained in:
NaiboWang-Alienware 2023-07-04 21:20:01 +08:00
parent e54e25172a
commit 11cda1bc1f
8 changed files with 67 additions and 392 deletions

View File

@ -0,0 +1,34 @@
<!DOCTYPE html>
<html>
<head>
<title>Dynamic Iframe</title>
<script>
// On load, point the #myIframe element at the URL supplied in the `address`
// query parameter (e.g. ?address=https://example.com). If the parameter is
// missing, unparseable, or uses a disallowed scheme, the iframe keeps the
// default src declared in the markup.
window.onload = function() {
    var urlParams = new URLSearchParams(window.location.search);
    var address = urlParams.get('address');
    if (!address) {
        return;
    }
    // The query string is caller-controlled: assigning a `javascript:` URL to
    // iframe.src would execute script in this page's context. Resolve the
    // value against the current page (so relative addresses keep working) and
    // accept only http(s) or the page's own scheme (covers file:// test runs).
    var target;
    try {
        target = new URL(address, window.location.href);
    } catch (e) {
        return; // not a parseable URL; leave the default src untouched
    }
    var allowedProtocols = ['http:', 'https:', window.location.protocol];
    if (allowedProtocols.indexOf(target.protocol) === -1) {
        return;
    }
    document.getElementById('myIframe').src = target.href;
}
</script>
</head>
<body>
<h1>IFRAME TEST</h1>
<table border="1">
<tr>
<th>Header 1</th>
<th>Header 2</th>
</tr>
<tr>
<td>Row 1, Cell 1</td>
<td>Row 1, Cell 2</td>
</tr>
<tr>
<td>Row 2, Cell 1</td>
<td>Row 2, Cell 2</td>
</tr>
</table>
<iframe id="myIframe" src="https://www.easyspider.cn" style="width: 100%; min-height: 600px;"></iframe>
<iframe src="https://www.easyspider.cn" style="width: 100%; min-height: 1200px;"></iframe>
</body>
</html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -10,7 +10,7 @@
"program": "${file}", "program": "${file}",
"console": "integratedTerminal", "console": "integratedTerminal",
"justMyCode": true, "justMyCode": true,
"args": ["--id", "[18]", "--read_type", "remote", "--headless", "0"] "args": ["--id", "[29]", "--read_type", "remote", "--headless", "0"]
// "args": ["--id", "[2]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"] // "args": ["--id", "[2]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
// "args": ["--id", "[44]", "--headless", "0", "--user_data", "1"] // "args": ["--id", "[44]", "--headless", "0", "--user_data", "1"]
} }

View File

@ -123,11 +123,11 @@ class BrowserThread(Thread):
except: except:
para["iframe"] = False para["iframe"] = False
if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2: if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2:
# iframe中提取数据的绝对寻址操作不可优化 # # iframe中提取数据的绝对寻址操作不可优化
if para["relative"] == False and para["iframe"] == True: # if para["relative"] == False and para["iframe"] == True:
para["optimizable"] = False # para["optimizable"] = False
else: # else:
para["optimizable"] = True para["optimizable"] = True
else: else:
para["optimizable"] = False para["optimizable"] = False
@ -465,8 +465,8 @@ class BrowserThread(Thread):
'return history.length') # 记录本次循环内的history的length 'return history.length') # 记录本次循环内的history的length
self.history["index"] = thisHistoryLength self.history["index"] = thisHistoryLength
self.history["handle"] = thisHandle self.history["handle"] = thisHandle
if node["parameters"]["iframe"]: # if node["parameters"]["iframe"]:
self.browser.switch_to.default_content() # 循环前切换到主文档 # self.browser.switch_to.default_content() # 循环前切换到主文档
if int(node["parameters"]["loopType"]) == 0: # 单个元素循环 if int(node["parameters"]["loopType"]) == 0: # 单个元素循环
# 无跳转标签页操作 # 无跳转标签页操作
count = 0 # 执行次数 count = 0 # 执行次数
@ -651,8 +651,8 @@ class BrowserThread(Thread):
self.executeNode(i, code, node["parameters"]["xpath"], 0) self.executeNode(i, code, node["parameters"]["xpath"], 0)
self.history["index"] = thisHistoryLength self.history["index"] = thisHistoryLength
self.history["handle"] = self.browser.current_window_handle self.history["handle"] = self.browser.current_window_handle
if node["parameters"]["iframe"]: # if node["parameters"]["iframe"]:
self.browser.switch_to.default_content() # self.browser.switch_to.default_content()
self.scrollDown(node["parameters"]) self.scrollDown(node["parameters"])
# 打开网页事件 # 打开网页事件
@ -840,8 +840,8 @@ class BrowserThread(Thread):
self.recordLog("Cannot find element:" + self.recordLog("Cannot find element:" +
path + ", please try to set the wait time before executing this operation") path + ", please try to set the wait time before executing this operation")
print("找不到要点击的元素:" + path + ",请尝试在执行此操作前设置等待时间") print("找不到要点击的元素:" + path + ",请尝试在执行此操作前设置等待时间")
if para["iframe"]: # if para["iframe"]:
self.browser.switch_to.default_content() # self.browser.switch_to.default_content()
waitTime = float(para["wait"]) + 0.01 # 点击之后等待 waitTime = float(para["wait"]) + 0.01 # 点击之后等待
try: try:
waitType = int(para["waitType"]) waitType = int(para["waitType"])
@ -1034,6 +1034,10 @@ class BrowserThread(Thread):
for p in para["paras"]: for p in para["paras"]:
if p["optimizable"]: if p["optimizable"]:
try: try:
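# Fast (optimized) extraction is only valid when the browser's current frame context (root document vs. iframe) matches the one this parameter expects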
if self.browser.iframe_env != p["iframe"]:
p["optimizable"] = False
continue
p["relativeXPath"] = p["relativeXPath"].lower() p["relativeXPath"] = p["relativeXPath"].lower()
if p["nodeType"] == 2: if p["nodeType"] == 2:
xpath = p["relativeXPath"] + "/@href" xpath = p["relativeXPath"] + "/@href"

View File

@ -19,7 +19,15 @@ desired_capabilities["pageLoadStrategy"] = "none"
class MyChrome(webdriver.Chrome): class MyChrome(webdriver.Chrome):
def __init__(self, *args, **kwargs):
self.iframe_env = False # 现在的环境是root还是iframe
super().__init__(*args, **kwargs) # 调用父类的 __init__
def find_element(self, by=By.ID, value=None, iframe=False): def find_element(self, by=By.ID, value=None, iframe=False):
if self.iframe_env:
super().switch_to.default_content()
self.iframe_env = False
# 在这里改变查找元素的行为 # 在这里改变查找元素的行为
if iframe: if iframe:
# 获取所有的 iframe # 获取所有的 iframe
@ -32,6 +40,7 @@ class MyChrome(webdriver.Chrome):
for iframe in iframes: for iframe in iframes:
# 切换到 iframe # 切换到 iframe
super().switch_to.frame(iframe) super().switch_to.frame(iframe)
self.iframe_env = True
try: try:
# 在 iframe 中查找并点击元素 # 在 iframe 中查找并点击元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
@ -49,6 +58,9 @@ class MyChrome(webdriver.Chrome):
return super().find_element(by=by, value=value) return super().find_element(by=by, value=value)
def find_elements(self, by=By.ID, value=None, iframe=False): def find_elements(self, by=By.ID, value=None, iframe=False):
if self.iframe_env:
super().switch_to.default_content()
self.iframe_env = False
# 在这里改变查找元素的行为 # 在这里改变查找元素的行为
if iframe: if iframe:
# 获取所有的 iframe # 获取所有的 iframe
@ -58,6 +70,7 @@ class MyChrome(webdriver.Chrome):
for iframe in iframes: for iframe in iframes:
# 切换到 iframe # 切换到 iframe
super().switch_to.frame(iframe) super().switch_to.frame(iframe)
self.iframe_env = True
try: try:
# 在 iframe 中查找并点击元素 # 在 iframe 中查找并点击元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素 # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素