IFrame Alpha

2025-04-22 14:03:52 +08:00 · 2023-07-04 21:20:01 +08:00 · 2023-07-04 21:20:01 +08:00 · 11cda1bc1f
commit 11cda1bc1f
parent e54e25172a
8 changed files with 67 additions and 392 deletions
--- a/ElectronJS/src/taskGrid/test_pages/iframe.html
+++ b/ElectronJS/src/taskGrid/test_pages/iframe.html
@@ -0,0 +1,34 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Dynamic Iframe</title>
+    <script>
+        window.onload = function() {  // on page load: optionally retarget the first iframe from the page URL
+            var urlParams = new URLSearchParams(window.location.search);  // parse this page's query string
+            var address = urlParams.get('address');  // optional ?address=<url> parameter; null when absent
+            if (address) {
+                document.getElementById('myIframe').src = address;  // value is used as-is (no validation) as the iframe source
+            }
+        }
+    </script>
+</head>
+<body>
+    <h1>IFRAME TEST</h1>
+  <table border="1">
+        <tr>
+            <th>Header 1</th>
+            <th>Header 2</th>
+        </tr>
+        <tr>
+            <td>Row 1, Cell 1</td>
+            <td>Row 1, Cell 2</td>
+        </tr>
+        <tr>
+            <td>Row 2, Cell 1</td>
+            <td>Row 2, Cell 2</td>
+        </tr>
+    </table>    
+<iframe  id="myIframe" src="https://www.easyspider.cn" style="width: 100%; min-height: 600px;"></iframe>
+    <iframe src="https://www.easyspider.cn" style="width: 100%; min-height: 1200px;"></iframe>
+</body>
+</html>
--- a/ElectronJS/tasks/117.json
+++ b/ElectronJS/tasks/117.json
--- a/ElectronJS/tasks/118.json
+++ b/ElectronJS/tasks/118.json
--- a/ElectronJS/tasks/124.json
+++ b/ElectronJS/tasks/124.json
--- a/ElectronJS/tasks/125.json
+++ b/ElectronJS/tasks/125.json
--- a/ExecuteStage/.vscode/launch.json
+++ b/ExecuteStage/.vscode/launch.json
@@ -10,7 +10,7 @@
            "program": "${file}",
            "console": "integratedTerminal",
            "justMyCode": true,
-             "args": ["--id", "[18]", "--read_type", "remote", "--headless", "0"]
+             "args": ["--id", "[29]", "--read_type", "remote", "--headless", "0"]
            // "args": ["--id", "[2]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
            // "args": ["--id", "[44]", "--headless", "0", "--user_data", "1"]
        }
--- a/ExecuteStage/easyspider_executestage.py
+++ b/ExecuteStage/easyspider_executestage.py
@@ -123,11 +123,11 @@ class BrowserThread(Thread):
                    except:
                        para["iframe"] = False
                    if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1 and para["nodeType"] <= 2:
-                        # iframe中提取数据的绝对寻址操作不可优化
-                        if para["relative"] == False and para["iframe"] == True:
-                            para["optimizable"] = False
-                        else:
-                            para["optimizable"] = True
+                        # # iframe中提取数据的绝对寻址操作不可优化
+                        # if para["relative"] == False and para["iframe"] == True:
+                        #     para["optimizable"] = False
+                        # else:
+                        para["optimizable"] = True
                    else:
                        para["optimizable"] = False

@@ -465,8 +465,8 @@ class BrowserThread(Thread):
            'return history.length')  # 记录本次循环内的history的length
        self.history["index"] = thisHistoryLength
        self.history["handle"] = thisHandle
-        if node["parameters"]["iframe"]:
-            self.browser.switch_to.default_content()  # 循环前切换到主文档
+        # if node["parameters"]["iframe"]:
+        #     self.browser.switch_to.default_content()  # 循环前切换到主文档
        if int(node["parameters"]["loopType"]) == 0:  # 单个元素循环
            # 无跳转标签页操作
            count = 0  # 执行次数
@@ -651,8 +651,8 @@ class BrowserThread(Thread):
                    self.executeNode(i, code, node["parameters"]["xpath"], 0)
        self.history["index"] = thisHistoryLength
        self.history["handle"] = self.browser.current_window_handle
-        if node["parameters"]["iframe"]:
-            self.browser.switch_to.default_content()
+        # if node["parameters"]["iframe"]:
+        #     self.browser.switch_to.default_content()
        self.scrollDown(node["parameters"])

    # 打开网页事件
@@ -840,8 +840,8 @@ class BrowserThread(Thread):
            self.recordLog("Cannot find element:" +
                           path + ", please try to set the wait time before executing this operation")
            print("找不到要点击的元素:" + path + "，请尝试在执行此操作前设置等待时间")
-        if para["iframe"]:
-            self.browser.switch_to.default_content()
+        # if para["iframe"]:
+        #     self.browser.switch_to.default_content()
        waitTime = float(para["wait"]) + 0.01  # 点击之后等待
        try:
            waitType = int(para["waitType"])
@@ -1034,6 +1034,10 @@ class BrowserThread(Thread):
        for p in para["paras"]:
            if p["optimizable"]:
                try:
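+                    # Fast extraction is only valid when the current environment (root vs. iframe) is unchanged, i.e. matches this parameter's iframe flag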
+                    if self.browser.iframe_env != p["iframe"]:
+                        p["optimizable"] = False
+                        continue
                    p["relativeXPath"] = p["relativeXPath"].lower()
                    if p["nodeType"] == 2:
                        xpath = p["relativeXPath"] + "/@href"
--- a/ExecuteStage/myChrome.py
+++ b/ExecuteStage/myChrome.py
@@ -19,7 +19,15 @@ desired_capabilities["pageLoadStrategy"] = "none"


 class MyChrome(webdriver.Chrome):
+
+    def __init__(self, *args, **kwargs):
+        self.iframe_env = False  # Current context flag: False = root document, True = switched into an iframe
+        super().__init__(*args, **kwargs)  # Call the parent class __init__ with all arguments forwarded unchanged
+
    def find_element(self, by=By.ID, value=None, iframe=False):
+        if self.iframe_env:
+            super().switch_to.default_content()
+            self.iframe_env = False
        # 在这里改变查找元素的行为
        if iframe:
            # 获取所有的 iframe
@@ -32,6 +40,7 @@ class MyChrome(webdriver.Chrome):
            for iframe in iframes:
                # 切换到 iframe
                super().switch_to.frame(iframe)
+                self.iframe_env = True
                try:
                    # 在 iframe 中查找并点击元素
                    # 在这个例子中，我们查找 XPath 为 '//div[1]' 的元素
@@ -49,6 +58,9 @@ class MyChrome(webdriver.Chrome):
            return super().find_element(by=by, value=value)

    def find_elements(self, by=By.ID, value=None, iframe=False):
+        if self.iframe_env:
+            super().switch_to.default_content()
+            self.iframe_env = False
        # 在这里改变查找元素的行为
        if iframe:
            # 获取所有的 iframe
@@ -58,6 +70,7 @@ class MyChrome(webdriver.Chrome):
            for iframe in iframes:
                # 切换到 iframe
                super().switch_to.frame(iframe)
+                self.iframe_env = True
                try:
                    # 在 iframe 中查找并点击元素
                    # 在这个例子中，我们查找 XPath 为 '//div[1]' 的元素