mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-22 23:24:22 +08:00
Iframe Nested
This commit is contained in:
parent
ab0fad5b5a
commit
c3773848c3
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
1
.temp_to_pub/EasySpider_windows_x64/tasks/238.json
Normal file
1
.temp_to_pub/EasySpider_windows_x64/tasks/238.json
Normal file
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/230.json
Normal file
1
ElectronJS/tasks/230.json
Normal file
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/231.json
Normal file
1
ElectronJS/tasks/231.json
Normal file
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/232.json
Normal file
1
ElectronJS/tasks/232.json
Normal file
File diff suppressed because one or more lines are too long
1
ElectronJS/tasks/233.json
Normal file
1
ElectronJS/tasks/233.json
Normal file
File diff suppressed because one or more lines are too long
4
ExecuteStage/.vscode/launch.json
vendored
4
ExecuteStage/.vscode/launch.json
vendored
@ -12,8 +12,8 @@
|
||||
"justMyCode": false,
|
||||
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
|
||||
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
||||
// "args": ["--ids", "[1]", "--headless", "0", "--user_data", "1", "--keyboard", "1"]
|
||||
"args": "--ids '[3]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
|
||||
"args": ["--ids", "[149]", "--headless", "0", "--user_data", "0", "--keyboard", "0"]
|
||||
// "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
|
||||
}
|
||||
]
|
||||
}
|
@ -2,6 +2,7 @@
|
||||
# import atexit
|
||||
import atexit
|
||||
import copy
|
||||
import platform
|
||||
import shutil
|
||||
import string
|
||||
import undetected_chromedriver as uc
|
||||
@ -1711,6 +1712,7 @@ class BrowserThread(Thread):
|
||||
p["relativeXPath"], self.outputParameters, self)
|
||||
# 只有当前环境不变变化才可以快速提取数据
|
||||
if self.browser.iframe_env != p["iframe"]:
|
||||
# if p["iframe"] or self.browser.iframe_env != p["iframe"]: # 如果是iframe,则不能快速提取数据,主要是各个上下文的iframe切换,但一般不会有人这么做
|
||||
p["optimizable"] = False
|
||||
continue
|
||||
# relativeXPath = relativeXPath.lower()
|
||||
@ -1820,7 +1822,7 @@ class BrowserThread(Thread):
|
||||
element = self.browser.find_element(
|
||||
By.XPATH, relativeXPath, iframe=p["iframe"])
|
||||
except (
|
||||
NoSuchElementException, InvalidSelectorException, StaleElementReferenceException): # 找不到元素的时候,使用默认值
|
||||
NoSuchElementException, InvalidSelectorException, StaleElementReferenceException) as e: # 找不到元素的时候,使用默认值
|
||||
# self.print_and_log(p)
|
||||
try:
|
||||
content = p["default"]
|
||||
@ -1835,6 +1837,7 @@ class BrowserThread(Thread):
|
||||
self.print_and_log(
|
||||
"提取数据操作时,字段名 %s 对应XPath %s 未找到,使用默认值,本字段将不再重复报错" % (
|
||||
p["name"], relativeXPath))
|
||||
self.dataNotFoundKeys[p["name"]] = True
|
||||
except:
|
||||
pass
|
||||
continue
|
||||
@ -1916,92 +1919,57 @@ if __name__ == '__main__':
|
||||
print(c)
|
||||
options = webdriver.ChromeOptions()
|
||||
driver_path = "chromedriver.exe"
|
||||
import platform
|
||||
|
||||
print(sys.platform, platform.architecture())
|
||||
# option = webdriver.ChromeOptions()
|
||||
if not os.path.exists(os.getcwd() + "/Data"):
|
||||
os.mkdir(os.getcwd() + "/Data")
|
||||
if sys.platform == "darwin" and platform.architecture()[0] == "64bit":
|
||||
options.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome"
|
||||
# MacOS需要用option而不是options!
|
||||
# option.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome"
|
||||
# option.add_extension(
|
||||
# "EasySpider.app/Contents/Resources/app/XPathHelper.crx")
|
||||
options.add_extension(
|
||||
"EasySpider.app/Contents/Resources/app/XPathHelper.crx")
|
||||
driver_path = "EasySpider.app/Contents/Resources/app/chromedriver_mac64"
|
||||
# options.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
|
||||
# # MacOS需要用option而不是options!
|
||||
# option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
|
||||
# driver_path = os.getcwd()+ "/chromedriver_mac64"
|
||||
print(driver_path)
|
||||
if c.config_folder == "":
|
||||
c.config_folder = os.path.expanduser(
|
||||
"~/Library/Application Support/EasySpider/")
|
||||
# print("Config folder for MacOS:", c.config_folder)
|
||||
elif os.path.exists(os.getcwd() + "/EasySpider/resources"): # 打包后的路径
|
||||
print("Finding chromedriver in EasySpider",
|
||||
os.getcwd() + "/EasySpider")
|
||||
if sys.platform == "win32" and platform.architecture()[0] == "32bit":
|
||||
options.binary_location = os.path.join(
|
||||
os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe") # 指定chrome位置
|
||||
# option.binary_location = os.path.join(
|
||||
# os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe") # 指定chrome位置
|
||||
driver_path = os.path.join(
|
||||
os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe")
|
||||
# option.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
elif sys.platform == "win32" and platform.architecture()[0] == "64bit":
|
||||
options.binary_location = os.path.join(
|
||||
os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
|
||||
# option.binary_location = os.path.join(
|
||||
# os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
|
||||
driver_path = os.path.join(
|
||||
os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe")
|
||||
# option.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
|
||||
options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
|
||||
# option.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
|
||||
driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64"
|
||||
# option.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
|
||||
else:
|
||||
print("Unsupported platform")
|
||||
sys.exit()
|
||||
print("Chrome location:", options.binary_location)
|
||||
print("Chromedriver location:", driver_path)
|
||||
# elif os.getcwd().find("ExecuteStage") >= 0: # 如果直接执行
|
||||
# print("Finding chromedriver in ./Chrome",
|
||||
# os.getcwd()+"/Chrome")
|
||||
# options.binary_location = "./Chrome/chrome.exe" # 指定chrome位置
|
||||
# # option.binary_location = "C:\\Users\\q9823\\AppData\\Local\\Google\\Chrome\\Application\\chrome.exe"
|
||||
# driver_path = "./Chrome/chromedriver.exe"
|
||||
elif os.path.exists(os.getcwd() + "/../ElectronJS"):
|
||||
# 软件dev用
|
||||
print("Finding chromedriver in EasySpider",
|
||||
os.getcwd() + "/ElectronJS")
|
||||
# option.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置
|
||||
options.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置
|
||||
driver_path = "../ElectronJS/chrome_win64/chromedriver_win64.exe"
|
||||
# option.add_extension("../ElectronJS/XPathHelper.crx")
|
||||
options.add_extension("../ElectronJS/XPathHelper.crx")
|
||||
else:
|
||||
options.binary_location = "./chrome.exe" # 指定chrome位置
|
||||
# option.binary_location = "./chrome.exe" # 指定chrome位置
|
||||
driver_path = "./chromedriver.exe"
|
||||
# option.add_extension("XPathHelper.crx")
|
||||
options.add_extension("XPathHelper.crx")
|
||||
|
||||
# option.add_experimental_option(
|
||||
# 'excludeSwitches', ['enable-automation']) # 以开发者模式
|
||||
options.add_experimental_option(
|
||||
'excludeSwitches', ['enable-automation']) # 以开发者模式
|
||||
|
||||
# user_data_dir = r'' # 注意没有Default!
|
||||
|
||||
# options.add_argument('--user-data-dir='+p)
|
||||
|
||||
# 总结:
|
||||
# 0. 带Cookie需要用userdatadir
|
||||
@ -2018,22 +1986,15 @@ if __name__ == '__main__':
|
||||
except:
|
||||
pass
|
||||
|
||||
# options.add_argument(
|
||||
# '--user-data-dir=C:\\Users\\q9823\\AppData\\Local\\Google\\Chrome\\User Data') # TMALL 反扒
|
||||
# option.add_argument(
|
||||
# "--disable-blink-features=AutomationControlled") # TMALL 反扒
|
||||
options.add_argument(
|
||||
"--disable-blink-features=AutomationControlled") # TMALL 反扒
|
||||
|
||||
options.add_argument('-ignore-certificate-errors')
|
||||
options.add_argument('-ignore -ssl-errors')
|
||||
# option.add_argument('-ignore-certificate-errors')
|
||||
# option.add_argument('-ignore -ssl-errors')
|
||||
|
||||
if c.headless:
|
||||
print("Headless mode")
|
||||
print("无头模式")
|
||||
# option.add_argument("--headless")
|
||||
options.add_argument("--headless")
|
||||
|
||||
tmp_options = []
|
||||
@ -2058,11 +2019,7 @@ if __name__ == '__main__':
|
||||
shutil.copytree(absolute_user_data_folder, tmp_user_data_folder)
|
||||
print("User data folder copied successfully, if you exit the program before it finishes, please delete the temporary user data folder manually.")
|
||||
print("用户信息目录复制成功,如果程序在运行过程中被手动退出,请手动删除临时用户信息目录。")
|
||||
# option = tmp_options[i]["option"]
|
||||
options = tmp_options[i]["options"]
|
||||
# option.add_argument(
|
||||
# f'--user-data-dir={tmp_user_data_folder}') # TMALL 反扒
|
||||
# option.add_argument("--profile-directory=Default")
|
||||
options.add_argument(
|
||||
f'--user-data-dir={tmp_user_data_folder}') # TMALL 反扒
|
||||
options.add_argument("--profile-directory=Default")
|
||||
@ -2074,7 +2031,6 @@ if __name__ == '__main__':
|
||||
threads = []
|
||||
for i in range(len(c.ids)):
|
||||
id = c.ids[i]
|
||||
# option = tmp_options[i]["option"]
|
||||
options = tmp_options[i]["options"]
|
||||
print("id: ", id)
|
||||
if c.read_type == "remote":
|
||||
@ -2100,7 +2056,6 @@ if __name__ == '__main__':
|
||||
cloudflare = 0
|
||||
if cloudflare == 0:
|
||||
options.add_argument('log-level=3') # 隐藏日志
|
||||
# option.add_argument('log-level=3') # 隐藏日志
|
||||
path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(id))
|
||||
print("Data path:", path)
|
||||
options.add_experimental_option("prefs", {
|
||||
@ -2116,37 +2071,17 @@ if __name__ == '__main__':
|
||||
'safebrowsing.disable_download_protection': True,
|
||||
'profile.default_content_settings.popups': 0,
|
||||
})
|
||||
# option.add_experimental_option("prefs", {
|
||||
# # 设置文件下载路径
|
||||
# "download.default_directory": path,
|
||||
# "download.prompt_for_download": False, # 禁止下载提示框
|
||||
# "plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}],
|
||||
# "download.directory_upgrade": True,
|
||||
# "download.extensions_to_open": "applications/pdf",
|
||||
# "plugins.always_open_pdf_externally": True, # 总是在外部程序中打开PDF
|
||||
# "safebrowsing_for_trusted_sources_enabled": False,
|
||||
# "safebrowsing.enabled": False,
|
||||
# 'safebrowsing.enabled': False,
|
||||
# 'safebrowsing.disable_download_protection': True,
|
||||
# 'profile.default_content_settings.popups': 0,
|
||||
# })
|
||||
try:
|
||||
if service["environment"] == 1:
|
||||
# option.add_experimental_option(
|
||||
# 'mobileEmulation', {'deviceName': 'iPhone X'}) # 模拟iPhone X浏览
|
||||
options.add_experimental_option(
|
||||
'mobileEmulation', {'deviceName': 'iPhone X'}) # 模拟iPhone X浏览
|
||||
except:
|
||||
pass
|
||||
# browser_t = MyChrome(
|
||||
# options=options, chrome_options=option, executable_path=driver_path)
|
||||
selenium_service = Service(executable_path=driver_path)
|
||||
browser_t = MyChrome(service=selenium_service, options=options)
|
||||
elif cloudflare == 1:
|
||||
if sys.platform == "win32":
|
||||
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
|
||||
# options.add_argument("--auto-open-devtools-for-tabs")
|
||||
# options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器
|
||||
browser_t = MyUCChrome(
|
||||
options=options, driver_executable_path=driver_path)
|
||||
links = list(filter(isnotnull, service["links"].split("\n")))
|
||||
@ -2200,8 +2135,6 @@ if __name__ == '__main__':
|
||||
# print("您的操作系统不支持暂停功能。")
|
||||
# print("Your operating system does not support the pause function.")
|
||||
|
||||
# print("线程长度:", len(threads) )
|
||||
|
||||
for thread in threads:
|
||||
print()
|
||||
thread.join()
|
||||
|
@ -1,6 +1,6 @@
|
||||
rmdir /s /q build
|
||||
rmdir /s /q dist
|
||||
@REM pyinstaller -F --icon=favicon.ico easyspider_executestage.py
|
||||
pyinstaller -F --icon=favicon.ico --add-data "C:\Python311\Lib\site-packages\onnxruntime\capi\onnxruntime_providers_shared.dll;onnxruntime\capi" --add-data "C:\Python311\Lib\site-packages\ddddocr\common.onnx;ddddocr" easyspider_executestage.py
|
||||
pyinstaller -F --icon=favicon.ico --add-data "C:\Users\q9823\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\onnxruntime\capi\onnxruntime_providers_shared.dll;onnxruntime\capi" --add-data "C:\Users\q9823\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\ddddocr\common.onnx;ddddocr" easyspider_executestage.py
|
||||
del ..\ElectronJS\chrome_win64\easyspider_executestage.exe
|
||||
copy dist\easyspider_executestage.exe ..\ElectronJS\chrome_win64\easyspider_executestage.exe
|
@ -25,75 +25,150 @@ class MyChrome(webdriver.Chrome):
|
||||
self.iframe_env = False # 现在的环境是root还是iframe
|
||||
super().__init__(*args, **kwargs) # 调用父类的 __init__
|
||||
|
||||
# def find_element(self, by=By.ID, value=None, iframe=False):
|
||||
# # 在这里改变查找元素的行为
|
||||
# if self.iframe_env:
|
||||
# super().switch_to.default_content()
|
||||
# self.iframe_env = False
|
||||
# if iframe:
|
||||
# # 获取所有的 iframe
|
||||
# try:
|
||||
# iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
|
||||
# except Exception as e:
|
||||
# print(e)
|
||||
# find_element = False
|
||||
# # 遍历所有的 iframe 并查找里面的元素
|
||||
# for iframe in iframes:
|
||||
# # 切换到 iframe
|
||||
# super().switch_to.default_content()
|
||||
# super().switch_to.frame(iframe)
|
||||
# self.iframe_env = True
|
||||
# try:
|
||||
# # 在 iframe 中查找元素
|
||||
# # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
||||
# element = super().find_element(by=by, value=value)
|
||||
# find_element = True
|
||||
# except NoSuchElementException as e:
|
||||
# print(f"No such element found in the iframe: {str(e)}")
|
||||
# except Exception as e:
|
||||
# print(f"Exception: {str(e)}")
|
||||
# # 完成操作后切回主文档
|
||||
# # super().switch_to.default_content()
|
||||
# if find_element:
|
||||
# return element
|
||||
# if not find_element:
|
||||
# raise NoSuchElementException
|
||||
# else:
|
||||
# return super().find_element(by=by, value=value)
|
||||
|
||||
def find_element_recursive(self, by, value, frames):
    """Search ``frames`` depth-first for the first element matching ``by``/``value``.

    The driver is expected to be focused on the document that contains
    ``frames`` when this is called. On success the driver is left switched
    into the frame that holds the element, so the returned element can be
    used directly. When a frame (and everything nested in it) yields
    nothing, the driver is switched back to that frame's parent before the
    next sibling is tried.

    Args:
        by: Locator strategy, passed through to the parent ``find_element``.
        value: Locator value, passed through to the parent ``find_element``.
        frames: Iterable of iframe elements belonging to the current document.

    Returns:
        The first matching element found in ``frames`` or their nested iframes.

    Raises:
        NoSuchElementException: If no frame, at any depth, contains a match.
    """
    for frame in frames:
        try:
            self.switch_to.frame(frame)
        except Exception as e:
            # The switch failed, so the context has not moved; skip this frame.
            print(f"Exception while processing frame: {e}")
            continue

        try:
            try:
                # !!! Search the *current* frame only. self.find_element would
                # reset the context to the main document, so the parent-class
                # implementation is called directly.
                return super(MyChrome, self).find_element(by=by, value=value)
            except NoSuchElementException:
                # Not in this frame: descend into the iframes nested inside it.
                nested_frames = super(MyChrome, self).find_elements(By.CSS_SELECTOR, "iframe")
                if nested_frames:
                    try:
                        return self.find_element_recursive(by, value, nested_frames)
                    except NoSuchElementException:
                        # Nothing anywhere below this frame; the recursive call
                        # has already stepped back up to this frame's document.
                        pass
        except Exception as e:
            print(f"Exception while processing frame: {e}")

        # Step back out of the frame that produced nothing. The remaining
        # entries of ``frames`` live in the parent document and cannot be
        # switched to while the driver is still focused inside a sibling
        # (or one of the sibling's descendants).
        try:
            self.switch_to.parent_frame()
        except Exception as e:
            print(f"Exception while processing frame: {e}")

    raise NoSuchElementException(f"Element {value} not found in any frame or iframe")
|
||||
|
||||
def find_element(self, by=By.ID, value=None, iframe=False):
|
||||
# 在这里改变查找元素的行为
|
||||
if self.iframe_env:
|
||||
super().switch_to.default_content()
|
||||
self.switch_to.default_content() # Switch back to the main document
|
||||
self.iframe_env = False
|
||||
if iframe:
|
||||
# 获取所有的 iframe
|
||||
try:
|
||||
iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
|
||||
except Exception as e:
|
||||
print(e)
|
||||
find_element = False
|
||||
# 遍历所有的 iframe 并查找里面的元素
|
||||
for iframe in iframes:
|
||||
# 切换到 iframe
|
||||
super().switch_to.default_content()
|
||||
super().switch_to.frame(iframe)
|
||||
frames = self.find_elements(By.CSS_SELECTOR, "iframe")
|
||||
if not frames:
|
||||
raise NoSuchElementException(f"No iframes found in the current page while searching for {value}")
|
||||
self.iframe_env = True
|
||||
try:
|
||||
# 在 iframe 中查找元素
|
||||
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
||||
element = super().find_element(by=by, value=value)
|
||||
find_element = True
|
||||
except NoSuchElementException as e:
|
||||
print(f"No such element found in the iframe: {str(e)}")
|
||||
except Exception as e:
|
||||
print(f"Exception: {str(e)}")
|
||||
# 完成操作后切回主文档
|
||||
# super().switch_to.default_content()
|
||||
if find_element:
|
||||
return element
|
||||
if not find_element:
|
||||
raise NoSuchElementException
|
||||
return self.find_element_recursive(by, value, frames)
|
||||
else:
|
||||
return super().find_element(by=by, value=value)
|
||||
# Find element in the main document as normal
|
||||
return super(MyChrome, self).find_element(by=by, value=value)
|
||||
|
||||
# def find_elements(self, by=By.ID, value=None, iframe=False):
|
||||
# # 在这里改变查找元素的行为
|
||||
# if self.iframe_env:
|
||||
# super().switch_to.default_content()
|
||||
# self.iframe_env = False
|
||||
# if iframe:
|
||||
# # 获取所有的 iframe
|
||||
# iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
|
||||
# find_element = False
|
||||
# # 遍历所有的 iframe 并找到里面的元素
|
||||
# for iframe in iframes:
|
||||
# # 切换到 iframe
|
||||
# try:
|
||||
# super().switch_to.default_content()
|
||||
# super().switch_to.frame(iframe)
|
||||
# self.iframe_env = True
|
||||
# # 在 iframe 中查找元素
|
||||
# # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
||||
# elements = super().find_elements(by=by, value=value)
|
||||
# if len(elements) > 0:
|
||||
# find_element = True
|
||||
# # 完成操作后切回主文档
|
||||
# # super().switch_to.default_content()
|
||||
# if find_element:
|
||||
# return elements
|
||||
# except NoSuchElementException as e:
|
||||
# print(f"No such element found in the iframe: {str(e)}")
|
||||
# except Exception as e:
|
||||
# print(f"Exception: {str(e)}")
|
||||
# if not find_element:
|
||||
# raise NoSuchElementException
|
||||
# else:
|
||||
# return super().find_elements(by=by, value=value)
|
||||
|
||||
def find_elements_recursive(self, by, value, frames):
    """Search ``frames`` depth-first for the first frame containing matches.

    The driver is expected to be focused on the document that contains
    ``frames`` when this is called. The elements of the first frame (at any
    nesting depth) that yields a non-empty result are returned, and the
    driver is left switched into that frame so the elements can be used
    directly. When a frame (and everything nested in it) yields nothing, the
    driver is switched back to that frame's parent before the next sibling
    is tried.

    Args:
        by: Locator strategy, passed through to the parent ``find_elements``.
        value: Locator value, passed through to the parent ``find_elements``.
        frames: Iterable of iframe elements belonging to the current document.

    Returns:
        A non-empty list of matching elements from a single frame.

    Raises:
        NoSuchElementException: If no frame, at any depth, contains a match.
    """
    for frame in frames:
        try:
            self.switch_to.frame(frame)
        except Exception as e:
            # The switch failed, so the context has not moved; skip this frame.
            print(f"Exception while processing frame: {e}")
            continue

        try:
            # Search the *current* frame only. self.find_elements would reset
            # the context to the main document, so the parent-class
            # implementation is called directly.
            elements = super(MyChrome, self).find_elements(by=by, value=value)
            if elements:
                return elements

            # Nothing here: descend into the iframes nested inside this frame.
            nested_frames = super(MyChrome, self).find_elements(By.CSS_SELECTOR, "iframe")
            if nested_frames:
                try:
                    return self.find_elements_recursive(by, value, nested_frames)
                except NoSuchElementException:
                    # Nothing anywhere below this frame; the recursive call
                    # has already stepped back up to this frame's document.
                    pass
        except Exception as e:
            print(f"Exception while processing frame: {e}")

        # Step back out of the frame that produced nothing. The remaining
        # entries of ``frames`` live in the parent document and cannot be
        # switched to while the driver is still focused inside a sibling
        # (or one of the sibling's descendants).
        try:
            self.switch_to.parent_frame()
        except Exception as e:
            print(f"Exception while processing frame: {e}")

    raise NoSuchElementException(f"Elements with {value} not found in any frame or iframe")
|
||||
|
||||
def find_elements(self, by=By.ID, value=None, iframe=False):
|
||||
# 在这里改变查找元素的行为
|
||||
if self.iframe_env:
|
||||
super().switch_to.default_content()
|
||||
self.switch_to.default_content() # Switch back to the main document
|
||||
self.iframe_env = False
|
||||
if iframe:
|
||||
# 获取所有的 iframe
|
||||
iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
|
||||
find_element = False
|
||||
# 遍历所有的 iframe 并找到里面的元素
|
||||
for iframe in iframes:
|
||||
# 切换到 iframe
|
||||
try:
|
||||
super().switch_to.default_content()
|
||||
super().switch_to.frame(iframe)
|
||||
frames = self.find_elements(By.CSS_SELECTOR, "iframe")
|
||||
if not frames:
|
||||
return [] # Return an empty list if no iframes are found
|
||||
self.iframe_env = True
|
||||
# 在 iframe 中查找元素
|
||||
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
|
||||
elements = super().find_elements(by=by, value=value)
|
||||
if len(elements) > 0:
|
||||
find_element = True
|
||||
# 完成操作后切回主文档
|
||||
# super().switch_to.default_content()
|
||||
if find_element:
|
||||
return elements
|
||||
except NoSuchElementException as e:
|
||||
print(f"No such element found in the iframe: {str(e)}")
|
||||
except Exception as e:
|
||||
print(f"Exception: {str(e)}")
|
||||
if not find_element:
|
||||
raise NoSuchElementException
|
||||
return self.find_elements_recursive(by, value, frames)
|
||||
else:
|
||||
return super().find_elements(by=by, value=value)
|
||||
# Find elements in the main document as normal
|
||||
return super(MyChrome, self).find_elements(by=by, value=value)
|
||||
|
||||
# MacOS不支持直接打包带Cloudflare的功能,如果要自己编译运行,可以把这个if去掉,然后配置好浏览器和driver路径
|
||||
if sys.platform != "darwin":
|
||||
|
4
Extension/manifest_v3/package-lock.json
generated
4
Extension/manifest_v3/package-lock.json
generated
@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "EasySpider",
|
||||
"version": "0.5.0",
|
||||
"version": "0.6.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "EasySpider",
|
||||
"version": "0.5.0",
|
||||
"version": "0.6.0",
|
||||
"license": "AGPL-3.0",
|
||||
"dependencies": {
|
||||
"crx": "^5.0.1",
|
||||
|
@ -1 +1 @@
|
||||
{"language":"zh"}
|
||||
{"language":"en"}
|
Loading…
x
Reference in New Issue
Block a user