Iframe Nested

This commit is contained in:
naibo 2023-12-08 06:08:45 +08:00
parent ab0fad5b5a
commit c3773848c3
16 changed files with 156 additions and 138 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -12,8 +12,8 @@
"justMyCode": false, "justMyCode": false,
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"] // "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"] // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
// "args": ["--ids", "[1]", "--headless", "0", "--user_data", "1", "--keyboard", "1"] "args": ["--ids", "[149]", "--headless", "0", "--user_data", "0", "--keyboard", "0"]
"args": "--ids '[3]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name" // "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
} }
] ]
} }

View File

@ -2,6 +2,7 @@
# import atexit # import atexit
import atexit import atexit
import copy import copy
import platform
import shutil import shutil
import string import string
import undetected_chromedriver as uc import undetected_chromedriver as uc
@ -1711,6 +1712,7 @@ class BrowserThread(Thread):
p["relativeXPath"], self.outputParameters, self) p["relativeXPath"], self.outputParameters, self)
# 只有当前环境不变变化才可以快速提取数据 # 只有当前环境不变变化才可以快速提取数据
if self.browser.iframe_env != p["iframe"]: if self.browser.iframe_env != p["iframe"]:
# if p["iframe"] or self.browser.iframe_env != p["iframe"]: # 如果是iframe则不能快速提取数据主要是各个上下文的iframe切换但一般不会有人这么做
p["optimizable"] = False p["optimizable"] = False
continue continue
# relativeXPath = relativeXPath.lower() # relativeXPath = relativeXPath.lower()
@ -1820,7 +1822,7 @@ class BrowserThread(Thread):
element = self.browser.find_element( element = self.browser.find_element(
By.XPATH, relativeXPath, iframe=p["iframe"]) By.XPATH, relativeXPath, iframe=p["iframe"])
except ( except (
NoSuchElementException, InvalidSelectorException, StaleElementReferenceException): # 找不到元素的时候,使用默认值 NoSuchElementException, InvalidSelectorException, StaleElementReferenceException) as e: # 找不到元素的时候,使用默认值
# self.print_and_log(p) # self.print_and_log(p)
try: try:
content = p["default"] content = p["default"]
@ -1835,6 +1837,7 @@ class BrowserThread(Thread):
self.print_and_log( self.print_and_log(
"提取数据操作时,字段名 %s 对应XPath %s 未找到,使用默认值,本字段将不再重复报错" % ( "提取数据操作时,字段名 %s 对应XPath %s 未找到,使用默认值,本字段将不再重复报错" % (
p["name"], relativeXPath)) p["name"], relativeXPath))
self.dataNotFoundKeys[p["name"]] = True
except: except:
pass pass
continue continue
@ -1916,92 +1919,57 @@ if __name__ == '__main__':
print(c) print(c)
options = webdriver.ChromeOptions() options = webdriver.ChromeOptions()
driver_path = "chromedriver.exe" driver_path = "chromedriver.exe"
import platform
print(sys.platform, platform.architecture()) print(sys.platform, platform.architecture())
# option = webdriver.ChromeOptions()
if not os.path.exists(os.getcwd() + "/Data"): if not os.path.exists(os.getcwd() + "/Data"):
os.mkdir(os.getcwd() + "/Data") os.mkdir(os.getcwd() + "/Data")
if sys.platform == "darwin" and platform.architecture()[0] == "64bit": if sys.platform == "darwin" and platform.architecture()[0] == "64bit":
options.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome" options.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome"
# MacOS需要用option而不是options
# option.binary_location = "EasySpider.app/Contents/Resources/app/chrome_mac64.app/Contents/MacOS/Google Chrome"
# option.add_extension(
# "EasySpider.app/Contents/Resources/app/XPathHelper.crx")
options.add_extension( options.add_extension(
"EasySpider.app/Contents/Resources/app/XPathHelper.crx") "EasySpider.app/Contents/Resources/app/XPathHelper.crx")
driver_path = "EasySpider.app/Contents/Resources/app/chromedriver_mac64" driver_path = "EasySpider.app/Contents/Resources/app/chromedriver_mac64"
# options.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
# # MacOS需要用option而不是options
# option.binary_location = "chrome_mac64.app/Contents/MacOS/Google Chrome"
# driver_path = os.getcwd()+ "/chromedriver_mac64"
print(driver_path) print(driver_path)
if c.config_folder == "": if c.config_folder == "":
c.config_folder = os.path.expanduser( c.config_folder = os.path.expanduser(
"~/Library/Application Support/EasySpider/") "~/Library/Application Support/EasySpider/")
# print("Config folder for MacOS:", c.config_folder)
elif os.path.exists(os.getcwd() + "/EasySpider/resources"): # 打包后的路径 elif os.path.exists(os.getcwd() + "/EasySpider/resources"): # 打包后的路径
print("Finding chromedriver in EasySpider", print("Finding chromedriver in EasySpider",
os.getcwd() + "/EasySpider") os.getcwd() + "/EasySpider")
if sys.platform == "win32" and platform.architecture()[0] == "32bit": if sys.platform == "win32" and platform.architecture()[0] == "32bit":
options.binary_location = os.path.join( options.binary_location = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe") # 指定chrome位置 os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe") # 指定chrome位置
# option.binary_location = os.path.join(
# os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe") # 指定chrome位置
driver_path = os.path.join( driver_path = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe") os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe")
# option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx") options.add_extension("EasySpider/resources/app/XPathHelper.crx")
elif sys.platform == "win32" and platform.architecture()[0] == "64bit": elif sys.platform == "win32" and platform.architecture()[0] == "64bit":
options.binary_location = os.path.join( options.binary_location = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe") os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
# option.binary_location = os.path.join(
# os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
driver_path = os.path.join( driver_path = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe") os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe")
# option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx") options.add_extension("EasySpider/resources/app/XPathHelper.crx")
elif sys.platform == "linux" and platform.architecture()[0] == "64bit": elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome" options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
# option.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64" driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64"
# option.add_extension("EasySpider/resources/app/XPathHelper.crx")
options.add_extension("EasySpider/resources/app/XPathHelper.crx") options.add_extension("EasySpider/resources/app/XPathHelper.crx")
else: else:
print("Unsupported platform") print("Unsupported platform")
sys.exit() sys.exit()
print("Chrome location:", options.binary_location) print("Chrome location:", options.binary_location)
print("Chromedriver location:", driver_path) print("Chromedriver location:", driver_path)
# elif os.getcwd().find("ExecuteStage") >= 0: # 如果直接执行
# print("Finding chromedriver in ./Chrome",
# os.getcwd()+"/Chrome")
# options.binary_location = "./Chrome/chrome.exe" # 指定chrome位置
# # option.binary_location = "C:\\Users\\q9823\\AppData\\Local\\Google\\Chrome\\Application\\chrome.exe"
# driver_path = "./Chrome/chromedriver.exe"
elif os.path.exists(os.getcwd() + "/../ElectronJS"): elif os.path.exists(os.getcwd() + "/../ElectronJS"):
# 软件dev用 # 软件dev用
print("Finding chromedriver in EasySpider", print("Finding chromedriver in EasySpider",
os.getcwd() + "/ElectronJS") os.getcwd() + "/ElectronJS")
# option.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置
options.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置 options.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置
driver_path = "../ElectronJS/chrome_win64/chromedriver_win64.exe" driver_path = "../ElectronJS/chrome_win64/chromedriver_win64.exe"
# option.add_extension("../ElectronJS/XPathHelper.crx")
options.add_extension("../ElectronJS/XPathHelper.crx") options.add_extension("../ElectronJS/XPathHelper.crx")
else: else:
options.binary_location = "./chrome.exe" # 指定chrome位置 options.binary_location = "./chrome.exe" # 指定chrome位置
# option.binary_location = "./chrome.exe" # 指定chrome位置
driver_path = "./chromedriver.exe" driver_path = "./chromedriver.exe"
# option.add_extension("XPathHelper.crx")
options.add_extension("XPathHelper.crx") options.add_extension("XPathHelper.crx")
# option.add_experimental_option(
# 'excludeSwitches', ['enable-automation']) # 以开发者模式
options.add_experimental_option( options.add_experimental_option(
'excludeSwitches', ['enable-automation']) # 以开发者模式 'excludeSwitches', ['enable-automation']) # 以开发者模式
# user_data_dir = r'' # 注意没有Default
# options.add_argument('--user-data-dir='+p)
# 总结: # 总结:
# 0. 带Cookie需要用userdatadir # 0. 带Cookie需要用userdatadir
@ -2018,22 +1986,15 @@ if __name__ == '__main__':
except: except:
pass pass
# options.add_argument(
# '--user-data-dir=C:\\Users\\q9823\\AppData\\Local\\Google\\Chrome\\User Data') # TMALL 反扒
# option.add_argument(
# "--disable-blink-features=AutomationControlled") # TMALL 反扒
options.add_argument( options.add_argument(
"--disable-blink-features=AutomationControlled") # TMALL 反扒 "--disable-blink-features=AutomationControlled") # TMALL 反扒
options.add_argument('-ignore-certificate-errors') options.add_argument('-ignore-certificate-errors')
options.add_argument('-ignore -ssl-errors') options.add_argument('-ignore -ssl-errors')
# option.add_argument('-ignore-certificate-errors')
# option.add_argument('-ignore -ssl-errors')
if c.headless: if c.headless:
print("Headless mode") print("Headless mode")
print("无头模式") print("无头模式")
# option.add_argument("--headless")
options.add_argument("--headless") options.add_argument("--headless")
tmp_options = [] tmp_options = []
@ -2058,11 +2019,7 @@ if __name__ == '__main__':
shutil.copytree(absolute_user_data_folder, tmp_user_data_folder) shutil.copytree(absolute_user_data_folder, tmp_user_data_folder)
print("User data folder copied successfully, if you exit the program before it finishes, please delete the temporary user data folder manually.") print("User data folder copied successfully, if you exit the program before it finishes, please delete the temporary user data folder manually.")
print("用户信息目录复制成功,如果程序在运行过程中被手动退出,请手动删除临时用户信息目录。") print("用户信息目录复制成功,如果程序在运行过程中被手动退出,请手动删除临时用户信息目录。")
# option = tmp_options[i]["option"]
options = tmp_options[i]["options"] options = tmp_options[i]["options"]
# option.add_argument(
# f'--user-data-dir={tmp_user_data_folder}') # TMALL 反扒
# option.add_argument("--profile-directory=Default")
options.add_argument( options.add_argument(
f'--user-data-dir={tmp_user_data_folder}') # TMALL 反扒 f'--user-data-dir={tmp_user_data_folder}') # TMALL 反扒
options.add_argument("--profile-directory=Default") options.add_argument("--profile-directory=Default")
@ -2074,7 +2031,6 @@ if __name__ == '__main__':
threads = [] threads = []
for i in range(len(c.ids)): for i in range(len(c.ids)):
id = c.ids[i] id = c.ids[i]
# option = tmp_options[i]["option"]
options = tmp_options[i]["options"] options = tmp_options[i]["options"]
print("id: ", id) print("id: ", id)
if c.read_type == "remote": if c.read_type == "remote":
@ -2100,7 +2056,6 @@ if __name__ == '__main__':
cloudflare = 0 cloudflare = 0
if cloudflare == 0: if cloudflare == 0:
options.add_argument('log-level=3') # 隐藏日志 options.add_argument('log-level=3') # 隐藏日志
# option.add_argument('log-level=3') # 隐藏日志
path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(id)) path = os.path.join(os.path.abspath("./"), "Data", "Task_" + str(id))
print("Data path:", path) print("Data path:", path)
options.add_experimental_option("prefs", { options.add_experimental_option("prefs", {
@ -2116,37 +2071,17 @@ if __name__ == '__main__':
'safebrowsing.disable_download_protection': True, 'safebrowsing.disable_download_protection': True,
'profile.default_content_settings.popups': 0, 'profile.default_content_settings.popups': 0,
}) })
# option.add_experimental_option("prefs", {
# # 设置文件下载路径
# "download.default_directory": path,
# "download.prompt_for_download": False, # 禁止下载提示框
# "plugins.plugins_list": [{"enabled": False, "name": "Chrome PDF Viewer"}],
# "download.directory_upgrade": True,
# "download.extensions_to_open": "applications/pdf",
# "plugins.always_open_pdf_externally": True, # 总是在外部程序中打开PDF
# "safebrowsing_for_trusted_sources_enabled": False,
# "safebrowsing.enabled": False,
# 'safebrowsing.enabled': False,
# 'safebrowsing.disable_download_protection': True,
# 'profile.default_content_settings.popups': 0,
# })
try: try:
if service["environment"] == 1: if service["environment"] == 1:
# option.add_experimental_option(
# 'mobileEmulation', {'deviceName': 'iPhone X'}) # 模拟iPhone X浏览
options.add_experimental_option( options.add_experimental_option(
'mobileEmulation', {'deviceName': 'iPhone X'}) # 模拟iPhone X浏览 'mobileEmulation', {'deviceName': 'iPhone X'}) # 模拟iPhone X浏览
except: except:
pass pass
# browser_t = MyChrome(
# options=options, chrome_options=option, executable_path=driver_path)
selenium_service = Service(executable_path=driver_path) selenium_service = Service(executable_path=driver_path)
browser_t = MyChrome(service=selenium_service, options=options) browser_t = MyChrome(service=selenium_service, options=options)
elif cloudflare == 1: elif cloudflare == 1:
if sys.platform == "win32": if sys.platform == "win32":
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器 options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
# options.add_argument("--auto-open-devtools-for-tabs")
# options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器
browser_t = MyUCChrome( browser_t = MyUCChrome(
options=options, driver_executable_path=driver_path) options=options, driver_executable_path=driver_path)
links = list(filter(isnotnull, service["links"].split("\n"))) links = list(filter(isnotnull, service["links"].split("\n")))
@ -2200,8 +2135,6 @@ if __name__ == '__main__':
# print("您的操作系统不支持暂停功能。") # print("您的操作系统不支持暂停功能。")
# print("Your operating system does not support the pause function.") # print("Your operating system does not support the pause function.")
# print("线程长度:", len(threads) )
for thread in threads: for thread in threads:
print() print()
thread.join() thread.join()

View File

@ -1,6 +1,6 @@
rmdir /s /q build rmdir /s /q build
rmdir /s /q dist rmdir /s /q dist
@REM pyinstaller -F --icon=favicon.ico easyspider_executestage.py @REM pyinstaller -F --icon=favicon.ico easyspider_executestage.py
pyinstaller -F --icon=favicon.ico --add-data "C:\Python311\Lib\site-packages\onnxruntime\capi\onnxruntime_providers_shared.dll;onnxruntime\capi" --add-data "C:\Python311\Lib\site-packages\ddddocr\common.onnx;ddddocr" easyspider_executestage.py pyinstaller -F --icon=favicon.ico --add-data "C:\Users\q9823\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\onnxruntime\capi\onnxruntime_providers_shared.dll;onnxruntime\capi" --add-data "C:\Users\q9823\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\ddddocr\common.onnx;ddddocr" easyspider_executestage.py
del ..\ElectronJS\chrome_win64\easyspider_executestage.exe del ..\ElectronJS\chrome_win64\easyspider_executestage.exe
copy dist\easyspider_executestage.exe ..\ElectronJS\chrome_win64\easyspider_executestage.exe copy dist\easyspider_executestage.exe ..\ElectronJS\chrome_win64\easyspider_executestage.exe

View File

@ -25,75 +25,150 @@ class MyChrome(webdriver.Chrome):
self.iframe_env = False # 现在的环境是root还是iframe self.iframe_env = False # 现在的环境是root还是iframe
super().__init__(*args, **kwargs) # 调用父类的 __init__ super().__init__(*args, **kwargs) # 调用父类的 __init__
# def find_element(self, by=By.ID, value=None, iframe=False):
# # 在这里改变查找元素的行为
# if self.iframe_env:
# super().switch_to.default_content()
# self.iframe_env = False
# if iframe:
# # 获取所有的 iframe
# try:
# iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
# except Exception as e:
# print(e)
# find_element = False
# # 遍历所有的 iframe 并查找里面的元素
# for iframe in iframes:
# # 切换到 iframe
# super().switch_to.default_content()
# super().switch_to.frame(iframe)
# self.iframe_env = True
# try:
# # 在 iframe 中查找元素
# # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
# element = super().find_element(by=by, value=value)
# find_element = True
# except NoSuchElementException as e:
# print(f"No such element found in the iframe: {str(e)}")
# except Exception as e:
# print(f"Exception: {str(e)}")
# # 完成操作后切回主文档
# # super().switch_to.default_content()
# if find_element:
# return element
# if not find_element:
# raise NoSuchElementException
# else:
# return super().find_element(by=by, value=value)
def find_element_recursive(self, by, value, frames):
for frame in frames:
try:
try:
self.switch_to.frame(frame)
except StaleElementReferenceException:
# If the frame has been refreshed, we need to switch to the parent frame first,
self.switch_to.parent_frame()
self.switch_to.frame(frame)
try:
# !!! Attempt to find the element in the current frame, not the context (iframe environment will not change to default), therefore we use super().find_element instead of self.find_element
element = super(MyChrome, self).find_element(by=by, value=value)
return element
except NoSuchElementException:
# Recurse into nested iframes
nested_frames = super(MyChrome, self).find_elements(By.CSS_SELECTOR, "iframe")
if nested_frames:
element = self.find_element_recursive(by, value, nested_frames)
if element:
return element
except Exception as e:
print(f"Exception while processing frame: {e}")
raise NoSuchElementException(f"Element {value} not found in any frame or iframe")
def find_element(self, by=By.ID, value=None, iframe=False): def find_element(self, by=By.ID, value=None, iframe=False):
# 在这里改变查找元素的行为 self.switch_to.default_content() # Switch back to the main document
if self.iframe_env:
super().switch_to.default_content()
self.iframe_env = False self.iframe_env = False
if iframe: if iframe:
# 获取所有的 iframe frames = self.find_elements(By.CSS_SELECTOR, "iframe")
try: if not frames:
iframes = super().find_elements(By.CSS_SELECTOR, "iframe") raise NoSuchElementException(f"No iframes found in the current page while searching for {value}")
except Exception as e:
print(e)
find_element = False
# 遍历所有的 iframe 并查找里面的元素
for iframe in iframes:
# 切换到 iframe
super().switch_to.default_content()
super().switch_to.frame(iframe)
self.iframe_env = True self.iframe_env = True
try: return self.find_element_recursive(by, value, frames)
# 在 iframe 中查找元素
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
element = super().find_element(by=by, value=value)
find_element = True
except NoSuchElementException as e:
print(f"No such element found in the iframe: {str(e)}")
except Exception as e:
print(f"Exception: {str(e)}")
# 完成操作后切回主文档
# super().switch_to.default_content()
if find_element:
return element
if not find_element:
raise NoSuchElementException
else: else:
return super().find_element(by=by, value=value) # Find element in the main document as normal
return super(MyChrome, self).find_element(by=by, value=value)
# def find_elements(self, by=By.ID, value=None, iframe=False):
# # 在这里改变查找元素的行为
# if self.iframe_env:
# super().switch_to.default_content()
# self.iframe_env = False
# if iframe:
# # 获取所有的 iframe
# iframes = super().find_elements(By.CSS_SELECTOR, "iframe")
# find_element = False
# # 遍历所有的 iframe 并找到里面的元素
# for iframe in iframes:
# # 切换到 iframe
# try:
# super().switch_to.default_content()
# super().switch_to.frame(iframe)
# self.iframe_env = True
# # 在 iframe 中查找元素
# # 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
# elements = super().find_elements(by=by, value=value)
# if len(elements) > 0:
# find_element = True
# # 完成操作后切回主文档
# # super().switch_to.default_content()
# if find_element:
# return elements
# except NoSuchElementException as e:
# print(f"No such element found in the iframe: {str(e)}")
# except Exception as e:
# print(f"Exception: {str(e)}")
# if not find_element:
# raise NoSuchElementException
# else:
# return super().find_elements(by=by, value=value)
def find_elements_recursive(self, by, value, frames):
for frame in frames:
try:
try:
self.switch_to.frame(frame)
except StaleElementReferenceException:
# If the frame has been refreshed, we need to switch to the parent frame first,
self.switch_to.parent_frame()
self.switch_to.frame(frame)
# Directly find elements in the current frame
elements = super(MyChrome, self).find_elements(by=by, value=value)
if elements:
return elements
# Recursively search for elements in nested iframes
nested_frames = super(MyChrome, self).find_elements(By.CSS_SELECTOR, "iframe")
if nested_frames:
elements = self.find_elements_recursive(by, value, nested_frames)
if elements:
return elements
except Exception as e:
print(f"Exception while processing frame: {e}")
raise NoSuchElementException(f"Elements with {value} not found in any frame or iframe")
def find_elements(self, by=By.ID, value=None, iframe=False): def find_elements(self, by=By.ID, value=None, iframe=False):
# 在这里改变查找元素的行为 self.switch_to.default_content() # Switch back to the main document
if self.iframe_env:
super().switch_to.default_content()
self.iframe_env = False self.iframe_env = False
if iframe: if iframe:
# 获取所有的 iframe frames = self.find_elements(By.CSS_SELECTOR, "iframe")
iframes = super().find_elements(By.CSS_SELECTOR, "iframe") if not frames:
find_element = False return [] # Return an empty list if no iframes are found
# 遍历所有的 iframe 并找到里面的元素
for iframe in iframes:
# 切换到 iframe
try:
super().switch_to.default_content()
super().switch_to.frame(iframe)
self.iframe_env = True self.iframe_env = True
# 在 iframe 中查找元素 return self.find_elements_recursive(by, value, frames)
# 在这个例子中,我们查找 XPath 为 '//div[1]' 的元素
elements = super().find_elements(by=by, value=value)
if len(elements) > 0:
find_element = True
# 完成操作后切回主文档
# super().switch_to.default_content()
if find_element:
return elements
except NoSuchElementException as e:
print(f"No such element found in the iframe: {str(e)}")
except Exception as e:
print(f"Exception: {str(e)}")
if not find_element:
raise NoSuchElementException
else: else:
return super().find_elements(by=by, value=value) # Find elements in the main document as normal
return super(MyChrome, self).find_elements(by=by, value=value)
# MacOS不支持直接打包带Cloudflare的功能如果要自己编译运行可以把这个if去掉然后配置好浏览器和driver路径 # MacOS不支持直接打包带Cloudflare的功能如果要自己编译运行可以把这个if去掉然后配置好浏览器和driver路径
if sys.platform != "darwin": if sys.platform != "darwin":

View File

@ -1,12 +1,12 @@
{ {
"name": "EasySpider", "name": "EasySpider",
"version": "0.5.0", "version": "0.6.0",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "EasySpider", "name": "EasySpider",
"version": "0.5.0", "version": "0.6.0",
"license": "AGPL-3.0", "license": "AGPL-3.0",
"dependencies": { "dependencies": {
"crx": "^5.0.1", "crx": "^5.0.1",

View File

@ -1 +1 @@
{"language":"zh"} {"language":"en"}