mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-23 01:29:20 +08:00
New Code
This commit is contained in:
parent
be1bfb7a6b
commit
63ff94a4f2
@ -738,7 +738,7 @@ If the expression returns a value greater than 0 or evaluates to True, the opera
|
|||||||
</select>
|
</select>
|
||||||
<label>Pause/Continue Task Shortcut Key:</label>
|
<label>Pause/Continue Task Shortcut Key:</label>
|
||||||
<input spellcheck=false onkeydown="inputDelete(event)" type="text" value="p" id="pauseKey" class="form-control"></input>
|
<input spellcheck=false onkeydown="inputDelete(event)" type="text" value="p" id="pauseKey" class="form-control"></input>
|
||||||
|
<input type="hidden" id="browser" name="browser" value="chrome"></input>
|
||||||
</div>
|
</div>
|
||||||
<div class="modal-footer">
|
<div class="modal-footer">
|
||||||
<button type="button" id="saveAsButton" style="width: 100px" class="btn btn-outline-primary">Save as</button>
|
<button type="button" id="saveAsButton" style="width: 100px" class="btn btn-outline-primary">Save as</button>
|
||||||
|
@ -738,7 +738,7 @@ print(emotlib.emoji()) # 使用其中的函数。
|
|||||||
</select>
|
</select>
|
||||||
<label>任务暂停/继续快捷键:</label>
|
<label>任务暂停/继续快捷键:</label>
|
||||||
<input spellcheck=false onkeydown="inputDelete(event)" type="text" value="p" id="pauseKey" class="form-control"></input>
|
<input spellcheck=false onkeydown="inputDelete(event)" type="text" value="p" id="pauseKey" class="form-control"></input>
|
||||||
|
<input type="hidden" id="browser" name="browser" value="chrome"></input>
|
||||||
</div>
|
</div>
|
||||||
<div class="modal-footer">
|
<div class="modal-footer">
|
||||||
<button type="button" id="saveAsButton" style="width: 100px" class="btn btn-outline-primary">另存为</button>
|
<button type="button" id="saveAsButton" style="width: 100px" class="btn btn-outline-primary">另存为</button>
|
||||||
|
@ -586,6 +586,7 @@ function saveService(type) {
|
|||||||
"startFromExit": parseInt($("#startFromExit").val()),
|
"startFromExit": parseInt($("#startFromExit").val()),
|
||||||
"pauseKey": $("#pauseKey").val(),
|
"pauseKey": $("#pauseKey").val(),
|
||||||
"containJudge": containJudge,
|
"containJudge": containJudge,
|
||||||
|
"browser": $("#browser").val(),
|
||||||
"desc": serviceDescription,
|
"desc": serviceDescription,
|
||||||
"inputParameters": inputParameters,
|
"inputParameters": inputParameters,
|
||||||
"outputParameters": outputParameters,
|
"outputParameters": outputParameters,
|
||||||
|
File diff suppressed because one or more lines are too long
2
ExecuteStage/.vscode/launch.json
vendored
2
ExecuteStage/.vscode/launch.json
vendored
@ -12,7 +12,7 @@
|
|||||||
"justMyCode": false,
|
"justMyCode": false,
|
||||||
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
|
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
|
||||||
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
|
||||||
"args": ["--ids", "[47]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
|
"args": ["--ids", "[48]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
|
||||||
"--read_type", "remote"]
|
"--read_type", "remote"]
|
||||||
// "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
|
// "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
|
||||||
}
|
}
|
||||||
|
@ -1734,7 +1734,7 @@ class BrowserThread(Thread):
|
|||||||
location = "Data/Task_" + \
|
location = "Data/Task_" + \
|
||||||
str(self.id) + "/" + self.saveName + "/" + temp_name
|
str(self.id) + "/" + self.saveName + "/" + temp_name
|
||||||
image.save(location)
|
image.save(location)
|
||||||
ocr = DdddOcr()
|
ocr = DdddOcr(show_ad=False)
|
||||||
with open(location, 'rb') as f:
|
with open(location, 'rb') as f:
|
||||||
image_bytes = f.read()
|
image_bytes = f.read()
|
||||||
content = ocr.classification(image_bytes)
|
content = ocr.classification(image_bytes)
|
||||||
@ -2206,8 +2206,23 @@ if __name__ == '__main__':
|
|||||||
'mobileEmulation', {'deviceName': 'iPhone X'}) # 模拟iPhone X浏览
|
'mobileEmulation', {'deviceName': 'iPhone X'}) # 模拟iPhone X浏览
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
|
try:
|
||||||
|
browser = service["browser"]
|
||||||
|
except:
|
||||||
|
browser = "chrome"
|
||||||
|
if browser == "chrome":
|
||||||
selenium_service = Service(executable_path=driver_path)
|
selenium_service = Service(executable_path=driver_path)
|
||||||
browser_t = MyChrome(service=selenium_service, options=options)
|
browser_t = MyChrome(service=selenium_service, options=options)
|
||||||
|
elif browser == "edge":
|
||||||
|
from selenium.webdriver.edge.service import Service as EdgeService
|
||||||
|
from selenium.webdriver.edge.options import Options as EdgeOptions
|
||||||
|
from myChrome import MyEdge
|
||||||
|
selenium_service = EdgeService(executable_path="msedgedriver.exe")
|
||||||
|
options = EdgeOptions()
|
||||||
|
options.use_chromium = True
|
||||||
|
options.add_argument("--ie-mode")
|
||||||
|
options.add_argument("ie.edgepath=msedge.exe")
|
||||||
|
browser_t = MyEdge(service=selenium_service, options=options)
|
||||||
elif cloudflare == 1:
|
elif cloudflare == 1:
|
||||||
if sys.platform == "win32":
|
if sys.platform == "win32":
|
||||||
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
|
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
|
||||||
|
BIN
ExecuteStage/msedgedriver.exe
Normal file
BIN
ExecuteStage/msedgedriver.exe
Normal file
Binary file not shown.
@ -172,6 +172,90 @@ class MyChrome(webdriver.Chrome):
|
|||||||
elements = super(MyChrome, self).find_elements(by=by, value=value)
|
elements = super(MyChrome, self).find_elements(by=by, value=value)
|
||||||
return elements
|
return elements
|
||||||
|
|
||||||
|
|
||||||
|
class MyEdge(webdriver.Ie):
    """``webdriver.Ie`` subclass whose element lookups can also search iframes.

    ``find_element`` and ``find_elements`` take an extra ``iframe`` flag.
    With ``iframe=False`` they search the main document only. With
    ``iframe=True`` they walk every ``<iframe>`` of the page depth-first
    (including nested iframes) and return the first match, leaving the
    driver switched into the frame that contains it.
    """

    def __init__(self, *args, **kwargs):
        # Tracks whether the most recent lookup targeted the root document
        # (False) or an iframe (True).
        self.iframe_env = False
        super().__init__(*args, **kwargs)  # Delegate driver start-up to the parent class.

    def _enter_frame(self, frame):
        """Switch into ``frame``, retrying once from the parent frame if it is stale."""
        try:
            self.switch_to.frame(frame)
        except StaleElementReferenceException:
            # If the frame has been refreshed, we need to switch to the
            # parent frame first and then enter the frame again.
            self.switch_to.parent_frame()
            self.switch_to.frame(frame)

    def _leave_frame(self):
        """Step back to the parent browsing context after a miss (best effort)."""
        try:
            self.switch_to.parent_frame()
        except Exception as e:
            print(f"Exception while processing frame: {e}")

    def find_element_recursive(self, by, value, frames):
        """Return the first element matching ``by``/``value`` inside ``frames``.

        Each frame is searched first; if it has no match, its nested iframes
        are searched recursively. Errors raised while handling a frame are
        printed and that frame is skipped.

        Raises:
            NoSuchElementException: if no frame (or nested iframe) matches.
        """
        for frame in frames:
            try:
                self._enter_frame(frame)
            except Exception as e:
                print(f"Exception while processing frame: {e}")
                continue

            try:
                try:
                    # Search the *current* frame with the parent-class lookup:
                    # self.find_element would first reset the context to the
                    # main document.
                    return super().find_element(by=by, value=value)
                except NoSuchElementException:
                    # Recurse into nested iframes.
                    nested_frames = super().find_elements(By.CSS_SELECTOR, "iframe")
                    if nested_frames:
                        # Either returns the element or raises; a raise is
                        # reported by the handler below like any other miss.
                        return self.find_element_recursive(by, value, nested_frames)
            except Exception as e:
                print(f"Exception while processing frame: {e}")

            # Nothing found here: return to the context that owns ``frames``
            # so the next sibling can be entered. Previously the driver stayed
            # inside the frame just searched, and the single parent_frame()
            # retry in _enter_frame climbs only one level, so a sibling that
            # followed a failed nested search was presumably skipped.
            self._leave_frame()

        raise NoSuchElementException(f"Element {value} not found in any frame or iframe")

    def find_element(self, by=By.ID, value=None, iframe=False):
        """Find one element in the main document, or in the iframes if ``iframe``.

        The driver is always switched back to the main document first.

        Raises:
            NoSuchElementException: if the element is not found, or if
                ``iframe`` is true and the page contains no iframes.
        """
        self.switch_to.default_content()  # Switch back to the main document
        self.iframe_env = False
        if not iframe:
            # Find element in the main document as normal
            return super().find_element(by=by, value=value)

        frames = self.find_elements(By.CSS_SELECTOR, "iframe")
        if not frames:
            raise NoSuchElementException(f"No iframes found in the current page while searching for {value}")
        self.iframe_env = True
        return self.find_element_recursive(by, value, frames)

    def find_elements_recursive(self, by, value, frames):
        """Return the first non-empty match list found inside ``frames``.

        Frames are visited depth-first; the matches of the first frame that
        contains any are returned (results are not merged across frames).
        Errors raised while handling a frame are printed and that frame is
        skipped.

        Raises:
            NoSuchElementException: if no frame (or nested iframe) matches.
        """
        for frame in frames:
            try:
                self._enter_frame(frame)
            except Exception as e:
                print(f"Exception while processing frame: {e}")
                continue

            try:
                # Directly find elements in the current frame
                elements = super().find_elements(by=by, value=value)
                if elements:
                    return elements
                # Recursively search for elements in nested iframes
                nested_frames = super().find_elements(By.CSS_SELECTOR, "iframe")
                if nested_frames:
                    elements = self.find_elements_recursive(by, value, nested_frames)
                    if elements:
                        return elements
            except Exception as e:
                print(f"Exception while processing frame: {e}")

            # Nothing found here: go back to the context that owns ``frames``
            # before trying the next sibling frame.
            self._leave_frame()

        raise NoSuchElementException(f"Elements with {value} not found in any frame or iframe")

    def find_elements(self, by=By.ID, value=None, iframe=False):
        """Find elements in the main document, or in the iframes if ``iframe``.

        The driver is always switched back to the main document first. With
        ``iframe=True`` an empty list is returned when the page has no
        iframes.

        Raises:
            NoSuchElementException: if ``iframe`` is true, iframes exist, and
                none of them contains a match.
        """
        self.switch_to.default_content()  # Switch back to the main document
        self.iframe_env = False
        if not iframe:
            # Find elements in the main document as normal
            return super().find_elements(by=by, value=value)

        frames = self.find_elements(By.CSS_SELECTOR, "iframe")
        if not frames:
            return []  # Return an empty list if no iframes are found
        self.iframe_env = True
        return self.find_elements_recursive(by, value, frames)
|
||||||
|
|
||||||
|
|
||||||
# MacOS不支持直接打包带Cloudflare的功能,如果要自己编译运行,可以把这个if去掉,然后配置好浏览器和driver路径
|
# MacOS不支持直接打包带Cloudflare的功能,如果要自己编译运行,可以把这个if去掉,然后配置好浏览器和driver路径
|
||||||
if sys.platform != "darwin":
|
if sys.platform != "darwin":
|
||||||
ES = True
|
ES = True
|
||||||
|
1
ExecuteStage/test_ie/Default/Preferences
Normal file
1
ExecuteStage/test_ie/Default/Preferences
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"alternate_error_pages":{"enabled":false},"autofill":{"enabled":false},"browser":{"check_default_browser":false},"distribution":{"import_bookmarks":false,"import_history":false,"import_search_engine":false,"make_chrome_default_for_user":false,"skip_first_run_ui":true},"dns_prefetching":{"enabled":false},"profile":{"content_settings":{"pattern_pairs":{"https://*,*":{"media-stream":{"audio":"Default","video":"Default"}}}},"default_content_setting_values":{"geolocation":1},"default_content_settings":{"geolocation":1,"mouselock":1,"notifications":1,"popups":1,"ppapi-broker":1},"password_manager_enabled":false},"safebrowsing":{"enabled":false},"search":{"suggest_enabled":false},"translate":{"enabled":false}}
|
0
ExecuteStage/test_ie/First Run
Normal file
0
ExecuteStage/test_ie/First Run
Normal file
1
ExecuteStage/test_ie/Local State
Normal file
1
ExecuteStage/test_ie/Local State
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"background_mode":{"enabled":false},"ssl":{"rev_checking":{"enabled":false}}}
|
Loading…
x
Reference in New Issue
Block a user