From b8f8c190bafd767371cd7e9966905da74f571bf1 Mon Sep 17 00:00:00 2001 From: naibo Date: Sun, 9 Jul 2023 01:41:26 +0800 Subject: [PATCH] V0.3.5 --- .temp_to_pub/compress.py | 39 +++++++++++++++++-------- ExecuteStage/easyspider_executestage.py | 5 +++- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/.temp_to_pub/compress.py b/.temp_to_pub/compress.py index 310aaf2..a4750a0 100644 --- a/.temp_to_pub/compress.py +++ b/.temp_to_pub/compress.py @@ -14,8 +14,13 @@ import py7zr def compress_folder_to_7z(folder_path, output_file): if os.path.exists(output_file): os.remove(output_file) - with py7zr.SevenZipFile(output_file, 'w') as archive: - archive.writeall(folder_path, output_file) + # with py7zr.SevenZipFile(output_file, 'w') as archive: + # archive.writeall(folder_path, output_file) + # 压缩文件夹 + try: + subprocess.call(["7z", "a", output_file, folder_path]) + except: + subprocess.call(["7za", "a", output_file, folder_path]) def compress_folder_to_7z_split(folder_path, output_file): if os.path.exists(output_file): @@ -54,11 +59,13 @@ if __name__ == "__main__": shutil.rmtree("./EasySpider_windows_x64/Data") if os.path.exists("./EasySpider_windows_x64/execution_instances"): shutil.rmtree("./EasySpider_windows_x64/execution_instances") - os.remove("./EasySpider_windows_x64/config.json") - os.remove("./EasySpider_windows_x64/mysql_config.json") + if os.path.exists("./EasySpider_windows_x64/config.json"): + os.remove("./EasySpider_windows_x64/config.json") + if os.path.exists("./EasySpider_windows_x64/mysql_config.json"): + os.remove("./EasySpider_windows_x64/mysql_config.json") os.mkdir("./EasySpider_windows_x64/Data") os.mkdir("./EasySpider_windows_x64/execution_instances") - compress_folder_to_7z_split("./EasySpider_windows_x64", file_name) + # compress_folder_to_7z_split("./EasySpider_windows_x64", file_name) print(f"Compress {file_name} Split successfully!") compress_folder_to_7z("./EasySpider_windows_x64", file_name) print(f"Compress {file_name} successfully!") @@ -70,20 +77,28 @@ if __name__ == "__main__": shutil.rmtree("./EasySpider_windows_x86/Data") if os.path.exists("./EasySpider_windows_x86/execution_instances"): shutil.rmtree("./EasySpider_windows_x86/execution_instances") - os.remove("./EasySpider_windows_x86/config.json") - os.remove("./EasySpider_windows_x86/mysql_config.json") + if os.path.exists("./EasySpider_windows_x86/config.json"): + os.remove("./EasySpider_windows_x86/config.json") + if os.path.exists("./EasySpider_windows_x86/mysql_config.json"): + os.remove("./EasySpider_windows_x86/mysql_config.json") os.mkdir("./EasySpider_windows_x86/Data") os.mkdir("./EasySpider_windows_x86/execution_instances") - compress_folder_to_7z("./EasySpider_windows_x64", file_name) + compress_folder_to_7z_split("./EasySpider_windows_x86", file_name) + print(f"Compress {file_name} Split successfully!") + compress_folder_to_7z("./EasySpider_windows_x86", file_name) print(f"Compress {file_name} successfully!") elif sys.platform == "linux" and platform.architecture()[0] == "64bit": file_name = f"EasySpider_{easyspider_version}_Linux_x64.7z" if os.path.exists("./EasySpider_Linux_x64/user_data"): shutil.rmtree("./EasySpider_Linux_x64/user_data") - shutil.rmtree("./EasySpider_Linux_x64/Data") - shutil.rmtree("./EasySpider_Linux_x64/execution_instances") - shutil.rmtree("./EasySpider_Linux_x64/config.json") - shutil.rmtree("./EasySpider_Linux_x64/mysql_config.json") + if os.path.exists("./EasySpider_Linux_x64/Data"): + shutil.rmtree("./EasySpider_Linux_x64/Data") + if os.path.exists("./EasySpider_Linux_x64/execution_instances"): + shutil.rmtree("./EasySpider_Linux_x64/execution_instances") + if os.path.exists("./EasySpider_Linux_x64/config.json"): + os.remove("./EasySpider_Linux_x64/config.json") + if os.path.exists("./EasySpider_Linux_x64/mysql_config.json"): + os.remove("./EasySpider_Linux_x64/mysql_config.json") os.mkdir("./EasySpider_Linux_x64/Data") os.mkdir("./EasySpider_Linux_x64/execution_instances") # compress_folder_to_7z("./EasySpider_Linux_x64", file_name) diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index 62ff0d1..e4df6ae 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -1144,7 +1144,10 @@ class BrowserThread(Thread): # 提取数据事件 def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0): - pageHTML = etree.HTML(self.browser.page_source) + try: + pageHTML = etree.HTML(self.browser.page_source) + except: + pageHTML = "" if loopElement != "": # 只在数据在循环中提取时才需要获取循环元素 try: loopElementOuterHTML = loopElement.get_attribute('outerHTML')