diff --git a/pdf处理/img/cma.jpg b/pdf处理/img/cma.png similarity index 100% rename from pdf处理/img/cma.jpg rename to pdf处理/img/cma.png diff --git a/pdf处理/img/cnas.jpg b/pdf处理/img/cnas.png similarity index 100% rename from pdf处理/img/cnas.jpg rename to pdf处理/img/cnas.png diff --git a/pdf处理/program/discern.py b/pdf处理/program/discern.py index 30de6ed..afaf469 100644 --- a/pdf处理/program/discern.py +++ b/pdf处理/program/discern.py @@ -11,11 +11,12 @@ class Discern(object): def __init__(self): self.image_text_ocr = ImageTextOcr() self.xlsx_keys = {} + self.xlsx_keys_list = [] self.num = 0 def export_excel(self, export): # 将字典列表转换为DataFrame - pf = pd.DataFrame(list([export])) + pf = pd.DataFrame(list(export)) file_path = pd.ExcelWriter('../docs/结果.xlsx') # 替换空单元格 pf.fillna(' ', inplace=True) @@ -56,16 +57,16 @@ class Discern(object): for img in images: # 获取图片的二进制流 self.num += 1 - image_file = f"../target_img/image_{self.num}.jpg" + image_file = f"../target_img/image_{self.num}.png" with open(image_file, "wb") as f: f.write(img['stream'].get_data()) def get_images_text(self): for i in range(1, self.num + 1): try: - cma_flag = image_compare.run(f'../target_img/image_{i}.jpg', '../img/cma.jpg') - cnas_flag = image_compare.run(f'../target_img/image_{i}.jpg', '../img/cnas.jpg') - text_list = self.image_text_ocr.run(f'../target_img/image_{i}.jpg') + cma_flag = image_compare.run(f'../target_img/image_{i}.png', '../img/cma.png') + cnas_flag = image_compare.run(f'../target_img/image_{i}.png', '../img/cnas.png') + text_list = self.image_text_ocr.run(f'../target_img/image_{i}.png') except cv2.error as c: pass if cma_flag: @@ -108,7 +109,8 @@ class Discern(object): self.pdf_text(file_path) self.pdf_images(file_path) self.get_images_text() - self.export_excel(self.xlsx_keys) + self.xlsx_keys_list.append(self.xlsx_keys) + self.export_excel(self.xlsx_keys_list) if __name__ == '__main__': diff --git a/抖音js逆向学习/抖店精选联盟数据/sql/baiyin/eb_supports_baiyin.sql b/抖音js逆向学习/抖店精选联盟数据/sql/baiyin/eb_supports_baiyin.sql index cd666df..ca2fef7 100644 --- a/抖音js逆向学习/抖店精选联盟数据/sql/baiyin/eb_supports_baiyin.sql +++ b/抖音js逆向学习/抖店精选联盟数据/sql/baiyin/eb_supports_baiyin.sql @@ -272,26 +272,6 @@ CREATE TABLE `project_buyin_authorStatData` ( UNIQUE KEY `task_id` (`task_id`,`deduplication`) USING BTREE ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=DYNAMIC; /*!40101 SET character_set_client = @saved_cs_client */; - --- --- Table structure for table `project_daduoduo_dy_Tiktok_search_Keyword` --- - -DROP TABLE IF EXISTS `project_daduoduo_dy_Tiktok_search_Keyword`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!50503 SET character_set_client = utf8mb4 */; -CREATE TABLE `project_daduoduo_dy_Tiktok_search_Keyword` ( - `task_id` varchar(100) DEFAULT NULL COMMENT '项目id', - `payload_get` text COMMENT 'get请求参数', - `payload_post` varchar(255) DEFAULT '' COMMENT 'post请求参数', - `deduplication` varchar(50) DEFAULT '' COMMENT '去重字段', - `weight` tinyint(1) DEFAULT '0' COMMENT '权重', - `status` tinyint(1) DEFAULT '0', - `create_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP, - `update_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, - UNIQUE KEY `task_id` (`task_id`,`deduplication`) USING BTREE -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=DYNAMIC; -/*!40101 SET character_set_client = @saved_cs_client */; /*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; /*!40101 SET SQL_MODE=@OLD_SQL_MODE */; @@ -302,4 +282,4 @@ CREATE TABLE `project_daduoduo_dy_Tiktok_search_Keyword` ( /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; --- Dump completed on 2023-07-19 17:35:07 +-- Dump completed on 2023-07-24 11:36:31