提取pdf文件中图片和文字

2025-04-12 11:37:09 +08:00 · 2023-08-05 10:41:28 +08:00 · 2023-08-05 10:41:28 +08:00 · 4cefde375c
commit 4cefde375c
parent 9ce7645354
2 changed files with 3 additions and 2 deletions
--- a/pdf处理/program/pdf_base.py
+++ b/pdf处理/program/pdf_base.py
@@ -110,7 +110,7 @@ class PDFBase(Base):
        # 替换空单元格
        pf.fillna(' ', inplace=True)
        # 输出
-        pf = pf.sort_values(by='样品名称')
+        pf = pf.sort_values(by='文件名')
        pf.to_excel(file_path, index=False)
        # 保存表格
        file_path.close()
--- a/pdf处理/program/testing_agency_report.py
+++ b/pdf处理/program/testing_agency_report.py
@@ -126,7 +126,8 @@ class TestingAgencyReport(PDFBase):
                    '检测项目': '',
                    '标志': '',
                    '签发日期': '',
-                    '公司名称': ''
+                    '公司名称': '',
+                    '文件名': ''
                }
                self.remove_img(img_path)
                if entry.is_file():