mirror of
https://github.com/luzhisheng/js_reverse.git
synced 2025-04-22 08:11:29 +08:00
提取pdf文件中图片和文字
This commit is contained in:
parent
2dc7cb4a6d
commit
97fccbd604
@ -1,5 +1,5 @@
|
||||
from datetime import datetime
|
||||
from extract_from_pages import read_pdf
|
||||
from program.extract_from_pages import read_pdf
|
||||
from base import Base
|
||||
from PIL import Image
|
||||
import pandas as pd
|
||||
|
@ -29,7 +29,8 @@ class TestingAgencyReport(PDFBase):
|
||||
valid_time_list = []
|
||||
for line in lines:
|
||||
if 'S$T' in line or 'SST' in line:
|
||||
text_dict['方案编号'] = line.replace('S$T', 'SST').replace('试验方案编号:', '')
|
||||
text_dict['方案编号'] = line.replace('S$T', 'SST')\
|
||||
.replace('试验方案编号:', '').replace('$', '')
|
||||
if 'CNAS' in line:
|
||||
text_dict['标志'] = 'cnas中文,'
|
||||
valid_time = self.is_valid_time(line)
|
||||
@ -65,7 +66,7 @@ class TestingAgencyReport(PDFBase):
|
||||
if 'CSTBB' in line:
|
||||
for li in line.split():
|
||||
if 'CSTBB' in li:
|
||||
self.xlsx_keys['报告编号'] = li.strip().replace('报告编号:', '')
|
||||
self.xlsx_keys['报告编号'] = li.strip().replace('报告编号:', '').replace(')', '')
|
||||
if '样品名称' in line:
|
||||
try:
|
||||
self.xlsx_keys['样品名称'] = line.split()[1].strip().replace(': ', '')
|
||||
|
Loading…
x
Reference in New Issue
Block a user