PDF 处理

This commit is contained in:
aiyingfeng 2023-07-27 10:52:01 +08:00
parent 9ebc5d49a7
commit c7d2099d3b

View File

@ -27,6 +27,7 @@ class Discern(Base):
# 替换空单元格 # 替换空单元格
pf.fillna(' ', inplace=True) pf.fillna(' ', inplace=True)
# 输出 # 输出
pf = pf.sort_values(by='样品名称')
pf.to_excel(file_path, index=False) pf.to_excel(file_path, index=False)
# 保存表格 # 保存表格
file_path.close() file_path.close()
@ -46,12 +47,15 @@ class Discern(Base):
lines = text.split() lines = text.split()
valid_time_list = [] valid_time_list = []
for line in lines: for line in lines:
if 'SST' in line: if 'SST' in line and not self.xlsx_keys['方案编号']:
self.xlsx_keys['方案编号'] = line self.xlsx_keys['方案编号'] = line
if '签发日期' in line: if '签发日期' in line and not self.xlsx_keys['签发日期']:
self.xlsx_keys['签发日期'] = line.replace('签发日期', '') self.xlsx_keys['签发日期'] = line.replace('签发日期', '')
if 'GLP' in line:
self.xlsx_keys['标志'] += 'GLP,'
valid_time = self.is_valid_time(line) valid_time = self.is_valid_time(line)
if valid_time: if valid_time:
valid_time_list.append(valid_time) valid_time_list.append(valid_time)
@ -85,7 +89,6 @@ class Discern(Base):
if '最终报告' in line or 'Final Report' in line: if '最终报告' in line or 'Final Report' in line:
self.xlsx_keys['检测项目'] = line_str.replace('最终报告', '').replace('Final Report', '')\ self.xlsx_keys['检测项目'] = line_str.replace('最终报告', '').replace('Final Report', '')\
.replace('中国认可国际互认检测', '') .replace('中国认可国际互认检测', '')
self.xlsx_keys['标志'] = ''
company_list = company.split() company_list = company.split()
for company_str in company_list: for company_str in company_list:
@ -161,8 +164,7 @@ class Discern(Base):
self.pdf_text(file_path) self.pdf_text(file_path)
self.pdf_images(file_path) self.pdf_images(file_path)
self.get_images_text() self.get_images_text()
if not self.xlsx_keys['方案编号'] and not self.xlsx_keys['签发日期']: self.pdf_all_text(file_path)
self.pdf_all_text(file_path)
if not self.xlsx_keys['方案编号']: if not self.xlsx_keys['方案编号']:
matches = re.findall(r'SST\d+BB', file_name) matches = re.findall(r'SST\d+BB', file_name)
if matches: if matches: