mirror of
https://github.com/luzhisheng/js_reverse.git
synced 2025-04-20 01:34:55 +08:00
pdf 处理
This commit is contained in:
parent
9ebc5d49a7
commit
c7d2099d3b
@@ -27,6 +27,7 @@ class Discern(Base):
|
||||
# 替换空单元格
|
||||
pf.fillna(' ', inplace=True)
|
||||
# 输出
|
||||
pf = pf.sort_values(by='样品名称')
|
||||
pf.to_excel(file_path, index=False)
|
||||
# 保存表格
|
||||
file_path.close()
|
||||
@@ -46,12 +47,15 @@ class Discern(Base):
|
||||
lines = text.split()
|
||||
valid_time_list = []
|
||||
for line in lines:
|
||||
if 'SST' in line:
|
||||
if 'SST' in line and not self.xlsx_keys['方案编号']:
|
||||
self.xlsx_keys['方案编号'] = line
|
||||
|
||||
if '签发日期' in line:
|
||||
if '签发日期' in line and not self.xlsx_keys['签发日期']:
|
||||
self.xlsx_keys['签发日期'] = line.replace('签发日期', '')
|
||||
|
||||
if 'GLP' in line:
|
||||
self.xlsx_keys['标志'] += 'GLP,'
|
||||
|
||||
valid_time = self.is_valid_time(line)
|
||||
if valid_time:
|
||||
valid_time_list.append(valid_time)
|
||||
@@ -85,7 +89,6 @@ class Discern(Base):
|
||||
if '最终报告' in line or 'Final Report' in line:
|
||||
self.xlsx_keys['检测项目'] = line_str.replace('最终报告', '').replace('Final Report', '')\
|
||||
.replace('中国认可国际互认检测', '')
|
||||
self.xlsx_keys['标志'] = ''
|
||||
|
||||
company_list = company.split()
|
||||
for company_str in company_list:
|
||||
@@ -161,8 +164,7 @@ class Discern(Base):
|
||||
self.pdf_text(file_path)
|
||||
self.pdf_images(file_path)
|
||||
self.get_images_text()
|
||||
if not self.xlsx_keys['方案编号'] and not self.xlsx_keys['签发日期']:
|
||||
self.pdf_all_text(file_path)
|
||||
self.pdf_all_text(file_path)
|
||||
if not self.xlsx_keys['方案编号']:
|
||||
matches = re.findall(r'SST\d+BB', file_name)
|
||||
if matches:
|
||||
|
Loading…
x
Reference in New Issue
Block a user