mirror of
https://github.com/luzhisheng/js_reverse.git
synced 2025-04-22 11:12:48 +08:00
提取pdf文件中图片和文字
This commit is contained in:
parent
2dc7cb4a6d
commit
97fccbd604
@@ -1,5 +1,5 @@
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from extract_from_pages import read_pdf
|
from program.extract_from_pages import read_pdf
|
||||||
from base import Base
|
from base import Base
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
@@ -29,7 +29,8 @@ class TestingAgencyReport(PDFBase):
|
|||||||
valid_time_list = []
|
valid_time_list = []
|
||||||
for line in lines:
|
for line in lines:
|
||||||
if 'S$T' in line or 'SST' in line:
|
if 'S$T' in line or 'SST' in line:
|
||||||
text_dict['方案编号'] = line.replace('S$T', 'SST').replace('试验方案编号:', '')
|
text_dict['方案编号'] = line.replace('S$T', 'SST')\
|
||||||
|
.replace('试验方案编号:', '').replace('$', '')
|
||||||
if 'CNAS' in line:
|
if 'CNAS' in line:
|
||||||
text_dict['标志'] = 'cnas中文,'
|
text_dict['标志'] = 'cnas中文,'
|
||||||
valid_time = self.is_valid_time(line)
|
valid_time = self.is_valid_time(line)
|
||||||
@@ -65,7 +66,7 @@ class TestingAgencyReport(PDFBase):
|
|||||||
if 'CSTBB' in line:
|
if 'CSTBB' in line:
|
||||||
for li in line.split():
|
for li in line.split():
|
||||||
if 'CSTBB' in li:
|
if 'CSTBB' in li:
|
||||||
self.xlsx_keys['报告编号'] = li.strip().replace('报告编号:', '')
|
self.xlsx_keys['报告编号'] = li.strip().replace('报告编号:', '').replace(')', '')
|
||||||
if '样品名称' in line:
|
if '样品名称' in line:
|
||||||
try:
|
try:
|
||||||
self.xlsx_keys['样品名称'] = line.split()[1].strip().replace(': ', '')
|
self.xlsx_keys['样品名称'] = line.split()[1].strip().replace(': ', '')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user