|
@@ -2,7 +2,6 @@ package com.simuwang.daq.components.report.parser.pdf;
|
|
|
|
|
|
import cn.hutool.core.collection.CollUtil;
|
|
import cn.hutool.core.collection.CollUtil;
|
|
import cn.hutool.core.util.StrUtil;
|
|
import cn.hutool.core.util.StrUtil;
|
|
-import com.simuwang.base.common.conts.Constants;
|
|
|
|
import com.simuwang.base.common.exception.APIException;
|
|
import com.simuwang.base.common.exception.APIException;
|
|
import com.simuwang.base.mapper.EmailFieldMappingMapper;
|
|
import com.simuwang.base.mapper.EmailFieldMappingMapper;
|
|
import com.simuwang.base.pojo.dto.report.ReportBaseInfoDTO;
|
|
import com.simuwang.base.pojo.dto.report.ReportBaseInfoDTO;
|
|
@@ -14,7 +13,10 @@ import com.simuwang.daq.components.report.parser.AbstractReportParser;
|
|
import org.apache.pdfbox.Loader;
|
|
import org.apache.pdfbox.Loader;
|
|
import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
|
|
import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
|
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
|
-import technology.tabula.*;
|
|
|
|
|
|
+import technology.tabula.CustomObjectExtractor;
|
|
|
|
+import technology.tabula.Page;
|
|
|
|
+import technology.tabula.PageIterator;
|
|
|
|
+import technology.tabula.Table;
|
|
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
|
|
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
|
|
|
|
|
|
import java.io.IOException;
|
|
import java.io.IOException;
|
|
@@ -39,10 +41,9 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
|
|
// 解析报告名称和表格
|
|
// 解析报告名称和表格
|
|
String reportName = null;
|
|
String reportName = null;
|
|
try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(params.getFilepath()))) {
|
|
try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(params.getFilepath()))) {
|
|
- CustomPDFTextStripper stripper = new CustomPDFTextStripper(document, 0);
|
|
|
|
|
|
+ CustomPDFTextStripper stripper = new CustomPDFTextStripper(document, 1);
|
|
stripper.setSortByPosition(true);
|
|
stripper.setSortByPosition(true);
|
|
- String text = stripper.getText(document).replace(Constants.WATERMARK_REPLACE, Constants.EMPTY);
|
|
|
|
- List<String> textList = StrUtil.split(text, System.lineSeparator());
|
|
|
|
|
|
+ List<String> textList = StrUtil.split(stripper.getText(document), System.lineSeparator());
|
|
textList.removeIf(StrUtil::isBlank);
|
|
textList.removeIf(StrUtil::isBlank);
|
|
if (CollUtil.isNotEmpty(textList)) {
|
|
if (CollUtil.isNotEmpty(textList)) {
|
|
reportName = this.matchReportName(textList.get(0));
|
|
reportName = this.matchReportName(textList.get(0));
|