Browse Source

临时保存

wangzaijun 7 months ago
parent
commit
a348f593ea

+ 0 - 5
service-base/src/main/java/com/simuwang/base/common/conts/Constants.java

@@ -1,17 +1,12 @@
 package com.simuwang.base.common.conts;
 
 
-import cn.hutool.core.util.StrUtil;
-
 /**
  * 通用常量信息
  *
  * @author ruoyi
  */
 public class Constants {
-    public static final String EMPTY = StrUtil.EMPTY;
-    public static final String WATERMARK_REPLACE = System.lineSeparator();
-
     public static final long DEFAULT_SERIAL_ID = 999L;
 
     /**

+ 4 - 5
service-daq/src/main/java/com/simuwang/daq/components/CustomPDFTextStripper.java

@@ -2,9 +2,8 @@ package com.simuwang.daq.components;
 
 import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.collection.ListUtil;
-import com.simuwang.base.common.conts.Constants;
+import cn.hutool.core.util.StrUtil;
 import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.pdfbox.text.TextPosition;
 import technology.tabula.TextStripper;
 
@@ -34,7 +33,7 @@ public class CustomPDFTextStripper extends TextStripper {
         }
         // 如果全是水印文字则直接去除
         if (textPositions.size() == weights.size()) {
-            super.writeString(Constants.WATERMARK_REPLACE);
+            super.writeString(System.lineSeparator());
             return;
         }
         // 否则去除水印(文字没有旋转角度,并且水印字体大小没有包含当前文字时说明是正常文字;否则识别为水印并用特殊符号代替)
@@ -42,8 +41,8 @@ public class CustomPDFTextStripper extends TextStripper {
         for (TextPosition textPosition : textPositions) {
             float col = textPosition.getTextMatrix().getValue(0, 1);
             float width = textPosition.getWidth();
-            newTexts.add(col == 0. && !weights.contains(width) ? textPosition.getUnicode() : Constants.WATERMARK_REPLACE);
+            newTexts.add(col == 0. && !weights.contains(width) ? textPosition.getUnicode() : System.lineSeparator());
         }
-        super.writeString(String.join(Constants.EMPTY, newTexts));
+        super.writeString(String.join(StrUtil.EMPTY, newTexts));
     }
 }

+ 3 - 1
service-daq/src/main/java/com/simuwang/daq/components/report/parser/AbstractReportParser.java

@@ -1,6 +1,7 @@
 package com.simuwang.daq.components.report.parser;
 
 import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.util.ReflectUtil;
 import cn.hutool.core.util.StrUtil;
 import com.simuwang.base.common.conts.Constants;
@@ -24,6 +25,7 @@ public abstract class AbstractReportParser<T extends ReportData> implements Repo
 
     public AbstractReportParser(EmailFieldMappingMapper fieldMappingMapper) {
         this.fieldMappingMapper = fieldMappingMapper;
+        this.fieldMapper = MapUtil.newHashMap(128);
     }
 
     @Override
@@ -70,7 +72,7 @@ public abstract class AbstractReportParser<T extends ReportData> implements Repo
             fieldValue = null;
         }
         if (fieldValue != null) {
-            fieldValue = fieldValue.replace("\r", Constants.EMPTY);
+            fieldValue = fieldValue.replace("\r", StrUtil.EMPTY);
         }
         return StrUtil.isBlank(fieldValue) ? null : fieldValue;
     }

+ 6 - 5
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/AbstractPDReportParser.java

@@ -2,7 +2,6 @@ package com.simuwang.daq.components.report.parser.pdf;
 
 import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.util.StrUtil;
-import com.simuwang.base.common.conts.Constants;
 import com.simuwang.base.common.exception.APIException;
 import com.simuwang.base.mapper.EmailFieldMappingMapper;
 import com.simuwang.base.pojo.dto.report.ReportBaseInfoDTO;
@@ -14,7 +13,10 @@ import com.simuwang.daq.components.report.parser.AbstractReportParser;
 import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
 import org.apache.pdfbox.pdmodel.PDDocument;
-import technology.tabula.*;
+import technology.tabula.CustomObjectExtractor;
+import technology.tabula.Page;
+import technology.tabula.PageIterator;
+import technology.tabula.Table;
 import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
 
 import java.io.IOException;
@@ -39,10 +41,9 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
         // 解析报告名称和表格
         String reportName = null;
         try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(params.getFilepath()))) {
-            CustomPDFTextStripper stripper = new CustomPDFTextStripper(document, 0);
+            CustomPDFTextStripper stripper = new CustomPDFTextStripper(document, 1);
             stripper.setSortByPosition(true);
-            String text = stripper.getText(document).replace(Constants.WATERMARK_REPLACE, Constants.EMPTY);
-            List<String> textList = StrUtil.split(text, System.lineSeparator());
+            List<String> textList = StrUtil.split(stripper.getText(document), System.lineSeparator());
             textList.removeIf(StrUtil::isBlank);
             if (CollUtil.isNotEmpty(textList)) {
                 reportName = this.matchReportName(textList.get(0));

+ 1 - 1
service-deploy/src/test/java/com/simuwang/ApplicationTest.java

@@ -45,7 +45,7 @@ public class ApplicationTest {
     @Test
     public void reportTest() {
         MailboxInfoDTO emailInfoDTO = this.buildMailbox();
-        Date startDate = DateUtil.parse("2024-09-30 08:50:00", DateConst.YYYY_MM_DD_HH_MM_SS);
+        Date startDate = DateUtil.parse("2024-09-30 10:50:00", DateConst.YYYY_MM_DD_HH_MM_SS);
         Date endDate = DateUtil.parse("2024-09-30 19:40:00", DateConst.YYYY_MM_DD_HH_MM_SS);
         try {
             emailParseService.parseEmail(emailInfoDTO, startDate, endDate);