Browse Source

Merge branch 'test' of http://112.74.196.215:3000/Tech2/data-daq into test

chenjianhua 6 months ago
parent
commit
e279c2a7c4
68 changed files with 2591 additions and 1510 deletions
  1. 2 0
      service-base/src/main/java/com/simuwang/base/common/conts/Constants.java
  2. 34 0
      service-base/src/main/java/com/simuwang/base/common/enums/ReportParserFileType.java
  3. 18 4
      service-base/src/main/java/com/simuwang/base/common/enums/ReportType.java
  4. 36 0
      service-base/src/main/java/com/simuwang/base/common/exception/ReportParseException.java
  5. 5 0
      service-base/src/main/java/com/simuwang/base/config/DaqProperties.java
  6. 1 0
      service-base/src/main/java/com/simuwang/base/mapper/EmailFieldMappingMapper.java
  7. 4 0
      service-base/src/main/java/com/simuwang/base/pojo/dos/EmailFieldMappingDO.java
  8. 3 3
      service-base/src/main/java/com/simuwang/base/pojo/dos/report/ReportAssetAllocationDO.java
  9. 52 2
      service-base/src/main/java/com/simuwang/base/pojo/dos/report/ReportFundInfoDO.java
  10. 4 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/AnnuallyReportData.java
  11. 57 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/BaseReportDTO.java
  12. 32 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/BaseReportLevelDTO.java
  13. 4 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/MonthlyReportData.java
  14. 6 1
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/PythonResult.java
  15. 4 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/QuarterlyReportData.java
  16. 16 9
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportAssetAllocationDTO.java
  17. 9 2
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportBaseInfoDTO.java
  18. 16 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportData.java
  19. 58 21
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportFinancialIndicatorsDTO.java
  20. 66 9
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportFundInfoDTO.java
  21. 14 8
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportInvestmentIndustryDTO.java
  22. 29 20
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportNetReportDTO.java
  23. 30 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportParseStatus.java
  24. 29 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportParserParams.java
  25. 25 20
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportShareChangeDTO.java
  26. 10 1
      service-base/src/main/java/com/simuwang/shiro/core/jwt/JwtContext.java
  27. 2 1
      service-base/src/main/resources/mapper/EmailFieldMappingMapper.xml
  28. 0 184
      service-daq/src/main/java/com/simuwang/daq/components/AbstractReportParser.java
  29. 23 19
      service-daq/src/main/java/com/simuwang/daq/components/CustomPDFTextStripper.java
  30. 190 0
      service-daq/src/main/java/com/simuwang/daq/components/CustomTabulaTextStripper.java
  31. 0 285
      service-daq/src/main/java/com/simuwang/daq/components/PDMonthlyReportParser.java
  32. 10 9
      service-daq/src/main/java/com/simuwang/daq/components/PythonReportConverter.java
  33. 0 18
      service-daq/src/main/java/com/simuwang/daq/components/ReportParser.java
  34. 117 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/AbstractReportParser.java
  35. 33 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/ReportParser.java
  36. 69 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/ReportParserConstant.java
  37. 32 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/ReportParserFactory.java
  38. 330 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/AbstractPDReportParser.java
  39. 156 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDAnnuallyReportParser.java
  40. 89 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDMonthlyReportParser.java
  41. 296 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDQuarterlyReportParser.java
  42. 78 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/py/AbstractPyReportParser.java
  43. 25 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/py/PythonAnnuallyReportParser.java
  44. 25 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/py/PythonMonthlyReportParser.java
  45. 25 0
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/py/PythonQuarterlyReportParser.java
  46. 56 0
      service-daq/src/main/java/com/simuwang/daq/components/report/writer/AbstractReportWriter.java
  47. 1 1
      service-daq/src/main/java/com/simuwang/daq/components/writer/AnnuallyReportWriter.java
  48. 1 1
      service-daq/src/main/java/com/simuwang/daq/components/writer/MonthlyReportWriter.java
  49. 1 1
      service-daq/src/main/java/com/simuwang/daq/components/writer/QuarterlyReportWriter.java
  50. 12 0
      service-daq/src/main/java/com/simuwang/daq/components/report/writer/ReportWriter.java
  51. 1 1
      service-daq/src/main/java/com/simuwang/daq/components/writer/ReportWriterConstant.java
  52. 1 1
      service-daq/src/main/java/com/simuwang/daq/components/writer/ReportWriterFactory.java
  53. 0 73
      service-daq/src/main/java/com/simuwang/daq/components/writer/AbstractReportWriter.java
  54. 0 7
      service-daq/src/main/java/com/simuwang/daq/components/writer/ReportWriter.java
  55. 0 43
      service-daq/src/main/java/com/simuwang/daq/dto/MonthlyReportNavInfo.java
  56. 0 13
      service-daq/src/main/java/com/simuwang/daq/dto/ReportExtInfo.java
  57. 0 18
      service-daq/src/main/java/com/simuwang/daq/dto/ReportFileType.java
  58. 0 291
      service-daq/src/main/java/com/simuwang/daq/dto/ReportFundInfo.java
  59. 0 54
      service-daq/src/main/java/com/simuwang/daq/dto/ReportInfo.java
  60. 97 71
      service-daq/src/main/java/com/simuwang/daq/service/EmailParseService.java
  61. 4 2
      service-daq/src/main/java/com/simuwang/daq/service/ReportEmailParser.java
  62. 0 20
      service-daq/src/main/java/com/simuwang/daq/service/ReportParseService.java
  63. 269 255
      service-daq/src/main/java/com/simuwang/daq/utils/ReportParseUtil.java
  64. 61 0
      service-daq/src/main/java/technology/tabula/CustomObjectExtractor.java
  65. 0 12
      service-deploy/pom.xml
  66. 2 0
      service-deploy/src/main/resources/application.yml
  67. 21 9
      service-deploy/src/test/java/com/simuwang/ApplicationTest.java
  68. 0 21
      service-manage/src/main/java/com/simuwang/manage/api/test/ReportParseTestApi.java

+ 2 - 0
service-base/src/main/java/com/simuwang/base/common/conts/Constants.java

@@ -7,6 +7,8 @@ package com.simuwang.base.common.conts;
  * @author ruoyi
  * @author ruoyi
  */
  */
 public class Constants {
 public class Constants {
+    public static final String WATERMARK_REPLACE = "+_+" + System.lineSeparator();
+
     public static final long DEFAULT_SERIAL_ID = 999L;
     public static final long DEFAULT_SERIAL_ID = 999L;
 
 
     /**
     /**

+ 34 - 0
service-base/src/main/java/com/simuwang/base/common/enums/ReportParserFileType.java

@@ -0,0 +1,34 @@
+package com.simuwang.base.common.enums;
+
+import cn.hutool.core.util.StrUtil;
+
+import java.util.Arrays;
+
+/**
+ * @author wangzaijun
+ * @date 2024/9/29 10:57
+ * @description 解析文件格式类型,支持调用python接口解析
+ */
+public enum ReportParserFileType {
+    PDF("pdf"),
+    DOCX("docx"),
+    DOC("doc"),
+    XLSX("xlsx"),
+    XLS("xls"),
+    PYTHON("python");
+
+    private final String suffix;
+
+    ReportParserFileType(String suffix) {
+        this.suffix = suffix;
+    }
+
+    public static ReportParserFileType getBySuffix(String suffix) {
+        return Arrays.stream(ReportParserFileType.values())
+                .filter(e -> StrUtil.equals(e.getSuffix(), suffix)).findFirst().orElse(null);
+    }
+
+    public String getSuffix() {
+        return suffix;
+    }
+}

+ 18 - 4
service-base/src/main/java/com/simuwang/base/common/enums/ReportType.java

@@ -2,17 +2,31 @@ package com.simuwang.base.common.enums;
 
 
 import lombok.Getter;
 import lombok.Getter;
 
 
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
 @Getter
 @Getter
 public enum ReportType {
 public enum ReportType {
-    MONTHLY(0, "月报"),
-    QUARTERLY(1, "季报"),
-    ANNUALLY(2, "年报");
+    MONTHLY(0, "月报", new String[]{"月", "月度", "月报"}),
+    QUARTERLY(1, "季报", new String[]{"季", "季度", "季报"}),
+    ANNUALLY(2, "年报", new String[]{"年", "年度", "年报"});
 
 
     private final int type;
     private final int type;
     private final String label;
     private final String label;
+    private final String[] patterns;
 
 
-    ReportType(int type, String label) {
+    ReportType(int type, String label, String[] patterns) {
         this.type = type;
         this.type = type;
         this.label = label;
         this.label = label;
+        this.patterns = patterns;
+    }
+
+    public static String getAllPatterns() {
+        return String.join("|", patterns());
+    }
+
+    public static List<String> patterns() {
+        return Arrays.stream(ReportType.values()).flatMap(e -> Arrays.stream(e.getPatterns())).collect(Collectors.toList());
     }
     }
 }
 }

+ 36 - 0
service-base/src/main/java/com/simuwang/base/common/exception/ReportParseException.java

@@ -0,0 +1,36 @@
+package com.simuwang.base.common.exception;
+
+import cn.hutool.core.util.StrUtil;
+import com.smppw.common.pojo.enums.status.StatusCode;
+
+/**
+ * @author wangzaijun
+ * @date 2024/10/11 14:10
+ * @description 报告解析的异常
+ */
+public class ReportParseException extends RuntimeException {
+    private final Integer code;
+    private final String msg;
+
+    public ReportParseException(StatusCode statusCode) {
+        this(statusCode.getCode(), statusCode.getMsg());
+    }
+
+    public ReportParseException(Integer code, String msg) {
+        super(msg);
+        this.code = code;
+        this.msg = msg;
+    }
+
+    public ReportParseException(StatusCode statusCode, Object... msgs) {
+        this(statusCode.getCode(), StrUtil.format(statusCode.getMsg(), msgs));
+    }
+
+    public int getCode() {
+        return code;
+    }
+
+    public String getMsg() {
+        return msg;
+    }
+}

+ 5 - 0
service-base/src/main/java/com/simuwang/base/config/DaqProperties.java

@@ -32,6 +32,11 @@ public class DaqProperties {
      */
      */
     private String tokenSecret;
     private String tokenSecret;
     /**
     /**
+     * 是否开启python的报告解析功能,开启后报告全部用python接口来解析
+     * 当开启时要配置python解析地址
+     */
+    private Boolean enablePyParser = Boolean.FALSE;
+    /**
      * 报告解析的python接口地址
      * 报告解析的python接口地址
      */
      */
     private String pyBaseUrl = "http://localhost:8080";
     private String pyBaseUrl = "http://localhost:8080";

+ 1 - 0
service-base/src/main/java/com/simuwang/base/mapper/EmailFieldMappingMapper.java

@@ -11,6 +11,7 @@ public interface EmailFieldMappingMapper {
     /**
     /**
      * 获取净值文件字段识别映射配置
      * 获取净值文件字段识别映射配置
      *
      *
+     * @param type 0-公共的字段,1-净值和估值表解析的字段,3-定期报告解析的字段
      * @return 净值文件字段识别映射配置
      * @return 净值文件字段识别映射配置
      */
      */
     List<EmailFieldMappingDO> getEmailFieldMapping(Integer type);
     List<EmailFieldMappingDO> getEmailFieldMapping(Integer type);

+ 4 - 0
service-base/src/main/java/com/simuwang/base/pojo/dos/EmailFieldMappingDO.java

@@ -26,6 +26,10 @@ public class EmailFieldMappingDO {
     @TableField(value = "name")
     @TableField(value = "name")
     private String name;
     private String name;
     /**
     /**
+     * 1-净值或估值表,3-定期报告,0-表示共用的,默认0
+     */
+    private Integer type;
+    /**
      * 记录的有效性;1-有效;0-无效;
      * 记录的有效性;1-有效;0-无效;
      */
      */
     @TableField(value = "isvalid")
     @TableField(value = "isvalid")

+ 3 - 3
service-base/src/main/java/com/simuwang/base/pojo/dos/report/ReportAssetAllocationDO.java

@@ -16,15 +16,15 @@ import java.math.BigDecimal;
 @TableName("amac_report_asset_allocation")
 @TableName("amac_report_asset_allocation")
 public class ReportAssetAllocationDO extends BaseReportDO {
 public class ReportAssetAllocationDO extends BaseReportDO {
     /**
     /**
-     * 资产类
+     * 资产
      */
      */
     private String assetType;
     private String assetType;
     /**
     /**
-     * 资产类别
+     * 资产明细
      */
      */
     private String columnName;
     private String columnName;
     /**
     /**
-     * 资产类别
+     * 市值
      */
      */
     private BigDecimal marketValue;
     private BigDecimal marketValue;
     /**
     /**

+ 52 - 2
service-base/src/main/java/com/simuwang/base/pojo/dos/report/ReportFundInfoDO.java

@@ -17,23 +17,73 @@ import java.util.Date;
 @Getter
 @Getter
 @TableName("amac_report_fund_info")
 @TableName("amac_report_fund_info")
 public class ReportFundInfoDO extends BaseReportDO {
 public class ReportFundInfoDO extends BaseReportDO {
+    /**
+     * 投资顾问
+     */
     private String advisorName;
     private String advisorName;
+    /**
+     * 基金托管人
+     */
     private String custodianName;
     private String custodianName;
+    /**
+     * 基金经理描述
+     */
     private String fundManager;
     private String fundManager;
+    /**
+     * 基金名称
+     */
     private String fundName;
     private String fundName;
+    /**
+     * 投资策略
+     */
     private String fundStrategyDescription;
     private String fundStrategyDescription;
+    /**
+     * 基金成立日期
+     */
     private Date inceptionDate;
     private Date inceptionDate;
+    /**
+     * 行业趋势
+     */
     private String industryTrend;
     private String industryTrend;
+    /**
+     * 投资目标
+     */
     private String investmentObjective;
     private String investmentObjective;
+    /**
+     * 杠杆比例
+     */
     private BigDecimal leverage;
     private BigDecimal leverage;
+    /**
+     * 杠杆比例描述
+     */
     private String leverageNote;
     private String leverageNote;
+    /**
+     * 基金运作方式
+     */
     private String operationType;
     private String operationType;
+    /**
+     * 备案编码
+     */
     private String registerNumber;
     private String registerNumber;
+    /**
+     * 风险收益特征
+     */
     private String riskReturnDesc;
     private String riskReturnDesc;
+    /**
+     * 业绩比较基准
+     */
     private String secondaryBenchmark;
     private String secondaryBenchmark;
+    /**
+     * 基金管理人
+     */
     private String trustName;
     private String trustName;
-
+    /**
+     * 基金到期日期
+     */
     private Date dueDate;
     private Date dueDate;
+    /**
+     * 信息披露报告是否经托管机构复核
+     */
     @TableField(value = "reviewed")
     @TableField(value = "reviewed")
-    private Integer isReviewed;
+    private Integer reviewed;
 }
 }

+ 4 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/AnnuallyReportData.java

@@ -7,6 +7,10 @@ import lombok.Setter;
 @Setter
 @Setter
 @Getter
 @Getter
 public class AnnuallyReportData extends QuarterlyReportData {
 public class AnnuallyReportData extends QuarterlyReportData {
+    public AnnuallyReportData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo) {
+        super(baseInfo, fundInfo);
+    }
+
     @Override
     @Override
     public ReportType getReportType() {
     public ReportType getReportType() {
         return ReportType.ANNUALLY;
         return ReportType.ANNUALLY;

+ 57 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/BaseReportDTO.java

@@ -1,18 +1,75 @@
 package com.simuwang.base.pojo.dto.report;
 package com.simuwang.base.pojo.dto.report;
 
 
+import cn.hutool.core.date.DatePattern;
+import cn.hutool.core.date.DateUtil;
+import cn.hutool.core.util.StrUtil;
 import com.simuwang.base.pojo.dos.report.BaseReportDO;
 import com.simuwang.base.pojo.dos.report.BaseReportDO;
 import lombok.Getter;
 import lombok.Getter;
 import lombok.Setter;
 import lombok.Setter;
 
 
+import java.math.BigDecimal;
+import java.util.Date;
+
+/**
+ * @author wangzaijun
+ * @date 2024/10/9 11:08
+ * @description 抽象的报告数据父类,全部字段用string传递
+ */
 @Setter
 @Setter
 @Getter
 @Getter
 public abstract class BaseReportDTO<T extends BaseReportDO> {
 public abstract class BaseReportDTO<T extends BaseReportDO> {
     private Integer fileId;
     private Integer fileId;
 
 
+    public BaseReportDTO() {
+    }
+
+    public BaseReportDTO(Integer fileId) {
+        this.fileId = fileId;
+    }
+
     public abstract T toEntity();
     public abstract T toEntity();
 
 
     @Override
     @Override
     public String toString() {
     public String toString() {
         return "fileId=" + fileId;
         return "fileId=" + fileId;
     }
     }
+
+    /**
+     * 字符串转日期类型
+     *
+     * @param input 待转换的字符串
+     * @return /
+     */
+    protected Date toDate(String input) {
+        if (StrUtil.isBlank(input)) {
+            return null;
+        }
+        try {
+            // 日期格式化,支持三种格式:yyyy年MM月dd日、yyyy-MM-dd和yyyy/MM/dd
+            return DateUtil.parse(input.trim(),
+                    DatePattern.CHINESE_DATE_PATTERN, DatePattern.NORM_DATE_PATTERN, "yyyy/MM/dd");
+        } catch (Exception ignored) {
+        }
+        return null;
+    }
+
+    /**
+     * 字符串转数字
+     *
+     * @param input 待转换的字符串
+     * @return /
+     */
+    protected BigDecimal toBigDecimal(String input) {
+        if (StrUtil.isBlank(input)) {
+            return null;
+        }
+        try {
+            // 移除所有非数字和“.”字符
+            String cleanedInput = input.trim().replaceAll("[^\\d.]", "");
+            // 创建BigDecimal对象
+            return new BigDecimal(cleanedInput);
+        } catch (NumberFormatException ignored) {
+        }
+        return null;
+    }
 }
 }

+ 32 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/BaseReportLevelDTO.java

@@ -0,0 +1,32 @@
+package com.simuwang.base.pojo.dto.report;
+
+import com.simuwang.base.pojo.dos.report.BaseReportDO;
+import lombok.Getter;
+import lombok.Setter;
+
+@Setter
+@Getter
+public abstract class BaseReportLevelDTO<T extends BaseReportDO> extends BaseReportDTO<T> {
+    /**
+     * 基金分级
+     */
+    private String level;
+
+    public BaseReportLevelDTO() {
+        super();
+    }
+
+    public BaseReportLevelDTO(Integer fileId) {
+        super(fileId);
+    }
+
+    public BaseReportLevelDTO(Integer fileId, String level) {
+        super(fileId);
+        this.level = level;
+    }
+
+    @Override
+    public String toString() {
+        return super.toString() + ", level='" + this.level + "'";
+    }
+}

+ 4 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/MonthlyReportData.java

@@ -11,6 +11,10 @@ import java.util.List;
 public class MonthlyReportData extends ReportData {
 public class MonthlyReportData extends ReportData {
     private List<ReportNetReportDTO> netReport;
     private List<ReportNetReportDTO> netReport;
 
 
+    public MonthlyReportData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo) {
+        super(baseInfo, fundInfo);
+    }
+
     @Override
     @Override
     public ReportType getReportType() {
     public ReportType getReportType() {
         return ReportType.MONTHLY;
         return ReportType.MONTHLY;

+ 6 - 1
service-base/src/main/java/com/simuwang/base/pojo/dto/report/PythonResult.java

@@ -3,9 +3,14 @@ package com.simuwang.base.pojo.dto.report;
 import lombok.Getter;
 import lombok.Getter;
 import lombok.Setter;
 import lombok.Setter;
 
 
+/**
+ * @author wangzaijun
+ * @date 2024/10/10 14:08
+ * @description 报告解析结果
+ */
 @Setter
 @Setter
 @Getter
 @Getter
-public class PythonResult<T extends ReportData> {
+public class ParseResult<T extends ReportData> {
     private Integer status;
     private Integer status;
 
 
     private String msg;
     private String msg;

+ 4 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/QuarterlyReportData.java

@@ -19,6 +19,10 @@ public class QuarterlyReportData extends ReportData {
     private List<ReportInvestmentIndustryDTO> investmentIndustry;
     private List<ReportInvestmentIndustryDTO> investmentIndustry;
     private List<ReportShareChangeDTO> shareChange;
     private List<ReportShareChangeDTO> shareChange;
 
 
+    public QuarterlyReportData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo) {
+        super(baseInfo, fundInfo);
+    }
+
     @Override
     @Override
     public ReportType getReportType() {
     public ReportType getReportType() {
         return ReportType.QUARTERLY;
         return ReportType.QUARTERLY;

+ 16 - 9
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportAssetAllocationDTO.java

@@ -4,8 +4,6 @@ import com.simuwang.base.pojo.dos.report.ReportAssetAllocationDO;
 import lombok.Getter;
 import lombok.Getter;
 import lombok.Setter;
 import lombok.Setter;
 
 
-import java.math.BigDecimal;
-
 /**
 /**
  * @author wangzaijun
  * @author wangzaijun
  * @date 2024/9/26 16:43
  * @date 2024/9/26 16:43
@@ -15,28 +13,37 @@ import java.math.BigDecimal;
 @Getter
 @Getter
 public class ReportAssetAllocationDTO extends BaseReportDTO<ReportAssetAllocationDO> {
 public class ReportAssetAllocationDTO extends BaseReportDTO<ReportAssetAllocationDO> {
     /**
     /**
-     * 资产类
+     * 资产
      */
      */
     private String assetType;
     private String assetType;
     /**
     /**
-     * 资产类别
+     * 资产明细
      */
      */
-    private String columnName;
+    private String assetDetails;
     /**
     /**
-     * 资产类别
+     * 市值
      */
      */
-    private BigDecimal marketValue;
+    private String marketValue;
     /**
     /**
      * 备注
      * 备注
      */
      */
     private String remark;
     private String remark;
 
 
+    public ReportAssetAllocationDTO() {
+        super();
+    }
+
+    public ReportAssetAllocationDTO(Integer fileId) {
+        super(fileId);
+    }
+
     @Override
     @Override
     public ReportAssetAllocationDO toEntity() {
     public ReportAssetAllocationDO toEntity() {
         ReportAssetAllocationDO entity = new ReportAssetAllocationDO();
         ReportAssetAllocationDO entity = new ReportAssetAllocationDO();
         entity.setFileId(this.getFileId());
         entity.setFileId(this.getFileId());
         entity.setAssetType(this.assetType);
         entity.setAssetType(this.assetType);
-        entity.setMarketValue(this.marketValue);
+        entity.setColumnName(this.assetDetails);
+        entity.setMarketValue(this.toBigDecimal(this.marketValue));
         entity.setRemark(this.remark);
         entity.setRemark(this.remark);
         return entity;
         return entity;
     }
     }
@@ -46,7 +53,7 @@ public class ReportAssetAllocationDTO extends BaseReportDTO<ReportAssetAllocatio
         return "{" +
         return "{" +
                 super.toString() +
                 super.toString() +
                 ", assetType='" + assetType + '\'' +
                 ", assetType='" + assetType + '\'' +
-                ", columnName='" + columnName + '\'' +
+                ", assetDetails='" + assetDetails + '\'' +
                 ", marketValue=" + marketValue +
                 ", marketValue=" + marketValue +
                 ", remark='" + remark + '\'' +
                 ", remark='" + remark + '\'' +
                 '}';
                 '}';

+ 9 - 2
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportBaseInfoDTO.java

@@ -1,6 +1,5 @@
 package com.simuwang.base.pojo.dto.report;
 package com.simuwang.base.pojo.dto.report;
 
 
-import cn.hutool.core.date.DateUtil;
 import com.simuwang.base.pojo.dos.report.ReportBaseInfoDO;
 import com.simuwang.base.pojo.dos.report.ReportBaseInfoDO;
 import lombok.Getter;
 import lombok.Getter;
 import lombok.Setter;
 import lombok.Setter;
@@ -26,11 +25,19 @@ public class ReportBaseInfoDTO extends BaseReportDTO<ReportBaseInfoDO> {
      */
      */
     private String reportType;
     private String reportType;
 
 
+    public ReportBaseInfoDTO() {
+        super();
+    }
+
+    public ReportBaseInfoDTO(Integer fileId) {
+        super(fileId);
+    }
+
     @Override
     @Override
     public ReportBaseInfoDO toEntity() {
     public ReportBaseInfoDO toEntity() {
         ReportBaseInfoDO entity = new ReportBaseInfoDO();
         ReportBaseInfoDO entity = new ReportBaseInfoDO();
         entity.setFileId(this.getFileId());
         entity.setFileId(this.getFileId());
-        entity.setReportDate(this.reportDate == null ? null : DateUtil.parseDate(this.reportDate));
+        entity.setReportDate(this.toDate(this.reportDate));
         entity.setReportName(this.reportName);
         entity.setReportName(this.reportName);
         entity.setReportType(this.reportType);
         entity.setReportType(this.reportType);
         return entity;
         return entity;

+ 16 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportData.java

@@ -4,12 +4,28 @@ import com.simuwang.base.common.enums.ReportType;
 import lombok.Getter;
 import lombok.Getter;
 import lombok.Setter;
 import lombok.Setter;
 
 
+/**
+ * @author wangzaijun
+ * @date 2024/9/29 9:32
+ * @description 报告解析结果对象
+ */
 @Setter
 @Setter
 @Getter
 @Getter
 public abstract class ReportData {
 public abstract class ReportData {
+    /**
+     * 报告基本信息
+     */
     private ReportBaseInfoDTO baseInfo;
     private ReportBaseInfoDTO baseInfo;
+    /**
+     * 报告包含的基金基本信息
+     */
     private ReportFundInfoDTO fundInfo;
     private ReportFundInfoDTO fundInfo;
 
 
+    public ReportData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo) {
+        this.baseInfo = baseInfo;
+        this.fundInfo = fundInfo;
+    }
+
     public abstract ReportType getReportType();
     public abstract ReportType getReportType();
 
 
     @Override
     @Override

+ 58 - 21
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportFinancialIndicatorsDTO.java

@@ -1,41 +1,77 @@
 package com.simuwang.base.pojo.dto.report;
 package com.simuwang.base.pojo.dto.report;
 
 
+import cn.hutool.core.util.StrUtil;
 import com.simuwang.base.pojo.dos.report.ReportFinancialIndicatorsDO;
 import com.simuwang.base.pojo.dos.report.ReportFinancialIndicatorsDO;
 import lombok.Getter;
 import lombok.Getter;
 import lombok.Setter;
 import lombok.Setter;
 
 
-import java.math.BigDecimal;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 
 @Setter
 @Setter
 @Getter
 @Getter
-public class ReportFinancialIndicatorsDTO extends BaseReportDTO<ReportFinancialIndicatorsDO> {
-    private String level;
-
+public class ReportFinancialIndicatorsDTO extends BaseReportLevelDTO<ReportFinancialIndicatorsDO> {
     /**
     /**
      * 年度
      * 年度
      */
      */
-    private Integer endDate;
-
-    private BigDecimal fundAssetSize;
-    private BigDecimal nav;
-    private BigDecimal profit;
-    private BigDecimal realizedIncome;
+    private String yearly;
+    /**
+     * 期末基金净资产
+     */
+    private String assetNet;
+    /**
+     * 报告期期末单位净值
+     */
+    private String nav;
+    /**
+     * 本期利润
+     */
+    private String profit;
+    /**
+     * 本期已实现收益
+     */
+    private String realizedIncome;
     /**
     /**
      * 期末可供分配利润
      * 期末可供分配利润
      */
      */
-    private BigDecimal undistributedProfit;
+    private String undistributedProfit;
+    /**
+     * 期末可供分配基金份额利润
+     */
+    private String undistributedShareProfit;
+    /**
+     * 基金份额累计净值增长率
+     */
+    private String shareNavRet;
+
+    public ReportFinancialIndicatorsDTO() {
+        super();
+    }
+
+    public ReportFinancialIndicatorsDTO(Integer fileId) {
+        super(fileId);
+    }
+
+    public ReportFinancialIndicatorsDTO(Integer fileId, String level) {
+        super(fileId, level);
+    }
 
 
     @Override
     @Override
     public ReportFinancialIndicatorsDO toEntity() {
     public ReportFinancialIndicatorsDO toEntity() {
         ReportFinancialIndicatorsDO entity = new ReportFinancialIndicatorsDO();
         ReportFinancialIndicatorsDO entity = new ReportFinancialIndicatorsDO();
         entity.setFileId(this.getFileId());
         entity.setFileId(this.getFileId());
-        entity.setLevel(this.level);
-        entity.setEndDate(this.endDate);
-        entity.setFundAssetSize(this.fundAssetSize);
-        entity.setNav(this.nav);
-        entity.setProfit(this.profit);
-        entity.setRealizedIncome(this.realizedIncome);
-        entity.setUndistributedProfit(this.undistributedProfit);
+        entity.setLevel(this.getLevel());
+        entity.setFundAssetSize(this.toBigDecimal(this.assetNet));
+        entity.setNav(this.toBigDecimal(this.nav));
+        entity.setProfit(this.toBigDecimal(this.profit));
+        entity.setRealizedIncome(this.toBigDecimal(this.realizedIncome));
+        entity.setUndistributedProfit(this.toBigDecimal(this.undistributedProfit));
+        if (StrUtil.isNotBlank(this.yearly)) {
+            Matcher matcher = Pattern.compile("\\d+").matcher(this.yearly);
+            if (matcher.find()) {
+                entity.setEndDate(Integer.parseInt(matcher.group()));
+            }
+        }
         return entity;
         return entity;
     }
     }
 
 
@@ -43,13 +79,14 @@ public class ReportFinancialIndicatorsDTO extends BaseReportDTO<ReportFinancialI
     public String toString() {
     public String toString() {
         return "{" +
         return "{" +
                 super.toString() +
                 super.toString() +
-                ", level='" + level + '\'' +
-                ", endDate=" + endDate +
-                ", fundAssetSize=" + fundAssetSize +
+                ", yearly=" + yearly +
+                ", assetNet=" + assetNet +
                 ", nav=" + nav +
                 ", nav=" + nav +
                 ", profit=" + profit +
                 ", profit=" + profit +
                 ", undistributedProfit=" + undistributedProfit +
                 ", undistributedProfit=" + undistributedProfit +
                 ", realizedIncome=" + realizedIncome +
                 ", realizedIncome=" + realizedIncome +
+                ", undistributedShareProfit=" + undistributedShareProfit +
+                ", shareNavRet=" + shareNavRet +
                 '}';
                 '}';
     }
     }
 }
 }

+ 66 - 9
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportFundInfoDTO.java

@@ -1,11 +1,10 @@
 package com.simuwang.base.pojo.dto.report;
 package com.simuwang.base.pojo.dto.report;
 
 
-import cn.hutool.core.date.DateUtil;
 import com.simuwang.base.pojo.dos.report.ReportFundInfoDO;
 import com.simuwang.base.pojo.dos.report.ReportFundInfoDO;
 import lombok.Getter;
 import lombok.Getter;
 import lombok.Setter;
 import lombok.Setter;
 
 
-import java.math.BigDecimal;
+import java.util.Objects;
 
 
 /**
 /**
  * @author wangzaijun
  * @author wangzaijun
@@ -15,24 +14,82 @@ import java.math.BigDecimal;
 @Setter
 @Setter
 @Getter
 @Getter
 public class ReportFundInfoDTO extends BaseReportDTO<ReportFundInfoDO> {
 public class ReportFundInfoDTO extends BaseReportDTO<ReportFundInfoDO> {
+    /**
+     * Investment advisor
+     */
     private String advisorName;
     private String advisorName;
+    /**
+     * Fund custodian
+     */
     private String custodianName;
     private String custodianName;
+    /**
+     * Fund manager description
+     */
     private String fundManager;
     private String fundManager;
+    /**
+     * Fund name
+     */
     private String fundName;
     private String fundName;
+    /**
+     * Investment strategy
+     */
     private String fundStrategyDescription;
     private String fundStrategyDescription;
+    /**
+     * Fund inception date, kept as raw text (toEntity converts it with toDate)
+     */
     private String inceptionDate;
     private String inceptionDate;
+    /**
+     * Industry trend
+     */
     private String industryTrend;
     private String industryTrend;
+    /**
+     * Investment objective
+     */
     private String investmentObjective;
     private String investmentObjective;
-    private BigDecimal leverage;
+    /**
+     * Leverage ratio, kept as raw text (toEntity converts it with toBigDecimal)
+     */
+    private String leverage;
+    /**
+     * Description of the leverage ratio
+     */
     private String leverageNote;
     private String leverageNote;
+    /**
+     * Fund operation mode
+     */
     private String operationType;
     private String operationType;
+    /**
+     * Registration (filing) number
+     */
     private String registerNumber;
     private String registerNumber;
+    /**
+     * Risk-return characteristics
+     */
     private String riskReturnDesc;
     private String riskReturnDesc;
+    /**
+     * Performance comparison benchmark
+     */
     private String secondaryBenchmark;
     private String secondaryBenchmark;
+    /**
+     * Fund management company
+     */
     private String trustName;
     private String trustName;
-
+    /**
+     * Fund maturity date, kept as raw text (toEntity converts it with toDate)
+     */
     private String dueDate;
     private String dueDate;
-    private Integer isReviewed;
+    /**
+     * Whether the disclosure report was reviewed by the custodian; toEntity maps the text "是" to 1 and any other value (including null) to 0
+     */
+    private String isReviewed;
+
+    public ReportFundInfoDTO() { // no-arg constructor; only calls the superclass constructor
+        super();
+    }
+
+    public ReportFundInfoDTO(Integer fileId) { // hands fileId to the superclass constructor
+        super(fileId);
+    }
 
 
     @Override
     @Override
     public ReportFundInfoDO toEntity() {
     public ReportFundInfoDO toEntity() {
@@ -43,18 +100,18 @@ public class ReportFundInfoDTO extends BaseReportDTO<ReportFundInfoDO> {
         entity.setFundManager(this.fundManager);
         entity.setFundManager(this.fundManager);
         entity.setFundName(this.fundName);
         entity.setFundName(this.fundName);
         entity.setFundStrategyDescription(this.fundStrategyDescription);
         entity.setFundStrategyDescription(this.fundStrategyDescription);
-        entity.setInceptionDate(this.inceptionDate == null ? null : DateUtil.parseDate(this.inceptionDate));
+        entity.setInceptionDate(this.toDate(this.inceptionDate));
         entity.setIndustryTrend(this.industryTrend);
         entity.setIndustryTrend(this.industryTrend);
         entity.setInvestmentObjective(this.investmentObjective);
         entity.setInvestmentObjective(this.investmentObjective);
-        entity.setLeverage(this.leverage);
+        entity.setLeverage(this.toBigDecimal(this.leverage));
         entity.setLeverageNote(this.leverageNote);
         entity.setLeverageNote(this.leverageNote);
         entity.setOperationType(this.operationType);
         entity.setOperationType(this.operationType);
         entity.setRegisterNumber(this.registerNumber);
         entity.setRegisterNumber(this.registerNumber);
         entity.setRiskReturnDesc(this.riskReturnDesc);
         entity.setRiskReturnDesc(this.riskReturnDesc);
         entity.setSecondaryBenchmark(this.secondaryBenchmark);
         entity.setSecondaryBenchmark(this.secondaryBenchmark);
         entity.setTrustName(this.trustName);
         entity.setTrustName(this.trustName);
-        entity.setDueDate(this.dueDate == null ? null : DateUtil.parseDate(this.dueDate));
-        entity.setIsReviewed(this.isReviewed);
+        entity.setDueDate(this.toDate(this.dueDate));
+        entity.setReviewed(Objects.equals("是", this.isReviewed) ? 1 : 0);
         return entity;
         return entity;
     }
     }
 
 

+ 14 - 8
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportInvestmentIndustryDTO.java

@@ -4,8 +4,6 @@ import com.simuwang.base.pojo.dos.report.ReportInvestmentIndustryDO;
 import lombok.Getter;
 import lombok.Getter;
 import lombok.Setter;
 import lombok.Setter;
 
 
-import java.math.BigDecimal;
-
 /**
 /**
  * @author wangzaijun
  * @author wangzaijun
  * @date 2024/9/26 16:49
  * @date 2024/9/26 16:49
@@ -31,13 +29,21 @@ public class ReportInvestmentIndustryDTO extends BaseReportDTO<ReportInvestmentI
      */
      */
     private String isbCode;
     private String isbCode;
     /**
     /**
-     * Fair value
+     * Fair value, i.e. market value
      */
      */
-    private BigDecimal marketValue;
+    private String marketValue;
     /**
     /**
-     * Ratio to the fund's net asset value
+     * Ratio to the fund's net asset value (share of NAV, weight)
      */
      */
-    private BigDecimal ratio;
+    private String ratio;
+
+    public ReportInvestmentIndustryDTO() { // no-arg constructor; only calls the superclass constructor
+        super();
+    }
+
+    public ReportInvestmentIndustryDTO(Integer fileId) { // hands fileId to the superclass constructor
+        super(fileId);
+    }
 
 
     @Override
     @Override
     public ReportInvestmentIndustryDO toEntity() {
     public ReportInvestmentIndustryDO toEntity() {
@@ -47,8 +53,8 @@ public class ReportInvestmentIndustryDTO extends BaseReportDTO<ReportInvestmentI
         entity.setIndustryName(this.industryName);
         entity.setIndustryName(this.industryName);
         entity.setInvestType(this.investType);
         entity.setInvestType(this.investType);
         entity.setIsbCode(this.isbCode);
         entity.setIsbCode(this.isbCode);
-        entity.setMarketValue(this.marketValue);
-        entity.setRatio(this.ratio);
+        entity.setMarketValue(this.toBigDecimal(this.marketValue));
+        entity.setRatio(this.toBigDecimal(this.ratio));
         return entity;
         return entity;
     }
     }
 
 

+ 29 - 20
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportNetReportDTO.java

@@ -1,12 +1,9 @@
 package com.simuwang.base.pojo.dto.report;
 package com.simuwang.base.pojo.dto.report;
 
 
-import cn.hutool.core.date.DateUtil;
 import com.simuwang.base.pojo.dos.report.ReportNetReportDO;
 import com.simuwang.base.pojo.dos.report.ReportNetReportDO;
 import lombok.Getter;
 import lombok.Getter;
 import lombok.Setter;
 import lombok.Setter;
 
 
-import java.math.BigDecimal;
-
 /**
 /**
  * @author wangzaijun
  * @author wangzaijun
  * @date 2024/9/26 16:53
  * @date 2024/9/26 16:53
@@ -14,37 +11,50 @@ import java.math.BigDecimal;
  */
  */
 @Setter
 @Setter
 @Getter
 @Getter
-public class ReportNetReportDTO extends BaseReportDTO<ReportNetReportDO> {
-    private String level;
+public class ReportNetReportDTO extends BaseReportLevelDTO<ReportNetReportDO> {
+    /**
+     * Valuation date
+     */
     private String valuationDate;
     private String valuationDate;
-
     /**
     /**
      * Cumulative net asset value
      * Cumulative net asset value
      */
      */
-    private BigDecimal cumulativeNav;
+    private String cumulativeNavWithdrawal;
     /**
     /**
      * Total fund shares
      * Total fund shares
      */
      */
-    private BigDecimal endTotalShares;
+    private String assetShare;
     /**
     /**
      * Fund net asset value
      * Fund net asset value
      */
      */
-    private BigDecimal fundAssetSize;
+    private String assetNet;
     /**
     /**
      * Unit net asset value
      * Unit net asset value
      */
      */
-    private BigDecimal nav;
+    private String nav;
+
+    public ReportNetReportDTO() { // no-arg constructor; only calls the superclass constructor
+        super();
+    }
+
+    public ReportNetReportDTO(Integer fileId) { // hands fileId to the superclass constructor
+        super(fileId);
+    }
+
+    public ReportNetReportDTO(Integer fileId, String level) { // hands fileId and level to the superclass constructor
+        super(fileId, level);
+    }
 
 
     @Override
     @Override
     public ReportNetReportDO toEntity() { // builds a ReportNetReportDO from this DTO's fields
     public ReportNetReportDO toEntity() { // builds a ReportNetReportDO from this DTO's fields
         ReportNetReportDO entity = new ReportNetReportDO();
         ReportNetReportDO entity = new ReportNetReportDO();
         entity.setFileId(this.getFileId());
         entity.setFileId(this.getFileId());
-        entity.setLevel(this.level);
-        entity.setValuationDate(this.valuationDate == null ? null : DateUtil.parseDate(this.valuationDate));
-        entity.setCumulativeNav(this.cumulativeNav);
-        entity.setEndTotalShares(this.endTotalShares);
-        entity.setFundAssetSize(this.fundAssetSize);
-        entity.setNav(this.nav);
+        entity.setLevel(this.getLevel()); // level is read through its getter
+        entity.setValuationDate(this.toDate(this.valuationDate)); // raw date text converted by toDate (defined outside this hunk)
+        entity.setCumulativeNav(this.toBigDecimal(this.cumulativeNavWithdrawal)); // DTO field cumulativeNavWithdrawal feeds the entity's cumulativeNav
+        entity.setEndTotalShares(this.toBigDecimal(this.assetShare)); // DTO field assetShare feeds the entity's endTotalShares
+        entity.setFundAssetSize(this.toBigDecimal(this.assetNet)); // DTO field assetNet feeds the entity's fundAssetSize
+        entity.setNav(this.toBigDecimal(this.nav)); // raw numeric text converted by toBigDecimal (defined outside this hunk)
         return entity;
         return entity;
     }
     }
 
 
@@ -52,11 +62,10 @@ public class ReportNetReportDTO extends BaseReportDTO<ReportNetReportDO> {
     public String toString() {
     public String toString() {
         return "{" +
         return "{" +
                 super.toString() +
                 super.toString() +
-                ", level='" + level + '\'' +
                 ", valuationDate='" + valuationDate + '\'' +
                 ", valuationDate='" + valuationDate + '\'' +
-                ", cumulativeNav=" + cumulativeNav +
-                ", endTotalShares=" + endTotalShares +
-                ", fundAssetSize=" + fundAssetSize +
+                ", cumulativeNavWithdrawal=" + cumulativeNavWithdrawal +
+                ", assetShare=" + assetShare +
+                ", fundAssetSize=" + assetNet + // NOTE(review): label still reads "fundAssetSize" while the value is the renamed assetNet field; the other renamed fields use their new names as labels - confirm whether this one should too
                 ", nav=" + nav +
                 ", nav=" + nav +
                 '}';
                 '}';
     }
     }

+ 30 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportParseStatus.java

@@ -0,0 +1,30 @@
+package com.simuwang.base.pojo.dto.report;
+
+import com.smppw.common.pojo.enums.status.StatusCode;
+
+public enum ReportParseStatus implements StatusCode { // status codes and messages for periodic-report parsing problems
+    PARSE_FAIL(21000, "定期报告解析错误:{}"), // periodic-report parse error; "{}" is a placeholder in the message (presumably filled in by the caller - confirm)
+    NOT_A_REPORT(21001, "不是定期报告"), // the file is not a periodic report
+    REPORT_IS_SCAN(21002, "报告为扫描件"), // the report is a scanned copy
+    NO_SUPPORT_TEMPLATE(21003, "不支持的报告文件格式"), // unsupported report file format
+    NOT_A_FIXED_FORMAT(21004, "不是基协统一格式"), // not in the fund association's unified format (presumably the AMAC template - confirm)
+    PARSE_FUND_INFO_FAIL(21010, "没有解析到报告中的基金基本信息"), // no basic fund information could be parsed from the report
+    ;
+    private final int code; // numeric status code returned by getCode
+    private final String msg; // message template returned by getMsg
+
+    ReportParseStatus(int code, String msg) {
+        this.code = code;
+        this.msg = msg;
+    }
+
+    @Override
+    public int getCode() {
+        return this.code;
+    }
+
+    @Override
+    public String getMsg() {
+        return this.msg;
+    }
+}

+ 29 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportParserParams.java

@@ -0,0 +1,29 @@
+package com.simuwang.base.pojo.dto.report;
+
+import lombok.*;
+
+@Getter
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+@ToString
+public class ReportParserParams { // parameter object identifying one report file: id, name, path and registration number
+    /**
+     * File id
+     * Join key to the report-parsing tables
+     */
+    private Integer fileId;
+    /**
+     * File name
+     * The fund registration number is looked up in this name first; if it is not there, none is taken from the name
+     */
+    private String filename;
+    /**
+     * File path
+     */
+    private String filepath;
+    /**
+     * Registration (filing) number
+     */
+    private String registerNumber;
+}

+ 25 - 20
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportShareChangeDTO.java

@@ -4,8 +4,6 @@ import com.simuwang.base.pojo.dos.report.ReportShareChangeDO;
 import lombok.Getter;
 import lombok.Getter;
 import lombok.Setter;
 import lombok.Setter;
 
 
-import java.math.BigDecimal;
-
 /**
 /**
  * @author wangzaijun
  * @author wangzaijun
  * @date 2024/9/26 16:40
  * @date 2024/9/26 16:40
@@ -13,42 +11,50 @@ import java.math.BigDecimal;
  */
  */
 @Setter
 @Setter
 @Getter
 @Getter
-public class ReportShareChangeDTO extends BaseReportDTO<ReportShareChangeDO> {
-    /**
-     * 基金分级
-     */
-    private String level;
+public class ReportShareChangeDTO extends BaseReportLevelDTO<ReportShareChangeDO> {
     /**
     /**
      * Total fund shares at the beginning of the reporting period
      * Total fund shares at the beginning of the reporting period
      */
      */
-    private BigDecimal initTotalShares;
+    private String initTotalShares;
     /**
     /**
      * Less: total shares redeemed during the reporting period
      * Less: total shares redeemed during the reporting period
      */
      */
-    private BigDecimal redemption;
+    private String redemption;
     /**
     /**
      * Total fund shares at period end / total paid-in capital at period end
      * Total fund shares at period end / total paid-in capital at period end
      */
      */
-    private BigDecimal sharePerAsset;
+    private String sharePerAsset;
     /**
     /**
      * Share change caused by fund splits during the reporting period
      * Share change caused by fund splits during the reporting period
      */
      */
-    private BigDecimal split;
+    private String splitChangeShare;
     /**
     /**
      * Total shares subscribed during the reporting period
      * Total shares subscribed during the reporting period
      */
      */
-    private BigDecimal subscription;
+    private String subscription;
+
+    public ReportShareChangeDTO() { // no-arg constructor; only calls the superclass constructor
+        super();
+    }
+
+    public ReportShareChangeDTO(Integer fileId) { // hands fileId to the superclass constructor
+        super(fileId);
+    }
+
+    public ReportShareChangeDTO(Integer fileId, String level) { // hands fileId and level to the superclass constructor
+        super(fileId, level);
+    }
 
 
     @Override
     @Override
     public ReportShareChangeDO toEntity() { // builds a ReportShareChangeDO from this DTO's fields
     public ReportShareChangeDO toEntity() { // builds a ReportShareChangeDO from this DTO's fields
         ReportShareChangeDO entity = new ReportShareChangeDO();
         ReportShareChangeDO entity = new ReportShareChangeDO();
         entity.setFileId(this.getFileId());
         entity.setFileId(this.getFileId());
-        entity.setLevel(this.level);
-        entity.setRedemption(this.redemption);
-        entity.setInitTotalShares(this.initTotalShares);
-        entity.setSharePerAsset(this.sharePerAsset);
-        entity.setSplit(this.split);
-        entity.setSubscription(this.subscription);
+        entity.setLevel(this.getLevel()); // level is read through its getter
+        entity.setRedemption(this.toBigDecimal(this.redemption)); // raw numeric text converted by toBigDecimal (defined outside this hunk)
+        entity.setInitTotalShares(this.toBigDecimal(this.initTotalShares));
+        entity.setSharePerAsset(this.toBigDecimal(this.sharePerAsset));
+        entity.setSplit(this.toBigDecimal(this.splitChangeShare)); // DTO field splitChangeShare feeds the entity's split
+        entity.setSubscription(this.toBigDecimal(this.subscription));
         return entity;
         return entity;
     }
     }
 
 
@@ -56,11 +62,10 @@ public class ReportShareChangeDTO extends BaseReportDTO<ReportShareChangeDO> {
     public String toString() {
     public String toString() {
         return "{" +
         return "{" +
                 super.toString() +
                 super.toString() +
-                ", level='" + level + '\'' +
                 ", initTotalShares=" + initTotalShares +
                 ", initTotalShares=" + initTotalShares +
                 ", redemption=" + redemption +
                 ", redemption=" + redemption +
                 ", sharePerAsset=" + sharePerAsset +
                 ", sharePerAsset=" + sharePerAsset +
-                ", split=" + split +
+                ", splitChangeShare=" + splitChangeShare +
                 ", subscription=" + subscription +
                 ", subscription=" + subscription +
                 '}';
                 '}';
     }
     }

+ 10 - 1
service-base/src/main/java/com/simuwang/shiro/core/jwt/JwtContext.java

@@ -6,6 +6,7 @@ import com.github.benmanes.caffeine.cache.Cache;
 import com.github.benmanes.caffeine.cache.Caffeine;
 import com.github.benmanes.caffeine.cache.Caffeine;
 import com.simuwang.base.config.DaqProperties;
 import com.simuwang.base.config.DaqProperties;
 import io.jsonwebtoken.Claims;
 import io.jsonwebtoken.Claims;
+import io.jsonwebtoken.ExpiredJwtException;
 import io.jsonwebtoken.Jwts;
 import io.jsonwebtoken.Jwts;
 import io.jsonwebtoken.security.Keys;
 import io.jsonwebtoken.security.Keys;
 import org.springframework.stereotype.Component;
 import org.springframework.stereotype.Component;
@@ -38,7 +39,7 @@ public class JwtContext {
         if (MapUtil.isEmpty(tokenMap)) {
         if (MapUtil.isEmpty(tokenMap)) {
             tokenMap = MapUtil.newConcurrentHashMap(16);
             tokenMap = MapUtil.newConcurrentHashMap(16);
         }
         }
-        tokenMap.putIfAbsent(requestIP, token);
+        tokenMap.put(requestIP, token);
         USER_TOKEN_CACHE.put(username, tokenMap);
         USER_TOKEN_CACHE.put(username, tokenMap);
     }
     }
 
 
@@ -75,6 +76,14 @@ public class JwtContext {
      */
      */
     public synchronized String generateToken(String username, String requestIp) {
     public synchronized String generateToken(String username, String requestIp) {
         String token = this.getUserCache(username, requestIp);
         String token = this.getUserCache(username, requestIp);
+        if (StrUtil.isNotBlank(token)) {
+            try {
+                // 如果token过期则重新生成
+                this.getClaimsByToken(token);
+            } catch (ExpiredJwtException e) {
+                token = null;
+            }
+        }
         if (StrUtil.isBlank(token)) {
         if (StrUtil.isBlank(token)) {
             SecretKey signingKey = Keys.hmacShaKeyFor(this.properties.getTokenSecret().getBytes(StandardCharsets.UTF_8));
             SecretKey signingKey = Keys.hmacShaKeyFor(this.properties.getTokenSecret().getBytes(StandardCharsets.UTF_8));
             //过期时间
             //过期时间

+ 2 - 1
service-base/src/main/resources/mapper/EmailFieldMappingMapper.xml

@@ -5,6 +5,7 @@
         <id column="id" property="id"/>
         <id column="id" property="id"/>
         <result column="code" property="code"/>
         <result column="code" property="code"/>
         <result column="name" property="name"/>
         <result column="name" property="name"/>
+        <result column="type" property="type"/>
         <result column="isvalid" property="isvalid"/>
         <result column="isvalid" property="isvalid"/>
         <result column="creatorid" property="creatorId"/>
         <result column="creatorid" property="creatorId"/>
         <result column="createtime" property="createTime"/>
         <result column="createtime" property="createTime"/>
@@ -17,7 +18,7 @@
         from PPW_EMAIL.email_field_mapping
         from PPW_EMAIL.email_field_mapping
         where isvalid = 1
         where isvalid = 1
         <if test="type != null">
         <if test="type != null">
-            and(TYPE =#{type} or TYPE = 0)
+            and (TYPE = #{type} or TYPE = 0)
         </if>
         </if>
     </select>
     </select>
 
 

+ 0 - 184
service-daq/src/main/java/com/simuwang/daq/components/AbstractReportParser.java

@@ -1,184 +0,0 @@
-package com.simuwang.daq.components;
-
-import cn.hutool.core.exceptions.ExceptionUtil;
-import cn.hutool.core.map.MapUtil;
-import cn.hutool.core.util.StrUtil;
-import com.simuwang.daq.dto.ReportExtInfo;
-import com.simuwang.daq.dto.ReportFundInfo;
-import com.simuwang.daq.dto.ReportInfo;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.springframework.util.StopWatch;
-
-import java.io.IOException;
-import java.util.*;
-import java.util.concurrent.TimeUnit;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-public abstract class AbstractReportParser<EXT extends ReportExtInfo> implements ReportParser {
-    protected final Logger logger = LoggerFactory.getLogger(this.getClass());
-    protected String filepath;
-    protected Map<String, List<String>> watermarkListMap;
-
-    @Override
-    public void parse(Integer fileId, String filepath, String watermarkName) {
-        StopWatch watch = new StopWatch();
-        watch.start();
-        if (this.logger.isInfoEnabled()) {
-            this.logger.info("报告{} 开始解析!", filepath);
-        }
-        this.filepath = filepath;
-        this.watermarkListMap = this.generateWatermarkMap(watermarkName);
-        ReportInfo reportInfo = null;
-        ReportFundInfo reportFundInfo = null;
-        List<EXT> exts = null;
-        try {
-            this.initParse();
-            reportInfo = this.parseReportInfo(fileId);
-            reportFundInfo = this.parseBaseInfo();
-            exts = this.parseExtInfo();
-        } catch (Exception e) {
-            this.logger.error("报告{} 解析错误\n{}", filepath, ExceptionUtil.stacktraceToString(e));
-        }
-        this.saveResult(reportInfo, reportFundInfo, exts);
-        watch.stop();
-        if (this.logger.isInfoEnabled()) {
-            this.logger.info("报告{} 解析结束!耗时:{}s", filepath, watch.getTotalTime(TimeUnit.SECONDS));
-        }
-    }
-
-    protected abstract void initParse() throws IOException;
-
-    protected abstract ReportInfo parseReportInfo(Integer fileId);
-
-    protected abstract ReportFundInfo parseBaseInfo();
-
-    protected abstract List<EXT> parseExtInfo();
-
-    protected abstract void saveResult(ReportInfo reportInfo, ReportFundInfo reportFundInfo, List<EXT> exts);
-
-    private Map<String, List<String>> generateWatermarkMap(String watermarkName) {
-        Map<String, List<String>> result = MapUtil.newHashMap(32);
-        // 生成水印列表
-        String text = watermarkName;
-        text = text.replaceAll("[()]", ""); // 移除括号
-        List<String> textList = new ArrayList<>(new HashSet<>(convertStringToList(text)));
-        Collections.reverse(textList);
-        StringBuilder sb = new StringBuilder(textList.size());
-        for (String ch : textList) {
-            sb.append(ch);
-        }
-        String joinedText = sb.toString();
-
-        // 基本水印列表
-        List<String> wkList = new ArrayList<>();
-        for (String ch : textList) {
-            wkList.add(ch + "\r\n");
-            wkList.add("\r\n" + ch);
-        }
-
-        // 查找数字
-        List<String> matches = findDigits(watermarkName);
-        if (!matches.isEmpty()) {
-            for (String match : matches) {
-                wkList.add("\r\n" + match);
-                wkList.add(match + "\r\n");
-            }
-        }
-        wkList.add("-");
-        wkList.add("【");
-        wkList.add("】");
-        wkList.add("\r");
-        wkList.add("\n");
-        wkList.add("\r\n");
-
-        String noNumberText = removeDigits(joinedText);
-
-        // 生成不同字段的水印列表
-        result.put("report_name", new ArrayList<>(wkList));
-        result.get("report_name").addAll(convertStringToList("有限公司"));
-
-        result.put("less", new ArrayList<>(wkList));
-
-        result.put("more", new ArrayList<>(wkList));
-        result.get("more").addAll(convertStringToList(noNumberText));
-
-        result.put("leverage", new ArrayList<>(wkList));
-        result.get("leverage").addAll(convertStringToList(removeKeywords(noNumberText, "基金资产")));
-
-        result.put("base_info", new ArrayList<>(wkList));
-        result.get("base_info").addAll(convertStringToList(removeKeywords(text, "基", "金", "投资", "管理", "有", "份", "融", "资", "产", "本", "号", "收益", "策略", "期")));
-
-        result.put("industry", new ArrayList<>(wkList));
-        result.get("industry").addAll(convertStringToList(removeKeywords(noNumberText, "基金融公产")));
-
-        result.put("market_value", new ArrayList<>(Collections.singletonList("\n")));
-        return result;
-    }
-
-    private List<String> findDigits(String text) {
-        List<String> digits = new ArrayList<>();
-        Pattern pattern = Pattern.compile("\\d");
-        Matcher matcher = pattern.matcher(text);
-        while (matcher.find()) {
-            digits.add(matcher.group());
-        }
-        return digits;
-    }
-
-    private String removeDigits(String text) {
-        return text.replaceAll("\\d", "");
-    }
-
-    private String removeKeywords(String text, String... keywords) {
-        for (String keyword : keywords) {
-            text = text.replaceAll(keyword, "");
-        }
-        return text;
-    }
-
-    private List<String> convertStringToList(String text) {
-        List<String> charList = new ArrayList<>();
-        for (char c : text.toCharArray()) {
-            charList.add(c + "");
-        }
-        return charList;
-    }
-
-    protected String processString(List<String> wmList, String string) {
-        if (StrUtil.isBlank(string)) {
-            return null;
-        }
-        // 生成正则表达式模式
-        String pat = String.join("|", wmList);
-        // 使用正则表达式移除wmList中的元素
-        string = removeMatches(string, pat);
-        // 替换中文括号为英文括号
-        string = string.replace("(", "(").replace(")", ")");
-        // 移除空格
-        string = string.replace(" ", "");
-        // 如果字符串以括号开头,则移除第一个字符
-        if (startsWithParenthesis(string)) {
-            string = string.substring(1);
-        }
-
-        return string;
-    }
-
-    private String removeMatches(String input, String pattern) {
-        // 编译正则表达式
-        Pattern compiledPattern = Pattern.compile(pattern);
-        // 创建Matcher对象
-        Matcher matcher = compiledPattern.matcher(input);
-        // 使用replaceAll方法替换所有匹配到的字符为空字符串
-        return matcher.replaceAll("");
-    }
-
-    private boolean startsWithParenthesis(String input) {
-        // 匹配以括号开头的字符串
-        Pattern pattern = Pattern.compile("^[()].*");
-        Matcher matcher = pattern.matcher(input);
-        return matcher.find();
-    }
-}

+ 23 - 19
service-daq/src/main/java/com/simuwang/daq/components/CustomPDFTextStripper.java

@@ -2,41 +2,45 @@ package com.simuwang.daq.components;
 
 
 import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.collection.ListUtil;
 import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.util.StrUtil;
 import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.pdfbox.text.TextPosition;
 import org.apache.pdfbox.text.TextPosition;
-import org.apache.pdfbox.util.Matrix;
 
 
 import java.io.IOException;
 import java.io.IOException;
 import java.util.List;
 import java.util.List;
+import java.util.stream.Collectors;
+
+import static com.simuwang.base.common.conts.Constants.WATERMARK_REPLACE;
 
 
 /**
 /**
  * @author wangzaijun
  * @author wangzaijun
  * @date 2024/9/12 14:00
  * @date 2024/9/12 14:00
  * @description Custom text stripper that removes watermarks; watermarks were observed to be mostly rotated text that is larger than the other text in the report
  * @description Custom text stripper that removes watermarks; watermarks were observed to be mostly rotated text that is larger than the other text in the report
+ * @see CustomTabulaTextStripper the separate implementation that removes watermarks from table text
  */
  */
 public class CustomPDFTextStripper extends PDFTextStripper {
 public class CustomPDFTextStripper extends PDFTextStripper {
-    private final float[] watermarkWidth = {0f};
-
     @Override
     @Override
     protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
     protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
+        // Watermark glyphs are almost always rotated, so collect the heights of the glyphs whose text matrix has a rotation component
+        List<Float> heights = textPositions.stream().filter(e -> e.getTextMatrix().getValue(0, 1) != 0.)
+                .map(TextPosition::getHeight).collect(Collectors.toList());
+        // An empty list means this text run is not affected by a watermark; write it out unchanged
+        if (CollUtil.isEmpty(heights)) {
+            super.writeString(text);
+            return;
+        }
+        // If every glyph in the run is watermark text, drop the run and write only the placeholder
+        if (textPositions.size() == heights.size()) {
+            super.writeString(WATERMARK_REPLACE);
+            return;
+        }
+        // Otherwise strip the watermark glyph by glyph: a glyph with no rotation whose height is not among the watermark heights is normal text; anything else is treated as watermark and replaced with the placeholder
         List<String> newTexts = ListUtil.list(false);
         List<String> newTexts = ListUtil.list(false);
         for (TextPosition textPosition : textPositions) {
         for (TextPosition textPosition : textPositions) {
-            Matrix textMatrix = textPosition.getTextMatrix();
-            float col = textMatrix.getValue(0, 1);
-            float width = textPosition.getWidth();
-            if (col == 0.) {
-                if (width < watermarkWidth[0]) {
-                    newTexts.add(textPosition.getUnicode());
-                }
-            } else {
-                if (width > watermarkWidth[0]) {
-                    watermarkWidth[0] = width;
-                }
-                newTexts.add("++");
-            }
-        }
-        if (CollUtil.isNotEmpty(newTexts)) {
-            super.writeString(String.join("", newTexts));
+            float col = textPosition.getTextMatrix().getValue(0, 1);
+            float height = textPosition.getHeight();
+            newTexts.add(col == 0. && !heights.contains(height) ? textPosition.getUnicode() : WATERMARK_REPLACE);
         }
         }
+        super.writeString(String.join(StrUtil.EMPTY, newTexts));
     }
     }
 }
 }

+ 190 - 0
service-daq/src/main/java/com/simuwang/daq/components/CustomTabulaTextStripper.java

@@ -0,0 +1,190 @@
+package com.simuwang.daq.components;
+
+import org.apache.fontbox.util.BoundingBox;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.font.PDFont;
+import org.apache.pdfbox.pdmodel.font.PDFontDescriptor;
+import org.apache.pdfbox.pdmodel.font.PDType3Font;
+import org.apache.pdfbox.text.TextPosition;
+import technology.tabula.RectangleSpatialIndex;
+import technology.tabula.TextElement;
+import technology.tabula.TextStripper;
+import technology.tabula.Utils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * @author wangzaijun
+ * @date 2024/9/12 14:00
+ * @description Custom Tabula text stripper that removes watermark text. Watermarks were observed to be mostly
+ * rotated text that is larger than every other text in the report, so a glyph is classified as watermark
+ * mainly from its rotation (a non-zero (0, 1) entry in its text matrix) and its font height.
+ * <p>
+ * How writeString treats one run of glyphs:
+ * <ul>
+ * <li>a run in which every glyph is rotated is skipped entirely;</li>
+ * <li>in any other run each printable glyph becomes a TextElement; a non-breaking space becomes a plain space,
+ * and a glyph that is rotated, or whose getHeightDir() equals a height collected from the rotated glyphs,
+ * is blanked to a single space;</li>
+ * <li>minCharWidth, minCharHeight and the running average height are updated for every element created,
+ * before the blank-element filters run;</li>
+ * <li>a blank element is dropped when its height is at least AVG_HEIGHT_MULT_THRESHOLD times the running
+ * average, or when its font size lies outside [MIN_BLANK_FONT_SIZE, MAX_BLANK_FONT_SIZE];</li>
+ * <li>every remaining element is added to both spatialIndex and textElements.</li>
+ * </ul>
+ * NOTE(review): the rotated heights are collected with getHeight() but compared against getHeightDir();
+ * confirm the two values are comparable for the intended match.
+ * NOTE(review): the null check inside the loop runs after the stream has already dereferenced every
+ * element, so a null TextPosition would fail earlier; confirm nulls cannot occur.
+ */
+public class CustomTabulaTextStripper extends TextStripper {
+    private static final String NBSP = "\u00A0";
+    private static final float AVG_HEIGHT_MULT_THRESHOLD = 6.0f;
+    private static final float MAX_BLANK_FONT_SIZE = 40.0f;
+    private static final float MIN_BLANK_FONT_SIZE = 2.0f;
+    private final PDDocument document;
+    private final ArrayList<TextElement> textElements;
+    private final RectangleSpatialIndex<TextElement> spatialIndex;
+    private float minCharWidth = Float.MAX_VALUE;
+    private float minCharHeight = Float.MAX_VALUE;
+    private float totalHeight = 0.0f;
+    private int countHeight = 0;
+
+    public CustomTabulaTextStripper(PDDocument document, int pageNumber) throws IOException {
+        super(document, pageNumber);
+        this.document = document;
+        this.setStartPage(pageNumber);
+        this.setEndPage(pageNumber);
+        this.textElements = new ArrayList<>();
+        this.spatialIndex = new RectangleSpatialIndex<>();
+    }
+
+    public void process() throws IOException {
+        this.getText(this.document);
+    }
+
+    @Override
+    protected void writeString(String string, List<TextPosition> textPositions) {
+        // Glyphs that are drawn at an angle (non-zero (0, 1) entry in the text matrix)
+        List<TextPosition> rotationTexts = textPositions.stream()
+                .filter(e -> e.getTextMatrix().getValue(0, 1) != 0.).collect(Collectors.toList());
+        // Watermark text is almost always rotated; collect the heights of the rotated glyphs
+        List<Float> heights = rotationTexts.stream().map(TextPosition::getHeight).collect(Collectors.toList());
+        // If every glyph in the run is watermark text, drop the whole run
+        if (textPositions.size() == heights.size()) {
+            return;
+        }
+
+        // Every other case has to produce TextElement entries
+        for (TextPosition textPosition : textPositions) {
+            if (textPosition == null) {
+                continue;
+            }
+
+            String c = textPosition.getUnicode();
+
+            // if c not printable, return
+            if (!isPrintable(c)) {
+                continue;
+            }
+
+            float h = textPosition.getHeightDir();
+
+            if (c.equals(NBSP)) { // replace non-breaking space for space
+                c = " ";
+            }
+
+            // A glyph is normal text only when it is not rotated and its height is not among the rotated heights; otherwise it is blanked
+            float rotation = textPosition.getTextMatrix().getValue(0, 1);
+            if (rotation != 0. || heights.contains(h)) {
+                c = " ";
+            }
+
+            float wos = textPosition.getWidthOfSpace();
+
+            TextElement te = new TextElement(Utils.round(textPosition.getYDirAdj() - h, 2),
+                    Utils.round(textPosition.getXDirAdj(), 2), Utils.round(textPosition.getWidthDirAdj(), 2),
+                    Utils.round(textPosition.getHeightDir(), 2), textPosition.getFont(), textPosition.getFontSizeInPt(), c,
+                    // workaround a possible bug in PDFBox:
+                    // https://issues.apache.org/jira/browse/PDFBOX-1755
+                    wos, textPosition.getDir());
+
+            this.minCharWidth = (float) Math.min(this.minCharWidth, te.getWidth());
+            this.minCharHeight = (float) Math.min(this.minCharHeight, te.getHeight());
+
+            countHeight++;
+            totalHeight += te.getHeight();
+            float avgHeight = totalHeight / countHeight;
+
+            //We have an issue where tall blank cells throw off the row height calculation
+            //Introspect a blank cell a bit here to see if it should be thrown away
+            if ((te.getText() == null || te.getText().trim().equals(""))) {
+                //if the cell height is more than AVG_HEIGHT_MULT_THRESHOLDxaverage, throw it away
+                if (avgHeight > 0
+                        && te.getHeight() >= (avgHeight * AVG_HEIGHT_MULT_THRESHOLD)) {
+                    continue;
+                }
+
+                //if the font size is outside of reasonable ranges, throw it away
+                if (textPosition.getFontSizeInPt() > MAX_BLANK_FONT_SIZE || textPosition.getFontSizeInPt() < MIN_BLANK_FONT_SIZE) {
+                    continue;
+                }
+            }
+
+            this.spatialIndex.add(te);
+            this.textElements.add(te);
+        }
+    }
+
+    @Override
+    protected float computeFontHeight(PDFont font) throws IOException {
+        BoundingBox bbox = font.getBoundingBox();
+        if (bbox.getLowerLeftY() < Short.MIN_VALUE) {
+            // PDFBOX-2158 and PDFBOX-3130
+            // files by Salmat eSolutions / ClibPDF Library
+            bbox.setLowerLeftY(-(bbox.getLowerLeftY() + 65536));
+        }
+        // 1/2 the bbox is used as the height todo: why?
+        float glyphHeight = bbox.getHeight() / 2;
+
+        // sometimes the bbox has very high values, but CapHeight is OK
+        PDFontDescriptor fontDescriptor = font.getFontDescriptor();
+        if (fontDescriptor != null) {
+            float capHeight = fontDescriptor.getCapHeight();
+            if (Float.compare(capHeight, 0) != 0 &&
+                    (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) {
+                glyphHeight = capHeight;
+            }
+            // PDFBOX-3464, PDFBOX-448:
+            // sometimes even CapHeight has very high value, but Ascent and Descent are ok
+            float ascent = fontDescriptor.getAscent();
+            float descent = fontDescriptor.getDescent();
+            if (ascent > 0 && descent < 0 &&
+                    ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) {
+                glyphHeight = (ascent - descent) / 2;
+            }
+        }
+
+        // transformPoint from glyph space -> text space
+        float height;
+        if (font instanceof PDType3Font) {
+            height = font.getFontMatrix().transformPoint(0, glyphHeight).y;
+        } else {
+            height = glyphHeight / 1000;
+        }
+
+        return height;
+    }
+
+    private boolean isPrintable(String s) {
+        char c;
+        Character.UnicodeBlock block;
+        boolean printable = false;
+        for (int i = 0; i < s.length(); i++) {
+            c = s.charAt(i);
+            block = Character.UnicodeBlock.of(c);
+            printable |= !Character.isISOControl(c) && block != null && block != Character.UnicodeBlock.SPECIALS;
+        }
+        return printable;
+    }
+
+    public List<TextElement> getTextElements() {
+        return this.textElements;
+    }
+
+    public RectangleSpatialIndex<TextElement> getSpatialIndex() {
+        return spatialIndex;
+    }
+
+    public float getMinCharWidth() {
+        return minCharWidth;
+    }
+
+    public float getMinCharHeight() {
+        return minCharHeight;
+    }
+}

+ 0 - 285
service-daq/src/main/java/com/simuwang/daq/components/PDMonthlyReportParser.java

@@ -1,285 +0,0 @@
-package com.simuwang.daq.components;
-
-import cn.hutool.core.collection.CollUtil;
-import cn.hutool.core.collection.ListUtil;
-import cn.hutool.core.map.MapUtil;
-import cn.hutool.core.util.ReflectUtil;
-import cn.hutool.core.util.StrUtil;
-import com.simuwang.base.common.exception.APIException;
-import com.simuwang.base.mapper.EmailFieldMappingMapper;
-import com.simuwang.base.pojo.dos.EmailFieldMappingDO;
-import com.simuwang.daq.dto.MonthlyReportNavInfo;
-import com.simuwang.daq.dto.ReportFundInfo;
-import com.simuwang.daq.dto.ReportInfo;
-import com.smppw.common.pojo.ValueLabelVO;
-import org.apache.pdfbox.Loader;
-import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.springframework.stereotype.Component;
-import technology.tabula.*;
-import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
-
-import java.io.IOException;
-import java.util.Calendar;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.stream.Collectors;
-
-/**
- * @author wangzaijun
- * @date 2024/9/11 16:19
- * @description pdf格式的月报解析
- */
-@Component("monthly-report:pdf")
-public class PDMonthlyReportParser extends AbstractReportParser<MonthlyReportNavInfo> {
-    private final List<Table> extNavTables = ListUtil.list(true);
-    private final EmailFieldMappingMapper fieldMappingMapper;
-    private String reportName = null;
-    private Table baseInfoTable = null;
-    private List<ValueLabelVO> fieldMapper = null;
-
-    public PDMonthlyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
-        this.fieldMappingMapper = fieldMappingMapper;
-    }
-
-    @Override
-    protected void initParse() throws IOException {
-        try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(this.filepath))) {
-            CustomPDFTextStripper stripper = new CustomPDFTextStripper();
-            stripper.setSortByPosition(true);
-            String text = stripper.getText(document);
-            text = text.replace("++\r\n", "").replace("++", "");
-            List<String> textList = StrUtil.split(text, "\r\n");
-            if (CollUtil.isNotEmpty(textList)) {
-                List<String> wkList = this.watermarkListMap.get("report_name");
-                String name = this.processString(wkList, textList.get(0));
-                this.reportName = this.matchReportName(name);
-                if (StrUtil.isBlank(this.reportName)) {
-                    throw new APIException("未匹配到报告名称");
-                }
-            }
-
-            SpreadsheetExtractionAlgorithm extractionAlgorithm = new SpreadsheetExtractionAlgorithm();
-            PageIterator pageIterator = new ObjectExtractor(document).extract();
-            while (pageIterator.hasNext()) {
-                Page page = pageIterator.next();
-                List<Table> tables = extractionAlgorithm.extract(page);
-                tables = tables.stream().distinct().collect(Collectors.toList());
-                for (Table table : tables) {
-                    int colCount = table.getColCount();
-                    if (colCount == 4) {
-                        this.baseInfoTable = table;
-                    } else if (colCount >= 5) {
-                        this.extNavTables.add(table);
-                    }
-                }
-            }
-        }
-        List<EmailFieldMappingDO> emailFieldMapping = this.fieldMappingMapper.getEmailFieldMapping(1);
-        if (CollUtil.isNotEmpty(emailFieldMapping)) {
-            this.fieldMapper = emailFieldMapping.stream().map(e -> new ValueLabelVO(e.getCode(), e.getName())).collect(Collectors.toList());
-        }
-    }
-
-    @Override
-    protected ReportInfo parseReportInfo(Integer fileId) {
-        ReportInfo reportInfo = new ReportInfo();
-        reportInfo.setFileId(fileId);
-        reportInfo.setReportName(this.reportName);
-        reportInfo.setReportType(this.matchReportType(this.reportName));
-        reportInfo.setReportDate(this.matchReportDate(this.reportName));
-        return reportInfo;
-    }
-
-    @Override
-    protected ReportFundInfo parseBaseInfo() {
-        Table baseInfoTable = this.baseInfoTable;
-        if (baseInfoTable == null) {
-            throw new APIException("未解析到基本信息表格");
-        }
-        Map<String, Object> baseInfoMap = MapUtil.newHashMap(32);
-        for (int i = 0; i < baseInfoTable.getRows().size(); i++) {
-            List<RectangularTextContainer> cols = baseInfoTable.getRows().get(i);
-            for (int j = 0; j < 2; j++) {
-                baseInfoMap.put(cols.get(j * 2).getText(), cols.get(j * 2 + 1).getText());
-            }
-        }
-        // 匹配字段清洗字段
-        ReportFundInfo reportFundInfo = new ReportFundInfo();
-        this.buildInfo(baseInfoMap, reportFundInfo);
-        return reportFundInfo;
-    }
-
-    @Override
-    protected List<MonthlyReportNavInfo> parseExtInfo() {
-        List<MonthlyReportNavInfo> exts = ListUtil.list(false);
-        List<Table> extNavTables = this.extNavTables;
-        for (Table extNavTable : extNavTables) {
-            Map<String, Object> extInfoMap = MapUtil.newHashMap(16);
-            for (int i = 0; i < extNavTable.getColCount(); i++) {
-                String key = extNavTable.getCell(0, i).getText();
-                String value = extNavTable.getCell(1, i).getText();
-                extInfoMap.put(key, value);
-            }
-            MonthlyReportNavInfo navInfo = new MonthlyReportNavInfo();
-            buildInfo(extInfoMap, navInfo);
-            exts.add(navInfo);
-        }
-        return exts;
-    }
-
-    private void buildInfo(Map<String, Object> extInfoMap, Object info) {
-        for (Map.Entry<String, Object> entry : extInfoMap.entrySet()) {
-            String k = entry.getKey();
-            Object v = entry.getValue();
-            String fieldValue = StrUtil.toStringOrNull(v);
-            if (fieldValue.startsWith("-") || fieldValue.endsWith("-")) {
-                fieldValue = null;
-            }
-            if (fieldValue != null) {
-                fieldValue = fieldValue.replace("\r", "");
-            }
-            for (ValueLabelVO vo : this.fieldMapper) {
-                String fieldName = vo.getValue();
-                List<String> labels = StrUtil.split(vo.getLabel(), ",");
-                if (labels.contains(k)) {
-                    try {
-                        ReflectUtil.setFieldValue(info, fieldName, fieldValue);
-                    } catch (Exception e) {
-                        this.logger.warn("{} 字段值设置错误:{}", fieldName, e.getMessage());
-                    }
-                    break;
-                }
-                for (String label : labels) {
-                    if (k.contains(label)) {
-                        try {
-                            ReflectUtil.setFieldValue(info, fieldName, fieldValue);
-                        } catch (Exception e) {
-                            this.logger.warn("{} 字段值设置错误:{}", fieldName, e.getMessage());
-                        }
-                        break;
-                    }
-                }
-            }
-        }
-    }
-
-    @Override
-    protected void saveResult(ReportInfo reportInfo, ReportFundInfo reportFundInfo, List<MonthlyReportNavInfo> exts) {
-        System.out.println("保存数据!");
-    }
-
-    /**
-     * 匹配报告日期
-     *
-     * @param string 文本内容
-     * @return 报告日期
-     */
-    private String matchReportDate(String string) {
-        if (string == null) {
-            return null;
-        }
-
-        // 编译正则表达式模式
-        Pattern pat1 = Pattern.compile("(2\\d{3}).*([一二三四1234])季度");  // 2023年XXX3季度
-        Pattern pat2 = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");  // 2023-12-31
-        Pattern pat3 = Pattern.compile("(2\\d{3})年年度");  // 2023年年度
-        Pattern pat4 = Pattern.compile("(\\d{4})年(\\d{1,2})月");  // 2023年12月
-
-        // 创建Matcher对象
-        Matcher matcher1 = pat1.matcher(string);
-        Matcher matcher2 = pat2.matcher(string);
-        Matcher matcher3 = pat3.matcher(string);
-        Matcher matcher4 = pat4.matcher(string);
-
-        // 尝试匹配
-        if (matcher1.find()) {
-            String year = matcher1.group(1);
-            String quarter = matcher1.group(2);
-            return switch (quarter) {
-                case "一", "1" -> year + "-03-31";
-                case "二", "2" -> year + "-06-30";
-                case "三", "3" -> year + "-09-30";
-                case "四", "4" -> year + "-12-31";
-                default -> null;
-            };
-        } else if (matcher2.find()) {
-            return matcher2.group();
-        } else if (matcher3.find()) {
-            return matcher3.group(1) + "-12-31";
-        } else if (matcher4.find()) {
-            String year = matcher4.group(1);
-            String month = matcher4.group(2);
-            int lastDayOfMonth = getLastDayOfMonth(Integer.parseInt(year), Integer.parseInt(month));
-            return year + "-" + padZero(month) + "-" + padZero(lastDayOfMonth + "");
-        } else {
-            return null;
-        }
-    }
-
-    /**
-     * 匹配报告类型,如“季度”、“年度”
-     *
-     * @param string 输入字符串
-     * @return 匹配到的报告类型子字符串,如果没有匹配到则返回null
-     */
-    private String matchReportType(String string) {
-        if (string == null) {
-            return null;
-        }
-
-        // 编译正则表达式模式
-        Pattern pattern = Pattern.compile("月|季度|年度");
-
-        // 创建Matcher对象
-        Matcher matcher = pattern.matcher(string);
-
-        // 尝试匹配
-        if (matcher.find()) {
-            return matcher.group();
-        } else {
-            return null;
-        }
-    }
-
-    private String matchReportName(String text) {
-        if (StrUtil.isBlank(text)) {
-            return null;
-        }
-        // 编译正则表达式模式
-        Pattern pat1 = Pattern.compile(".+?报([告表])?\\d{4}(\\.?\\d{1,2}(\\.?\\d{2})?)?");
-        Pattern pat2 = Pattern.compile("私募.*披露年度报[告表]((\\d{4}-\\d{2}-\\d{2}至\\d{4}-\\d{2}-\\d{2}))?");
-        Pattern pat3 = Pattern.compile(".+?报([告表])?\\d{4}-\\d{2}-\\d{2}至\\d{4}-\\d{2}-\\d{2}?");
-
-        // 创建Matcher对象
-        Matcher matcher1 = pat1.matcher(text);
-        Matcher matcher2 = pat2.matcher(text);
-        Matcher matcher3 = pat3.matcher(text);
-
-        // 尝试匹配
-        String reportName;
-        if (matcher1.find()) {
-            reportName = matcher1.group();
-        } else if (matcher2.find()) {
-            reportName = matcher2.group();
-        } else if (matcher3.find()) {
-            reportName = matcher3.group();
-        } else {
-            reportName = text;
-        }
-        return reportName.replace("(", "(").replace(")", ")");
-    }
-
-    private int getLastDayOfMonth(int year, int month) {
-        Calendar calendar = Calendar.getInstance();
-        calendar.set(Calendar.YEAR, year);
-        calendar.set(Calendar.MONTH, month - 1); // Calendar.MONTH 是从0开始的
-        return calendar.getActualMaximum(Calendar.DAY_OF_MONTH);
-    }
-
-    private String padZero(String number) {
-        return String.format("%02d", Integer.parseInt(number));
-    }
-}

+ 10 - 9
service-daq/src/main/java/com/simuwang/daq/components/PythonReportConverter.java

@@ -7,6 +7,7 @@ import cn.hutool.core.util.StrUtil;
 import cn.hutool.json.JSONArray;
 import cn.hutool.json.JSONArray;
 import cn.hutool.json.JSONObject;
 import cn.hutool.json.JSONObject;
 import cn.hutool.json.JSONUtil;
 import cn.hutool.json.JSONUtil;
+import com.simuwang.base.common.enums.ReportType;
 import com.simuwang.base.pojo.dos.report.BaseReportDO;
 import com.simuwang.base.pojo.dos.report.BaseReportDO;
 import com.simuwang.base.pojo.dto.report.*;
 import com.simuwang.base.pojo.dto.report.*;
 
 
@@ -21,8 +22,8 @@ import java.util.Set;
  */
  */
 public class PythonReportConverter {
 public class PythonReportConverter {
     @SuppressWarnings("unchecked")
     @SuppressWarnings("unchecked")
-    public static <T extends ReportData> PythonResult<T> convert(JSONObject jsonObject, Integer type) {
-        PythonResult<T> result = new PythonResult<>();
+    public static <T extends ReportData> ParseResult<T> convert(JSONObject jsonObject, ReportType type) {
+        ParseResult<T> result = new ParseResult<>();
         if (jsonObject == null) {
         if (jsonObject == null) {
             return result;
             return result;
         }
         }
@@ -34,7 +35,7 @@ public class PythonReportConverter {
         }
         }
 
 
         T reportData;
         T reportData;
-        if (Objects.equals(2, type) || Objects.equals(1, type)) {
+        if (Objects.equals(ReportType.ANNUALLY, type) || Objects.equals(ReportType.QUARTERLY, type)) {
             reportData = (T) convertQuarterly(data);
             reportData = (T) convertQuarterly(data);
         } else {
         } else {
             reportData = (T) convertMonthly(data);
             reportData = (T) convertMonthly(data);
@@ -44,17 +45,17 @@ public class PythonReportConverter {
     }
     }
 
 
     private static MonthlyReportData convertMonthly(JSONObject jsonObject) {
     private static MonthlyReportData convertMonthly(JSONObject jsonObject) {
-        MonthlyReportData reportData = new MonthlyReportData();
-        reportData.setBaseInfo(convertToObj(jsonObject, "base_info", ReportBaseInfoDTO.class));
-        reportData.setFundInfo(convertToObj(jsonObject, "fund_info", ReportFundInfoDTO.class));
+        ReportBaseInfoDTO baseInfo = convertToObj(jsonObject, "base_info", ReportBaseInfoDTO.class);
+        ReportFundInfoDTO fundInfo = convertToObj(jsonObject, "fund_info", ReportFundInfoDTO.class);
+        MonthlyReportData reportData = new MonthlyReportData(baseInfo, fundInfo);
         reportData.setNetReport(convertToList(jsonObject, "net_report", ReportNetReportDTO.class));
         reportData.setNetReport(convertToList(jsonObject, "net_report", ReportNetReportDTO.class));
         return reportData;
         return reportData;
     }
     }
 
 
     private static QuarterlyReportData convertQuarterly(JSONObject jsonObject) {
     private static QuarterlyReportData convertQuarterly(JSONObject jsonObject) {
-        QuarterlyReportData reportData = new QuarterlyReportData();
-        reportData.setBaseInfo(convertToObj(jsonObject, "base_info", ReportBaseInfoDTO.class));
-        reportData.setFundInfo(convertToObj(jsonObject, "fund_info", ReportFundInfoDTO.class));
+        ReportBaseInfoDTO baseInfo = convertToObj(jsonObject, "base_info", ReportBaseInfoDTO.class);
+        ReportFundInfoDTO fundInfo = convertToObj(jsonObject, "fund_info", ReportFundInfoDTO.class);
+        QuarterlyReportData reportData = new QuarterlyReportData(baseInfo, fundInfo);
         reportData.setAssetAllocation(convertToList(jsonObject, "asset_allocation", ReportAssetAllocationDTO.class));
         reportData.setAssetAllocation(convertToList(jsonObject, "asset_allocation", ReportAssetAllocationDTO.class));
         reportData.setFinancialIndicators(convertToList(jsonObject, "financial_indicators", ReportFinancialIndicatorsDTO.class));
         reportData.setFinancialIndicators(convertToList(jsonObject, "financial_indicators", ReportFinancialIndicatorsDTO.class));
         reportData.setInvestmentIndustry(convertToList(jsonObject, "investment_industry", ReportInvestmentIndustryDTO.class));
         reportData.setInvestmentIndustry(convertToList(jsonObject, "investment_industry", ReportInvestmentIndustryDTO.class));

+ 0 - 18
service-daq/src/main/java/com/simuwang/daq/components/ReportParser.java

@@ -1,18 +0,0 @@
-package com.simuwang.daq.components;
-
-/**
- * @author wangzaijun
- * @date 2024/9/9 19:18
- * @description 报告模板解析器,计划支持pdf、word等
- */
-public interface ReportParser {
-    /**
-     * 报告模板解析接口
-     * 扩展支持月报、季报和年报,解析文件格式支持pdf、word和excel
-     *
-     * @param fileId        文件id
-     * @param filepath      文件路径
-     * @param watermarkName 生成水印
-     */
-    void parse(Integer fileId, String filepath, String watermarkName);
-}

+ 117 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/AbstractReportParser.java

@@ -0,0 +1,117 @@
+package com.simuwang.daq.components.report.parser;
+
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.map.MapUtil;
+import cn.hutool.core.util.ReflectUtil;
+import cn.hutool.core.util.StrUtil;
+import com.simuwang.base.mapper.EmailFieldMappingMapper;
+import com.simuwang.base.pojo.dos.EmailFieldMappingDO;
+import com.simuwang.base.pojo.dto.report.ReportData;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.regex.Pattern;
+
+/**
+ * @author wangzaijun
+ * @date 2024/9/30 18:13
+ * @description Abstract base for report parsers that do not go through the Python interface
+ * (mainly to support formats such as pdf, word and excel).
+ */
+public abstract class AbstractReportParser<T extends ReportData> implements ReportParser<T> {
+    protected final Logger logger = LoggerFactory.getLogger(this.getClass());
+
+    private final EmailFieldMappingMapper fieldMappingMapper;
+    /**
+     * Field-matching rules: maps a label name to the code of the field it should populate.
+     * Filled by {@link #init()} and read by {@link #buildInfo(Map, Object)}.
+     */
+    protected Map<String, String> fieldMapper;
+
+    public AbstractReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        this.fieldMappingMapper = fieldMappingMapper;
+        this.fieldMapper = MapUtil.newHashMap(128);
+    }
+
+    /**
+     * Initialises the parser data.
+     * Loads the mappings returned by getEmailFieldMapping(3); each mapping's name is split into individual
+     * names and every name is registered against the mapping's code. putIfAbsent is used, so the first code
+     * registered for a name wins. When no mappings are returned an error is logged and fieldMapper is left
+     * as it was.
+     */
+    protected void init() {
+        List<EmailFieldMappingDO> emailFieldMapping = this.fieldMappingMapper.getEmailFieldMapping(3);
+        if (CollUtil.isEmpty(emailFieldMapping)) {
+            this.logger.error("未设置报告解析规则!");
+            return;
+        }
+        for (EmailFieldMappingDO mapping : emailFieldMapping) {
+            String code = mapping.getCode();
+            List<String> names = StrUtil.split(mapping.getName(), ",");
+            for (String name : names) {
+                this.fieldMapper.putIfAbsent(name, code);
+            }
+        }
+    }
+
+    /**
+     * Data-cleaning hook for the parsed result.
+     * NOTE(review): the original comment said "does nothing by default", but the method is abstract,
+     * so every concrete subclass has to supply its own implementation.
+     *
+     * @param reportData the parsed result data
+     */
+    protected abstract void cleaningReportData(T reportData);
+
+    /**
+     * Sets fields on an object.
+     * For every entry the key and the value are both passed through {@link #cleaningValue(Object)}; the cleaned
+     * key is looked up in fieldMapper and entries without a mapping are skipped. Otherwise the cleaned value is
+     * written to the mapped field by reflection; a failure is logged at warn level and the loop continues.
+     * An empty or null map is a no-op.
+     *
+     * @param extInfoMap label name to value
+     * @param info       the object whose fields are to be set
+     */
+    protected void buildInfo(Map<String, Object> extInfoMap, Object info) {
+        if (MapUtil.isEmpty(extInfoMap)) {
+            return;
+        }
+        for (Map.Entry<String, Object> entry : extInfoMap.entrySet()) {
+            String k = this.cleaningValue(entry.getKey());
+            String fieldValue = this.cleaningValue(entry.getValue());
+            String fieldName = this.fieldMapper.get(k);
+            if (StrUtil.isBlank(fieldName)) {
+                continue;
+            }
+            try {
+                ReflectUtil.setFieldValue(info, fieldName, fieldValue);
+            } catch (Exception e) {
+                this.logger.warn("{} 字段值设置错误:{}", fieldName, e.getMessage());
+            }
+        }
+    }
+
+    protected String cleaningValue(Object value) {
+        return this.cleaningValue(value, true);
+    }
+
+    /**
+     * Light cleaning of a value; the result is always a string.
+     * NOTE(review): the parentheses pattern is compiled on every call; it could be hoisted to a constant.
+     * NOTE(review): as rendered here the second replace call has two identical arguments, which makes it a
+     * no-op; possibly a full-width character was lost in rendering, so confirm against the repository.
+     *
+     * @param value              the value to clean
+     * @param replaceParentheses whether parentheses and their content are removed
+     * @return the cleaned string, or null when the result is blank or is exactly "-"
+     */
+    protected String cleaningValue(Object value, boolean replaceParentheses) {
+        String fieldValue = StrUtil.toStringOrNull(value);
+        if (!StrUtil.isNullOrUndefined(fieldValue)) {
+            // Replace special characters; spaces are replaced with the empty string
+            fieldValue = fieldValue
+                    .replace("\r", StrUtil.EMPTY)
+                    .replace(";", ";")
+                    .replaceAll(" ", StrUtil.EMPTY);
+            if (replaceParentheses) {
+                // The regex matches parentheses (Chinese ones, per the original comment) together with their content and replaces them with the empty string
+                fieldValue = Pattern.compile("[(|(][^)]*[)|)]").matcher(fieldValue).replaceAll(StrUtil.EMPTY);
+            }
+        }
+        // A value that consists only of "-" becomes null
+        if (Objects.equals("-", fieldValue)) {
+            fieldValue = null;
+        }
+        return StrUtil.isBlank(fieldValue) ? null : fieldValue;
+    }
+}

+ 33 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/ReportParser.java

@@ -0,0 +1,33 @@
+package com.simuwang.daq.components.report.parser;
+
+import com.simuwang.base.common.exception.ReportParseException;
+import com.simuwang.base.pojo.dto.report.ReportData;
+import com.simuwang.base.pojo.dto.report.ReportParserParams;
+
+import java.io.IOException;
+
+/**
+ * @author wangzaijun
+ * @date 2024/9/9 19:18
+ * @description Report template parser; planned to support pdf, word and other formats.
+ */
+public interface ReportParser<T extends ReportData> {
+    /**
+     * Returns the name of the current parser.
+     * The default implementation returns the simple class name of the implementing class.
+     *
+     * @return the parser name
+     */
+    default String getParser() {
+        return this.getClass().getSimpleName();
+    }
+
+    /**
+     * Report template parsing entry point.
+     * Intended to cover monthly, quarterly and annual reports; the file formats to be supported are
+     * pdf, word and excel.
+     *
+     * @param params the parsing request parameters
+     * @return the parsing result
+     * @throws IOException          file I/O failure
+     * @throws ReportParseException declared by the signature; presumably raised when the report cannot be
+     *                              parsed (confirm against the implementations)
+     */
+    T parse(ReportParserParams params) throws IOException, ReportParseException;
+}

+ 69 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/ReportParserConstant.java

@@ -0,0 +1,69 @@
+package com.simuwang.daq.components.report.parser;
+
+import cn.hutool.core.map.MapUtil;
+import com.simuwang.base.common.enums.ReportParserFileType;
+import com.simuwang.base.common.enums.ReportType;
+
+import java.util.Map;
+
+/**
+ * @author wangzaijun
+ * @date 2024/9/29 13:39
+ * @description Configuration of the bean names used for report parsing.
+ * REPORT_PARSER_BEAN_MAP is keyed by ReportType; each value maps a ReportParserFileType to one of the
+ * name constants declared below. The static initializer registers the MONTHLY, QUARTERLY and ANNUALLY
+ * report types, each with the PDF, DOC, DOCX, XLSX, XLS and PYTHON file types.
+ * NOTE(review): the outer map is a public mutable map, while the inner maps come from Map.of;
+ * confirm no caller is expected to modify them.
+ */
+public final class ReportParserConstant {
+    public static final Map<ReportType, Map<ReportParserFileType, String>> REPORT_PARSER_BEAN_MAP = MapUtil.newHashMap(8);
+
+    public static final String PARSER_PDF_MONTHLY = "report-parser:pdf:monthly";
+    public static final String PARSER_DOC_MONTHLY = "report-parser:doc:monthly";
+    public static final String PARSER_DOCX_MONTHLY = "report-parser:docx:monthly";
+    public static final String PARSER_XLSX_MONTHLY = "report-parser:xlsx:monthly";
+    public static final String PARSER_XLS_MONTHLY = "report-parser:xls:monthly";
+    public static final String PARSER_PYTHON_MONTHLY = "report-parser:python:monthly";
+
+    public static final String PARSER_PDF_QUARTERLY = "report-parser:pdf:quarterly";
+    public static final String PARSER_DOC_QUARTERLY = "report-parser:doc:quarterly";
+    public static final String PARSER_DOCX_QUARTERLY = "report-parser:docx:quarterly";
+    public static final String PARSER_XLSX_QUARTERLY = "report-parser:xlsx:quarterly";
+    public static final String PARSER_XLS_QUARTERLY = "report-parser:xls:quarterly";
+    public static final String PARSER_PYTHON_QUARTERLY = "report-parser:python:quarterly";
+
+    public static final String PARSER_PDF_ANNUALLY = "report-parser:pdf:annually";
+    public static final String PARSER_DOC_ANNUALLY = "report-parser:doc:annually";
+    public static final String PARSER_DOCX_ANNUALLY = "report-parser:docx:annually";
+    public static final String PARSER_XLSX_ANNUALLY = "report-parser:xlsx:annually";
+    public static final String PARSER_XLS_ANNUALLY = "report-parser:xls:annually";
+    public static final String PARSER_PYTHON_ANNUALLY = "report-parser:python:annually";
+
+    static {
+        REPORT_PARSER_BEAN_MAP.put(ReportType.MONTHLY,
+                Map.of(ReportParserFileType.PDF, PARSER_PDF_MONTHLY,
+                        ReportParserFileType.DOC, PARSER_DOC_MONTHLY,
+                        ReportParserFileType.DOCX, PARSER_DOCX_MONTHLY,
+                        ReportParserFileType.XLSX, PARSER_XLSX_MONTHLY,
+                        ReportParserFileType.XLS, PARSER_XLS_MONTHLY,
+
+                        ReportParserFileType.PYTHON, PARSER_PYTHON_MONTHLY
+                ));
+
+        REPORT_PARSER_BEAN_MAP.put(ReportType.QUARTERLY,
+                Map.of(ReportParserFileType.PDF, PARSER_PDF_QUARTERLY,
+                        ReportParserFileType.DOC, PARSER_DOC_QUARTERLY,
+                        ReportParserFileType.DOCX, PARSER_DOCX_QUARTERLY,
+                        ReportParserFileType.XLSX, PARSER_XLSX_QUARTERLY,
+                        ReportParserFileType.XLS, PARSER_XLS_QUARTERLY,
+
+                        ReportParserFileType.PYTHON, PARSER_PYTHON_QUARTERLY
+                ));
+
+        REPORT_PARSER_BEAN_MAP.put(ReportType.ANNUALLY,
+                Map.of(ReportParserFileType.PDF, PARSER_PDF_ANNUALLY,
+                        ReportParserFileType.DOC, PARSER_DOC_ANNUALLY,
+                        ReportParserFileType.DOCX, PARSER_DOCX_ANNUALLY,
+                        ReportParserFileType.XLSX, PARSER_XLSX_ANNUALLY,
+                        ReportParserFileType.XLS, PARSER_XLS_ANNUALLY,
+
+                        ReportParserFileType.PYTHON, PARSER_PYTHON_ANNUALLY
+                ));
+    }
+}

+ 32 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/ReportParserFactory.java

@@ -0,0 +1,32 @@
+package com.simuwang.daq.components.report.parser;
+
+import cn.hutool.core.map.MapUtil;
+import com.simuwang.base.common.enums.ReportParserFileType;
+import com.simuwang.base.common.enums.ReportType;
+import com.simuwang.base.common.exception.ReportParseException;
+import com.simuwang.base.pojo.dto.report.ReportData;
+import com.simuwang.base.pojo.dto.report.ReportParseStatus;
+import org.springframework.stereotype.Component;
+
+import java.util.Map;
+
+/**
+ * Resolves the {@link ReportParser} responsible for a given report type and file type.
+ * <p>
+ * The constructor receives the available parsers keyed by name; a lookup translates the
+ * (report type, file type) pair into such a name through
+ * {@link ReportParserConstant#REPORT_PARSER_BEAN_MAP}.
+ */
+@Component
+public class ReportParserFactory {
+    /**
+     * Parsers keyed by name, copied from the map handed to the constructor.
+     * Kept per instance so that constructing a factory never mutates shared static state.
+     */
+    private final Map<String, ReportParser<? extends ReportData>> parsers = MapUtil.newHashMap(32);
+
+    /**
+     * @param components available parsers keyed by name (presumably the Spring bean names — confirm)
+     */
+    public ReportParserFactory(Map<String, ReportParser<? extends ReportData>> components) {
+        this.parsers.putAll(components);
+    }
+
+    /**
+     * Looks up the parser registered for the given report type and file type.
+     *
+     * @param reportType           report type (monthly / quarterly / annually)
+     * @param reportParserFileType file type of the report to parse
+     * @param <T>                  report data type produced by the parser
+     * @return the matching parser, never null
+     * @throws ReportParseException with {@link ReportParseStatus#NO_SUPPORT_TEMPLATE} when either
+     *                              argument is null or no parser is registered for the combination
+     */
+    @SuppressWarnings("unchecked")
+    public <T extends ReportData> ReportParser<T> getInstance(ReportType reportType, ReportParserFileType reportParserFileType) {
+        // The per-report-type maps are created with Map.of(...), whose get(null) throws
+        // NullPointerException; report the documented "unsupported" status instead.
+        if (reportType == null || reportParserFileType == null) {
+            throw new ReportParseException(ReportParseStatus.NO_SUPPORT_TEMPLATE);
+        }
+        String beanName = ReportParserConstant.REPORT_PARSER_BEAN_MAP.getOrDefault(reportType, MapUtil.empty()).get(reportParserFileType);
+        ReportParser<? extends ReportData> reportParser = beanName == null ? null : this.parsers.get(beanName);
+        if (reportParser == null) {
+            throw new ReportParseException(ReportParseStatus.NO_SUPPORT_TEMPLATE);
+        }
+        return (ReportParser<T>) reportParser;
+    }
+}

+ 330 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/AbstractPDReportParser.java

@@ -0,0 +1,330 @@
+package com.simuwang.daq.components.report.parser.pdf;
+
+import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.exceptions.ExceptionUtil;
+import cn.hutool.core.map.MapUtil;
+import cn.hutool.core.util.StrUtil;
+import com.simuwang.base.common.conts.Constants;
+import com.simuwang.base.common.enums.ReportType;
+import com.simuwang.base.common.exception.ReportParseException;
+import com.simuwang.base.mapper.EmailFieldMappingMapper;
+import com.simuwang.base.pojo.dto.report.*;
+import com.simuwang.daq.components.CustomPDFTextStripper;
+import com.simuwang.daq.components.report.parser.AbstractReportParser;
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import technology.tabula.CustomObjectExtractor;
+import technology.tabula.Page;
+import technology.tabula.PageIterator;
+import technology.tabula.Table;
+import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
+
+import java.io.IOException;
+import java.util.Calendar;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.function.Function;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+/**
+ * @author wangzaijun
+ * @date 2024/9/29 16:45
+ * @description pdf格式的报告解析抽象类
+ */
+public abstract class AbstractPDReportParser<T extends ReportData> extends AbstractReportParser<T> {
+    /**
+     * Fund basic-information table. Reset to null at the start of every parse() call and
+     * expected to be assigned by the subclass while it classifies the extracted tables.
+     * NOTE(review): per-parse state lives on the instance, so concurrent parse() calls on one
+     * instance would presumably interfere — confirm how instances are shared.
+     */
+    protected Table fundInfoTable;
+    /**
+     * All non-blank text lines of the document with the watermark text removed.
+     * Reset to null at the start of every parse() call.
+     */
+    protected List<String> textList;
+
+    /**
+     * @param fieldMappingMapper passed straight through to the superclass constructor
+     */
+    public AbstractPDReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    /**
+     * Parses one PDF report: extracts the watermark-free text and all tables, lets the subclass
+     * classify the tables, then assembles and cleans the report data.
+     *
+     * @param params parse parameters (file path, file id, file name)
+     * @return the parsed report data
+     * @throws IOException          when the PDF cannot be read
+     * @throws ReportParseException REPORT_IS_SCAN when no text or no table can be extracted,
+     *                              NOT_A_REPORT when neither of the first two text lines names a
+     *                              report type, NOT_A_FIXED_FORMAT when assembling the data fails
+     *                              unexpectedly
+     */
+    @Override
+    public T parse(ReportParserParams params) throws IOException, ReportParseException {
+        // Drop whatever a previous call left behind
+        this.fundInfoTable = null;
+        this.textList = null;
+        // Superclass initialisation hook
+        this.init();
+        // Extract text and tables while the document is open
+        try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(params.getFilepath()))) {
+            // All text, with the watermark replaced by the empty string
+            CustomPDFTextStripper stripper = new CustomPDFTextStripper();
+            stripper.setSortByPosition(true);
+            String text = stripper.getText(document).replace(Constants.WATERMARK_REPLACE, StrUtil.EMPTY);
+            this.textList = StrUtil.split(text, System.lineSeparator());
+            this.textList.removeIf(StrUtil::isBlank);
+            if (this.textList.isEmpty()) {
+                throw new ReportParseException(ReportParseStatus.REPORT_IS_SCAN);
+            }
+            // The report name/type is normally on the first or second line. A document with a
+            // single text line has no second line, so do not index past the end of the list.
+            String firstLine = this.textList.get(0);
+            String secondLine = this.textList.size() > 1 ? this.textList.get(1) : null;
+            if (this.matchReportType(firstLine) == null && this.matchReportType(secondLine) == null) {
+                throw new ReportParseException(ReportParseStatus.NOT_A_REPORT);
+            }
+            // Extract every table; the custom extractor strips watermark characters from cells
+            List<Table> tables = ListUtil.list(true);
+            SpreadsheetExtractionAlgorithm extractionAlgorithm = new SpreadsheetExtractionAlgorithm();
+            PageIterator pageIterator = new CustomObjectExtractor(document).extract();
+            while (pageIterator.hasNext()) {
+                Page page = pageIterator.next();
+                tables.addAll(extractionAlgorithm.extract(page));
+            }
+            if (tables.isEmpty()) {
+                throw new ReportParseException(ReportParseStatus.REPORT_IS_SCAN);
+            }
+            this.initTableInfo(tables);
+        }
+        try {
+            // Report-level information derived from the file name
+            ReportBaseInfoDTO reportInfo = this.buildReportInfo(params);
+            // Basic information of the report's main fund
+            ReportFundInfoDTO reportFundInfo = this.buildFundInfo(params);
+            // Remaining tables, assembled into the result object
+            T reportData = this.parseExtInfoAndSetData(reportInfo, reportFundInfo);
+            // Clean the data before returning it
+            this.cleaningReportData(reportData);
+            return reportData;
+        } catch (ReportParseException e) {
+            // Already carries a precise status; pass it on unchanged
+            throw e;
+        } catch (Exception e) {
+            this.logger.warn("报告解析错误:{}", ExceptionUtil.stacktraceToString(e));
+            throw new ReportParseException(ReportParseStatus.NOT_A_FIXED_FORMAT);
+        }
+    }
+
+    /**
+     * Classifies all extracted tables; called once per parse() after table extraction.
+     *
+     * @param tables every table extracted from the document; the implementation sorts them into
+     *               its own fields according to the fixed table layout it expects
+     */
+    protected abstract void initTableInfo(List<Table> tables);
+
+    /**
+     * Builds the fund basic information from {@link #fundInfoTable}.
+     * (The annual report parser overrides this with its own logic.)
+     *
+     * @param params parse parameters; only the file id is read here
+     * @return the mapped fund information
+     * @throws ReportParseException PARSE_FUND_INFO_FAIL when no fund information table was found
+     */
+    protected ReportFundInfoDTO buildFundInfo(ReportParserParams params) {
+        if (this.fundInfoTable == null) {
+            throw new ReportParseException(ReportParseStatus.PARSE_FUND_INFO_FAIL);
+        }
+        // Map the table's key/value pairs onto the DTO
+        return this.buildDto(params.getFileId(), this.fundInfoTable, ReportFundInfoDTO.class, this::parseFundInfo);
+    }
+
+    /**
+     * Converts the fund basic-information table into a key/value map.
+     *
+     * @param fundInfoTable the table to read
+     * @return map of the table's label cells to their value cells
+     */
+    protected abstract Map<String, Object> parseFundInfo(Table fundInfoTable);
+
+    /**
+     * Parses the remaining report content and stores it in the result object.
+     *
+     * @param reportInfo report basic information
+     * @param fundInfo   basic information of the fund described by the report
+     * @return the assembled report data
+     */
+    protected abstract T parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo);
+
+    /**
+     * Data-cleaning hook invoked by parse() on the assembled report data.
+     * This default implementation does nothing.
+     *
+     * @param reportData the assembled report data
+     */
+    @Override
+    protected void cleaningReportData(T reportData) {
+        // cleaning.
+    }
+
+    /**
+     * Creates the report basic information; name, type and date all come from the file name.
+     *
+     * @param params parse parameters (file id and file name are read)
+     * @return the populated report basic information
+     */
+    private ReportBaseInfoDTO buildReportInfo(ReportParserParams params) {
+        String filename = params.getFilename();
+        ReportBaseInfoDTO info = new ReportBaseInfoDTO(params.getFileId());
+        info.setReportName(filename);
+        info.setReportType(this.matchReportType(filename));
+        info.setReportDate(this.matchReportDate(filename));
+        return info;
+    }
+
+    /**
+     * Builds one DTO per table and labels each with its fund level.
+     * <p>
+     * Levels are assigned by table position: the first non-null table is the parent fund
+     * ("母基金"), the following ones get the tiered-fund levels found in the document text, in
+     * alphabetical order. Tables beyond the number of known levels keep their level unset.
+     * A table whose conversion failed (buildDto returned null) is left out of the result instead
+     * of causing a NullPointerException, while the positions of the other tables still decide
+     * their level.
+     *
+     * @param <DTO>    DTO type
+     * @param fileId   file id
+     * @param tables   tables to convert; null entries are ignored
+     * @param clazz    DTO class, instantiated through its no-arg constructor
+     * @param function converts a table into a key/value map
+     * @return the successfully converted DTOs, in table order
+     */
+    protected <DTO extends BaseReportLevelDTO<?>> List<DTO> buildLevelDto(Integer fileId, List<Table> tables, Class<DTO> clazz,
+                                                                          Function<Table, Map<String, Object>> function) {
+        List<Table> validTables = tables.stream().filter(Objects::nonNull).collect(Collectors.toList());
+        // Tiered-fund levels, preceded by the parent fund
+        List<String> levels = this.matchTieredFund(String.join(",", this.textList));
+        levels.add(0, "母基金");
+        List<DTO> dtos = ListUtil.list(false);
+        for (int i = 0; i < validTables.size(); i++) {
+            DTO dto = this.buildDto(fileId, validTables.get(i), clazz, function);
+            if (dto == null) {
+                // Conversion failed for this table; skip it
+                continue;
+            }
+            if (i < levels.size()) {
+                dto.setLevel(levels.get(i));
+            }
+            dtos.add(dto);
+        }
+        return dtos;
+    }
+
+    /**
+     * Builds a single DTO from a table.
+     * <p>
+     * Conversion is best effort: any failure (in the conversion function, in instantiating the
+     * DTO or in mapping the values) is logged and reported to the caller as {@code null}.
+     *
+     * @param <DTO>    DTO type
+     * @param fileId   file id
+     * @param table    table to convert
+     * @param clazz    DTO class, instantiated through its no-arg constructor
+     * @param function converts the table into a key/value map; null means an empty map
+     * @return the populated DTO, or null when the conversion failed
+     */
+    private <DTO extends BaseReportDTO<?>> DTO buildDto(Integer fileId, Table table, Class<DTO> clazz,
+                                                        Function<Table, Map<String, Object>> function) {
+        try {
+            Map<String, Object> extInfoMap = function == null ? MapUtil.empty() : function.apply(table);
+            DTO dto = clazz.getDeclaredConstructor().newInstance();
+            dto.setFileId(fileId);
+            this.buildInfo(extInfoMap, dto);
+            return dto;
+        } catch (Exception e) {
+            // Keep the best-effort contract, but leave a trace instead of hiding the failure
+            this.logger.warn("报告表格数据转换错误:{}", ExceptionUtil.stacktraceToString(e));
+            return null;
+        }
+    }
+
+    /**
+     * Finds the tiered-fund levels mentioned in a text.
+     * <p>
+     * Every occurrence of "X级" or "基金X" (X in A–F) contributes its letter; the letters are
+     * de-duplicated, sorted and returned as "X级".
+     *
+     * @param text text to search
+     * @return the levels found, possibly empty
+     */
+    protected List<String> matchTieredFund(String text) {
+        if (StrUtil.isBlank(text)) {
+            return ListUtil.list(false);
+        }
+        Matcher levelMatcher = Pattern.compile("[A-F]级|基金[A-F]").matcher(text);
+        // Keep only the level letter of every hit
+        List<String> letters = ListUtil.list(false);
+        while (levelMatcher.find()) {
+            letters.add(levelMatcher.group().replaceAll("[^A-F]", ""));
+        }
+        return letters.stream()
+                .distinct()
+                .sorted()
+                .map(letter -> letter + "级")
+                .collect(Collectors.toList());
+    }
+
+    /**
+     * Derives the report date from a text, trying the known notations in a fixed order:
+     * quarter, dashed date, 8-digit date, "年年度", "年度", year-month.
+     * Quarter, year and month notations resolve to the last day of the period (yyyy-MM-dd);
+     * the two date notations are returned exactly as they appear in the text.
+     *
+     * @param string text to search
+     * @return the report date, or null when the text is null or nothing matches
+     */
+    private String matchReportDate(String string) {
+        if (string == null) {
+            return null;
+        }
+        // e.g. 2023年XXX3季度
+        Matcher quarterMatcher = Pattern.compile("(2\\d{3}).*([一二三四1234])季度").matcher(string);
+        if (quarterMatcher.find()) {
+            String year = quarterMatcher.group(1);
+            String quarterEnd = switch (quarterMatcher.group(2)) {
+                case "一", "1" -> "-03-31";
+                case "二", "2" -> "-06-30";
+                case "三", "3" -> "-09-30";
+                case "四", "4" -> "-12-31";
+                default -> null;
+            };
+            return quarterEnd == null ? null : year + quarterEnd;
+        }
+        // e.g. 2023-12-31
+        Matcher dashedDateMatcher = Pattern.compile("\\d{4}-\\d{2}-\\d{2}").matcher(string);
+        if (dashedDateMatcher.find()) {
+            return dashedDateMatcher.group();
+        }
+        // e.g. 20231231
+        Matcher compactDateMatcher = Pattern.compile("\\d{4}\\d{2}\\d{2}").matcher(string);
+        if (compactDateMatcher.find()) {
+            return compactDateMatcher.group();
+        }
+        // e.g. 2023年年度
+        Matcher annualMatcher = Pattern.compile("(2\\d{3})年年度").matcher(string);
+        if (annualMatcher.find()) {
+            return annualMatcher.group(1) + "-12-31";
+        }
+        // e.g. 2023年度
+        Matcher yearMatcher = Pattern.compile("(2\\d{3})年度").matcher(string);
+        if (yearMatcher.find()) {
+            return yearMatcher.group(1) + "-12-31";
+        }
+        // e.g. 2023年12月
+        Matcher monthMatcher = Pattern.compile("(\\d{4})年(\\d{1,2})月").matcher(string);
+        if (monthMatcher.find()) {
+            String year = monthMatcher.group(1);
+            String month = monthMatcher.group(2);
+            int lastDay = getLastDayOfMonth(Integer.parseInt(year), Integer.parseInt(month));
+            return String.join("-", year, padZero(month), padZero(String.valueOf(lastDay)));
+        }
+        return null;
+    }
+
+    /**
+     * Finds the report type keyword in a text, using the combined pattern of all report types.
+     *
+     * @param string text to search
+     * @return the matched substring, or null when the text is null or contains no report type
+     */
+    private String matchReportType(String string) {
+        if (string == null) {
+            return null;
+        }
+        // One regular expression covering every known report type
+        Matcher typeMatcher = Pattern.compile(ReportType.getAllPatterns()).matcher(string);
+        return typeMatcher.find() ? typeMatcher.group() : null;
+    }
+
+    /**
+     * Returns the number of days in the given month.
+     *
+     * @param year  calendar year
+     * @param month month of the year, 1-based (1 = January)
+     * @return the last day of that month
+     */
+    private int getLastDayOfMonth(int year, int month) {
+        Calendar calendar = Calendar.getInstance();
+        // Calendar.getInstance() starts at "now". Without pinning the day, a current day of
+        // 29-31 overflows a shorter target month into the following one and yields that
+        // month's length instead.
+        calendar.clear();
+        // Calendar.MONTH is zero-based
+        calendar.set(year, month - 1, 1);
+        return calendar.getActualMaximum(Calendar.DAY_OF_MONTH);
+    }
+
+    /**
+     * Formats a numeric string with at least two digits, padding with a leading zero.
+     *
+     * @param number decimal integer as text
+     * @return the zero-padded number
+     */
+    private String padZero(String number) {
+        int value = Integer.parseInt(number);
+        return String.format("%02d", value);
+    }
+}

+ 156 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDAnnuallyReportParser.java

@@ -0,0 +1,156 @@
+package com.simuwang.daq.components.report.parser.pdf;
+
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.map.MapUtil;
+import com.simuwang.base.mapper.EmailFieldMappingMapper;
+import com.simuwang.base.pojo.dto.report.*;
+import com.simuwang.daq.components.report.parser.ReportParserConstant;
+import org.springframework.stereotype.Component;
+import technology.tabula.Table;
+
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+
+/**
+ * @author wangzaijun
+ * @date 2024/10/10 17:34
+ * @description Annual report parsing: the fund basic information is spread over several tables;
+ * the financial statements are not parsed
+ */
+@Component(ReportParserConstant.PARSER_PDF_ANNUALLY)
+public class PDAnnuallyReportParser extends PDQuarterlyReportParser<AnnuallyReportData> {
+    /**
+     * First-column labels that identify a "main financial indicators" table and select the rows
+     * read from it.
+     */
+    private static final List<String> FINANCIAL_INDICATORS_COLUMN_NAMES = ListUtil.list(false);
+
+    static {
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("期末基金净资产");
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("报告期期末单位净值");
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("本期利润");
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("本期已实现收益");
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("期末可供分配利润");
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("期末可供分配基金份额利润");
+        FINANCIAL_INDICATORS_COLUMN_NAMES.add("基金份额累计净值增长率");
+    }
+
+    /**
+     * Tables holding the fund basic information; rebuilt by every initTableInfo call.
+     */
+    private List<Table> fundInfoTables;
+
+    public PDAnnuallyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    @Override
+    public String getParser() {
+        return ReportParserConstant.PARSER_PDF_ANNUALLY;
+    }
+
+    /**
+     * Sorts the extracted tables into fund information, financial indicators, share changes,
+     * asset allocation and industry allocation. Tables matching none of the rules are ignored.
+     *
+     * @param tables every table extracted from the document, in document order
+     */
+    @Override
+    protected void initTableInfo(List<Table> tables) {
+        // Start from empty lists on every call
+        this.fundInfoTables = ListUtil.list(true);
+        this.financialIndicatorsTables = ListUtil.list(true);
+        this.shareChangeTables = ListUtil.list(true);
+        this.assetAllocationTables = ListUtil.list(true);
+        this.investmentIndustryTables = ListUtil.list(true);
+        for (int i = 0; i < tables.size(); i++) {
+            Table table = tables.get(i);
+            // The first two tables are taken as the fund basic information
+            if (i <= 1) {
+                this.fundInfoTables.add(table);
+                continue;
+            }
+            // The first column decides whether this is a main-financial-indicators table
+            List<String> texts = this.getTableColTexts(table, 0);
+            if (CollUtil.containsAny(texts, FINANCIAL_INDICATORS_COLUMN_NAMES)) {
+                this.financialIndicatorsTables.add(table);
+                continue;
+            }
+            int colCount = table.getColCount();
+            if (colCount == 2) {
+                // The first column decides whether this is a share-change table
+                if (CollUtil.containsAny(texts, SHARE_CHANGE_COLUMN_NAMES)) {
+                    this.shareChangeTables.add(table);
+                }
+            } else if (colCount == 4) {
+                // The second column decides whether this is an industry allocation table (mainland)
+                texts = this.getTableColTexts(table, 1);
+                if (CollUtil.containsAny(texts, INDUSTRY_COLUMN_NAMES)) {
+                    this.investmentIndustryTables.add(table);
+                }
+            } else if (colCount == 3) {
+                // The first column decides whether this is an industry allocation table (Stock Connect)
+                if (CollUtil.containsAny(texts, INDUSTRY_COLUMN_NAMES)) {
+                    this.investmentIndustryTables.add(table);
+                    continue;
+                }
+                // Asset allocation (also covers tables split across pages): recognised by
+                // well-known asset category names in the second column
+                texts = this.getTableColTexts(table, 1);
+                if (CollUtil.containsAny(texts, ListUtil.of("股权投资", "股票投资", "债券投资", "另类投资", "其他资产", "其他融资总额"))) {
+                    this.assetAllocationTables.add(table);
+                }
+            }
+        }
+    }
+
+    /**
+     * Merges the key/value pairs of all fund information tables into one fund information DTO.
+     *
+     * @param params parse parameters; only the file id is read here
+     * @return the populated fund information
+     */
+    @Override
+    protected ReportFundInfoDTO buildFundInfo(ReportParserParams params) {
+        Map<String, Object> fundInfoMap = MapUtil.newHashMap(32);
+        for (Table table : this.fundInfoTables) {
+            Map<String, Object> temp = this.parseFundInfo(table);
+            fundInfoMap.putAll(temp);
+        }
+        ReportFundInfoDTO info = new ReportFundInfoDTO(params.getFileId());
+        this.buildInfo(fundInfoMap, info);
+        return info;
+    }
+
+    /**
+     * Assembles the annual report data; the financial indicators are read with the
+     * annual-specific multi-column logic instead of the two-column conversion function.
+     */
+    @Override
+    protected AnnuallyReportData buildExtData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo,
+                                              List<ReportShareChangeDTO> shareChanges,
+                                              List<ReportAssetAllocationDTO> assetAllocations,
+                                              List<ReportInvestmentIndustryDTO> investmentIndustries,
+                                              Function<Table, Map<String, Object>> function) {
+        // Financial indicators
+        List<ReportFinancialIndicatorsDTO> financialIndicators = this.buildFinancialIndicatorsInfo(reportInfo.getFileId());
+        // Assemble the result
+        AnnuallyReportData reportData = new AnnuallyReportData(reportInfo, fundInfo);
+        reportData.setShareChange(shareChanges);
+        reportData.setFinancialIndicators(financialIndicators);
+        reportData.setAssetAllocation(assetAllocations);
+        reportData.setInvestmentIndustry(investmentIndustries);
+        return reportData;
+    }
+
+    @Override
+    protected void cleaningReportData(AnnuallyReportData reportData) {
+        // todo data cleaning
+    }
+
+    /**
+     * Builds the financial indicators: one DTO per data column of every indicators table.
+     * <p>
+     * Row 0 of a data column supplies the year ("年度"); rows whose first cell is one of
+     * {@link #FINANCIAL_INDICATORS_COLUMN_NAMES} supply the values. Levels are assigned by table
+     * position (parent fund first); a table beyond the number of detected levels keeps its level
+     * unset rather than failing with IndexOutOfBoundsException.
+     *
+     * @param fileId file id
+     * @return the financial indicator DTOs
+     */
+    private List<ReportFinancialIndicatorsDTO> buildFinancialIndicatorsInfo(Integer fileId) {
+        List<ReportFinancialIndicatorsDTO> dtos = ListUtil.list(false);
+        // Tiered-fund levels, preceded by the parent fund
+        List<String> levels = this.matchTieredFund(String.join(",", this.textList));
+        levels.add(0, "母基金");
+        // Assumes one table per level and no table split across pages
+        for (int k = 0; k < this.financialIndicatorsTables.size(); k++) {
+            Table table = this.financialIndicatorsTables.get(k);
+            String level = k < levels.size() ? levels.get(k) : null;
+            int colCount = table.getColCount();
+            for (int j = 1; j < colCount; j++) {
+                Map<String, Object> infoMap = MapUtil.newHashMap(16);
+                String year = this.cleaningValue(table.getCell(0, j).getText());
+                infoMap.put("年度", year);
+                for (int i = 0; i < table.getRowCount(); i++) {
+                    String columnName = this.cleaningValue(table.getCell(i, 0).getText());
+                    if (!CollUtil.contains(FINANCIAL_INDICATORS_COLUMN_NAMES, columnName)) {
+                        continue;
+                    }
+                    String value = this.cleaningValue(table.getCell(i, j).getText());
+                    infoMap.put(columnName, value);
+                }
+                ReportFinancialIndicatorsDTO dto = new ReportFinancialIndicatorsDTO(fileId);
+                this.buildInfo(infoMap, dto);
+                if (level != null) {
+                    dto.setLevel(level);
+                }
+                dtos.add(dto);
+            }
+        }
+        return dtos;
+    }
+}

+ 89 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDMonthlyReportParser.java

@@ -0,0 +1,89 @@
+package com.simuwang.daq.components.report.parser.pdf;
+
+import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.map.MapUtil;
+import com.simuwang.base.mapper.EmailFieldMappingMapper;
+import com.simuwang.base.pojo.dto.report.MonthlyReportData;
+import com.simuwang.base.pojo.dto.report.ReportBaseInfoDTO;
+import com.simuwang.base.pojo.dto.report.ReportFundInfoDTO;
+import com.simuwang.base.pojo.dto.report.ReportNetReportDTO;
+import com.simuwang.daq.components.report.parser.ReportParserConstant;
+import org.springframework.stereotype.Component;
+import technology.tabula.RectangularTextContainer;
+import technology.tabula.Table;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * @author wangzaijun
+ * @date 2024/9/11 16:19
+ * @description Parser for monthly reports in PDF format
+ */
+@Component(ReportParserConstant.PARSER_PDF_MONTHLY)
+public class PDMonthlyReportParser extends AbstractPDReportParser<MonthlyReportData> {
+    /**
+     * Net-value tables of the document currently being parsed; emptied by every initTableInfo call.
+     */
+    private final List<Table> extNavTables = ListUtil.list(true);
+
+    public PDMonthlyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    @Override
+    public String getParser() {
+        return ReportParserConstant.PARSER_PDF_MONTHLY;
+    }
+
+    /**
+     * Sorts the extracted tables: a 4-column table is the fund basic information, tables with
+     * 5 or more columns are net-value tables. Everything else is ignored.
+     *
+     * @param tables every table extracted from the document
+     */
+    @Override
+    protected void initTableInfo(List<Table> tables) {
+        // The list lives as long as the parser instance; without clearing it the net-value
+        // tables of previously parsed files would leak into this result.
+        this.extNavTables.clear();
+        // Monthly reports normally follow a fixed template
+        for (Table table : tables) {
+            int colCount = table.getColCount();
+            int rowCount = table.getRowCount();
+            if (colCount == 0 && rowCount == 0) {
+                continue;
+            }
+            if (colCount == 4) {
+                this.fundInfoTable = table;
+            } else if (colCount >= 5) {
+                this.extNavTables.add(table);
+            }
+        }
+    }
+
+    /**
+     * Reads the 4-column fund information table: every row holds two label/value pairs
+     * (columns 0/1 and columns 2/3).
+     *
+     * @param fundInfoTable the fund information table
+     * @return map of label text to value text
+     */
+    @Override
+    protected Map<String, Object> parseFundInfo(Table fundInfoTable) {
+        Map<String, Object> baseInfoMap = MapUtil.newHashMap(32);
+        for (int i = 0; i < fundInfoTable.getRows().size(); i++) {
+            @SuppressWarnings("all")
+            List<RectangularTextContainer> cols = fundInfoTable.getRows().get(i);
+            for (int j = 0; j < 2; j++) {
+                baseInfoMap.put(cols.get(j * 2).getText(), cols.get(j * 2 + 1).getText());
+            }
+        }
+        return baseInfoMap;
+    }
+
+    /**
+     * Builds the monthly report data with the net values of the parent fund and its tiered funds.
+     * In a net-value table row 0 holds the labels and row 1 the values.
+     *
+     * @param reportInfo report basic information
+     * @param fundInfo   fund basic information
+     * @return the assembled monthly report data
+     */
+    @Override
+    protected MonthlyReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo) {
+        MonthlyReportData reportData = new MonthlyReportData(reportInfo, fundInfo);
+        // Net values of the parent fund and the tiered funds
+        List<ReportNetReportDTO> dtos = this.buildLevelDto(reportInfo.getFileId(), this.extNavTables,
+                ReportNetReportDTO.class, t -> {
+                    Map<String, Object> extInfoMap = MapUtil.newHashMap(16);
+                    for (int i = 0; i < t.getColCount(); i++) {
+                        String key = t.getCell(0, i).getText();
+                        String value = t.getCell(1, i).getText();
+                        extInfoMap.put(key, value);
+                    }
+                    return extInfoMap;
+                });
+        reportData.setNetReport(dtos);
+        return reportData;
+    }
+
+    @Override
+    protected void cleaningReportData(MonthlyReportData reportData) {
+        // todo data cleaning
+    }
+}

+ 296 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDQuarterlyReportParser.java

@@ -0,0 +1,296 @@
+package com.simuwang.daq.components.report.parser.pdf;
+
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.map.MapUtil;
+import cn.hutool.core.util.StrUtil;
+import com.simuwang.base.mapper.EmailFieldMappingMapper;
+import com.simuwang.base.pojo.dto.report.*;
+import com.simuwang.daq.components.report.parser.ReportParserConstant;
+import org.springframework.stereotype.Component;
+import technology.tabula.RectangularTextContainer;
+import technology.tabula.Table;
+
+import java.awt.geom.Rectangle2D;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+
+/**
+ * @author wangzaijun
+ * @date 2024/9/29 17:53
+ * @description pdf格式的季报解析逻辑
+ */
+@Component(ReportParserConstant.PARSER_PDF_QUARTERLY)
+public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends AbstractPDReportParser<T> {
+    /**
+     * Industry names used to recognise industry allocation tables.
+     */
+    protected static final List<String> INDUSTRY_COLUMN_NAMES = ListUtil.list(false);
+    /**
+     * First-column labels used to recognise share-change tables.
+     */
+    protected static final List<String> SHARE_CHANGE_COLUMN_NAMES = ListUtil.list(false);
+
+    static {
+        // CSRC (China Securities Regulatory Commission) industry classification
+        INDUSTRY_COLUMN_NAMES.add("农、林、牧、渔业");
+        INDUSTRY_COLUMN_NAMES.add("采矿业");
+        INDUSTRY_COLUMN_NAMES.add("制造业");
+        INDUSTRY_COLUMN_NAMES.add("电力、热力、燃气及水生产和供应业");
+        INDUSTRY_COLUMN_NAMES.add("建筑业");
+        INDUSTRY_COLUMN_NAMES.add("批发和零售业");
+        INDUSTRY_COLUMN_NAMES.add("交通运输、仓储和邮政业");
+        INDUSTRY_COLUMN_NAMES.add("住宿和餐饮业");
+        INDUSTRY_COLUMN_NAMES.add("信息传输、软件和信息技术服务业");
+        INDUSTRY_COLUMN_NAMES.add("金融业");
+        INDUSTRY_COLUMN_NAMES.add("房地产业");
+        INDUSTRY_COLUMN_NAMES.add("租赁和商务服务业");
+        INDUSTRY_COLUMN_NAMES.add("科学研究和技术服务业");
+        INDUSTRY_COLUMN_NAMES.add("水利、环境和公共设施管理业");
+        INDUSTRY_COLUMN_NAMES.add("居民服务、修理和其他服务业");
+        INDUSTRY_COLUMN_NAMES.add("教育");
+        INDUSTRY_COLUMN_NAMES.add("卫生和社会工作");
+        INDUSTRY_COLUMN_NAMES.add("文化、体育和娱乐业");
+        INDUSTRY_COLUMN_NAMES.add("综合");
+
+        INDUSTRY_COLUMN_NAMES.add("港股通");
+
+        // International-standard sector names below
+        INDUSTRY_COLUMN_NAMES.add("能源");
+        INDUSTRY_COLUMN_NAMES.add("原材料");
+        INDUSTRY_COLUMN_NAMES.add("工业");
+        INDUSTRY_COLUMN_NAMES.add("非日常生活消费品");
+        INDUSTRY_COLUMN_NAMES.add("日常消费品");
+        INDUSTRY_COLUMN_NAMES.add("医疗保健");
+        INDUSTRY_COLUMN_NAMES.add("金融");
+        INDUSTRY_COLUMN_NAMES.add("信息技术");
+        INDUSTRY_COLUMN_NAMES.add("通讯服务");
+        INDUSTRY_COLUMN_NAMES.add("公用事业");
+        INDUSTRY_COLUMN_NAMES.add("房地产");
+
+        // Labels that identify a share-change table
+        SHARE_CHANGE_COLUMN_NAMES.add("报告期期初基金份额总额");
+        SHARE_CHANGE_COLUMN_NAMES.add("减:报告期期间基金总赎回份额");
+        SHARE_CHANGE_COLUMN_NAMES.add("期末基金总份额/期末基金实缴总额");
+        SHARE_CHANGE_COLUMN_NAMES.add("报告期期间基金拆分变动份额");
+        SHARE_CHANGE_COLUMN_NAMES.add("报告期期间基金总申购份额");
+    }
+
+    // Tables of the document currently being parsed, grouped by kind; re-created by initTableInfo
+    protected List<Table> financialIndicatorsTables;
+    protected List<Table> shareChangeTables;
+    protected List<Table> assetAllocationTables;
+    protected List<Table> investmentIndustryTables;
+
+    /**
+     * @param fieldMappingMapper passed straight through to the superclass constructor
+     */
+    public PDQuarterlyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
+        super(fieldMappingMapper);
+    }
+
+    /**
+     * @return the parser name constant for quarterly PDF reports
+     */
+    @Override
+    public String getParser() {
+        return ReportParserConstant.PARSER_PDF_QUARTERLY;
+    }
+
+    /**
+     * Sorts the extracted tables by shape and content:
+     * 13 rows x 2 columns is the fund information; other 2-column tables are share changes (when
+     * the first column contains a share-change label) or financial indicators; 4-column tables are
+     * industry allocation; 3-column tables are industry allocation (when the first column contains
+     * an industry name) or asset allocation. Tables of any other width are ignored.
+     *
+     * @param tables every table extracted from the document
+     */
+    @Override
+    protected void initTableInfo(List<Table> tables) {
+        // Start from empty lists on every call
+        this.financialIndicatorsTables = ListUtil.list(true);
+        this.shareChangeTables = ListUtil.list(true);
+        this.assetAllocationTables = ListUtil.list(true);
+        this.investmentIndustryTables = ListUtil.list(true);
+        for (Table table : tables) {
+            int colCount = table.getColCount();
+            int rowCount = table.getRowCount();
+            if (colCount == 0 && rowCount == 0) {
+                continue;
+            }
+            if (rowCount == 13 && colCount == 2) {
+                this.fundInfoTable = table;
+            } else if (colCount == 2) {
+                // The first column decides whether this is a share-change table
+                List<String> texts = this.getTableColTexts(table, 0);
+                // Either share changes or main financial indicators
+                if (CollUtil.containsAny(texts, SHARE_CHANGE_COLUMN_NAMES)) {
+                    this.shareChangeTables.add(table);
+                } else {
+                    this.financialIndicatorsTables.add(table);
+                }
+            } else if (colCount == 4) {
+                // Industry allocation
+                this.investmentIndustryTables.add(table);
+            } else if (colCount == 3) {
+                // Industry names in the first column mean industry allocation; otherwise asset allocation
+                List<String> texts = this.getTableColTexts(table, 0);
+                if (CollUtil.containsAny(texts, INDUSTRY_COLUMN_NAMES)) {
+                    this.investmentIndustryTables.add(table);
+                } else {
+                    this.assetAllocationTables.add(table);
+                }
+            }
+        }
+    }
+
+    /**
+     * Reads a two-column fund information table (used by quarterly and annual reports):
+     * column 0 is the label, column 1 the value.
+     *
+     * @param fundInfoTable the fund information table
+     * @return map of label text to value text
+     */
+    @Override
+    protected Map<String, Object> parseFundInfo(Table fundInfoTable) {
+        Map<String, Object> baseInfoMap = MapUtil.newHashMap(32);
+        for (@SuppressWarnings("all") List<RectangularTextContainer> row : fundInfoTable.getRows()) {
+            baseInfoMap.put(row.get(0).getText(), row.get(1).getText());
+        }
+        return baseInfoMap;
+    }
+
+    /**
+     * Parses share changes, asset allocation and industry allocation, then delegates the
+     * assembly of the result (including the financial indicators) to
+     * {@code buildExtData}, which subclasses override.
+     * <p>
+     * The financial indicators are deliberately not built here: {@code buildExtData} builds
+     * them itself, so doing it here as well only duplicated work whose result was discarded.
+     *
+     * @param reportInfo report basic information
+     * @param fundInfo   fund basic information
+     * @return the assembled report data
+     */
+    @Override
+    protected T parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo) {
+        Integer fileId = reportInfo.getFileId();
+        // Converts a two-column table into a label -> value map
+        Function<Table, Map<String, Object>> function = t -> {
+            Map<String, Object> extInfoMap = MapUtil.newHashMap(16);
+            for (int i = 0; i < t.getRowCount(); i++) {
+                String key = t.getCell(i, 0).getText();
+                String value = t.getCell(i, 1).getText();
+                extInfoMap.put(key, value);
+            }
+            return extInfoMap;
+        };
+        // Share changes
+        List<ReportShareChangeDTO> shareChanges = this.buildLevelDto(fileId, this.shareChangeTables,
+                ReportShareChangeDTO.class, function);
+        // Asset allocation
+        List<ReportAssetAllocationDTO> assetAllocations = this.buildAssetAllocationInfo(fileId);
+        // Industry allocation
+        List<ReportInvestmentIndustryDTO> investmentIndustries = this.buildInvestmentIndustryInfo(fileId);
+        // Assemble the result
+        return this.buildExtData(reportInfo, fundInfo, shareChanges, assetAllocations, investmentIndustries, function);
+    }
+
+    /**
+     * Assembles the quarterly report data from the already parsed parts and builds the main
+     * financial indicators from {@code financialIndicatorsTables}.
+     *
+     * @param reportInfo           report basic information
+     * @param fundInfo             fund basic information
+     * @param shareChanges         parsed share changes
+     * @param assetAllocations     parsed asset allocation
+     * @param investmentIndustries parsed industry allocation
+     * @param function             converts a two-column table into a key/value map
+     * @return the assembled report data, cast to the parser's data type
+     */
+    protected T buildExtData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo,
+                             List<ReportShareChangeDTO> shareChanges,
+                             List<ReportAssetAllocationDTO> assetAllocations,
+                             List<ReportInvestmentIndustryDTO> investmentIndustries,
+                             Function<Table, Map<String, Object>> function) {
+        // Main financial indicators
+        List<ReportFinancialIndicatorsDTO> indicators = this.buildLevelDto(reportInfo.getFileId(),
+                this.financialIndicatorsTables, ReportFinancialIndicatorsDTO.class, function);
+        QuarterlyReportData data = new QuarterlyReportData(reportInfo, fundInfo);
+        data.setShareChange(shareChanges);
+        data.setFinancialIndicators(indicators);
+        data.setAssetAllocation(assetAllocations);
+        data.setInvestmentIndustry(investmentIndustries);
+        @SuppressWarnings("unchecked")
+        T result = (T) data;
+        return result;
+    }
+
+    /**
+     * Data-cleaning hook for the parsed report; currently does nothing.
+     *
+     * @param reportData the parsed report data (unused for now)
+     */
+    @Override
+    protected void cleaningReportData(T reportData) {
+        // TODO: data cleaning is not implemented yet
+    }
+
+    /**
+     * Builds the fund's industry-allocation rows from {@code this.investmentIndustryTables}.
+     * <p>
+     * A table with exactly four columns is tagged with invest type 1 and read from columns 1 to 3;
+     * any other table is tagged with invest type 2 and read from columns 0 to 2. Rows whose first
+     * cell contains "序号" or "行业类别" are skipped (presumably header rows).
+     *
+     * @param fileId file id stamped on every produced DTO
+     * @return the parsed industry-allocation rows; empty when no row qualifies
+     */
+    private List<ReportInvestmentIndustryDTO> buildInvestmentIndustryInfo(Integer fileId) {
+        List<ReportInvestmentIndustryDTO> result = ListUtil.list(false);
+        for (Table industryTable : this.investmentIndustryTables) {
+            boolean fourColumns = industryTable.getColCount() == 4;
+            // investment region: 1 = domestic, 2 = Hong Kong Stock Connect
+            int investType = fourColumns ? 1 : 2;
+            // four-column tables start their data one column later
+            int nameCol = fourColumns ? 1 : 0;
+            // walk the table row by row
+            for (int rowIdx = 0; rowIdx < industryTable.getRowCount(); rowIdx++) {
+                String firstCell = this.cleaningValue(industryTable.getCell(rowIdx, 0).getText());
+                boolean skipped = StrUtil.containsAny(firstCell, "序号", "行业类别");
+                if (!skipped) {
+                    ReportInvestmentIndustryDTO item = new ReportInvestmentIndustryDTO(fileId);
+                    item.setInvestType(investType);
+                    item.setIndustryName(this.cleaningValue(industryTable.getCell(rowIdx, nameCol).getText()));
+                    item.setMarketValue(this.cleaningValue(industryTable.getCell(rowIdx, nameCol + 1).getText()));
+                    item.setRatio(this.cleaningValue(industryTable.getCell(rowIdx, nameCol + 2).getText()));
+                    result.add(item);
+                }
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Builds the fund's asset-allocation rows from {@code this.assetAllocationTables}.
+     * <p>
+     * Each table row is read, after sorting its cells by x coordinate, as
+     * (asset type, asset detail, market value). A blank asset-type cell inherits the most recent
+     * non-blank one; that value is carried across rows and tables. The market-value cell may hold
+     * one or more {@code remark#value} pairs separated by {@code ;}, each of which yields its own DTO.
+     * <p>
+     * Rows are skipped when they have fewer than three cells, when the market-value cell is blank,
+     * or when no asset type has been seen yet. A {@code ;}-separated segment without {@code #} is
+     * stored as a bare market value (no remark) instead of failing with an index error.
+     *
+     * @param fileId file id stamped on every produced DTO
+     * @return the parsed asset-allocation rows; empty when no row qualifies
+     */
+    private List<ReportAssetAllocationDTO> buildAssetAllocationInfo(Integer fileId) {
+        List<ReportAssetAllocationDTO> dtos = ListUtil.list(false);
+        String assetType = null;
+        for (Table table : this.assetAllocationTables) {
+            // walk the table row by row
+            for (@SuppressWarnings("all") List<RectangularTextContainer> row : table.getRows()) {
+                // type, detail and value cells are all required; a shorter row cannot be parsed
+                if (row.size() < 3) {
+                    continue;
+                }
+                // sort cells by x ascending (guards against rows whose cells arrive out of order)
+                row.sort(Comparator.comparing(Rectangle2D.Float::getX));
+                // asset category
+                String type = this.cleaningValue(row.get(0).getText());
+                if (StrUtil.isNotBlank(type)) {
+                    assetType = type;
+                }
+                // amount / market value, sometimes in "remark#amount" form
+                String marketValueAndRemark = this.cleaningValue(row.get(2).getText());
+                if (StrUtil.isBlank(marketValueAndRemark) || StrUtil.isBlank(assetType)) {
+                    continue;
+                }
+                // asset detail
+                String detail = this.cleaningValue(row.get(1).getText(), false);
+                if (StrUtil.contains(marketValueAndRemark, "#")) {
+                    // '#' means a remark is present; there may be several pairs separated by ';'
+                    List<String> marketValueAndRemarks = StrUtil.split(marketValueAndRemark, ";");
+                    for (String mr : marketValueAndRemarks) {
+                        if (StrUtil.isBlank(mr)) {
+                            continue;
+                        }
+                        List<String> mrs = StrUtil.split(mr, "#");
+                        ReportAssetAllocationDTO dto = new ReportAssetAllocationDTO(fileId);
+                        dto.setAssetType(assetType);
+                        dto.setAssetDetails(detail);
+                        if (mrs.size() > 1) {
+                            dto.setMarketValue(mrs.get(1));
+                            dto.setRemark(mrs.get(0));
+                        } else {
+                            // segment without '#': keep it as a bare value rather than throwing
+                            dto.setMarketValue(mr);
+                        }
+                        dtos.add(dto);
+                    }
+                } else {
+                    ReportAssetAllocationDTO dto = new ReportAssetAllocationDTO(fileId);
+                    dto.setAssetType(assetType);
+                    dto.setAssetDetails(detail);
+                    dto.setMarketValue(marketValueAndRemark);
+                    dtos.add(dto);
+                }
+            }
+        }
+        return dtos;
+    }
+
+    /**
+     * Collects the non-blank text of one column of a table, in row order.
+     * <p>
+     * Each cell text is passed through {@code cleaningValue(text, false)} before the blank check.
+     *
+     * @param table the table to read
+     * @param col   index of the column to read in every row
+     * @return the cleaned, non-blank cell texts of that column
+     */
+    protected List<String> getTableColTexts(Table table, Integer col) {
+        List<String> texts = ListUtil.list(false);
+        for (@SuppressWarnings("all") List<RectangularTextContainer> cells : table.getRows()) {
+            String text = this.cleaningValue(cells.get(col).getText(), false);
+            if (StrUtil.isBlank(text)) {
+                continue;
+            }
+            texts.add(text);
+        }
+        return texts;
+    }
+}

+ 78 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/py/AbstractPyReportParser.java

@@ -0,0 +1,78 @@
+package com.simuwang.daq.components.report.parser.py;
+
+import cn.hutool.core.map.MapUtil;
+import cn.hutool.core.util.StrUtil;
+import cn.hutool.http.HttpUtil;
+import cn.hutool.json.JSONUtil;
+import com.simuwang.base.common.enums.ReportType;
+import com.simuwang.base.common.exception.ReportParseException;
+import com.simuwang.base.config.DaqProperties;
+import com.simuwang.base.mapper.FundInfoMapper;
+import com.simuwang.base.pojo.dos.FundAndCompanyInfoDO;
+import com.simuwang.base.pojo.dto.report.ParseResult;
+import com.simuwang.base.pojo.dto.report.ReportData;
+import com.simuwang.base.pojo.dto.report.ReportParseStatus;
+import com.simuwang.base.pojo.dto.report.ReportParserParams;
+import com.simuwang.daq.components.PythonReportConverter;
+import com.simuwang.daq.components.report.parser.ReportParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * Base class for report parsers that delegate the actual parsing to the Python service.
+ * <p>
+ * Subclasses only provide the {@link ReportType} they handle; this class builds the request,
+ * posts it to the Python endpoint and converts the response.
+ *
+ * @author wangzaijun
+ * @date 2024/9/29 16:46
+ */
+public abstract class AbstractPyReportParser<T extends ReportData> implements ReportParser<T> {
+    protected final Logger logger = LoggerFactory.getLogger(this.getClass());
+
+    private final DaqProperties properties;
+    private final FundInfoMapper fundInfoMapper;
+
+    public AbstractPyReportParser(DaqProperties properties, FundInfoMapper fundInfoMapper) {
+        this.properties = properties;
+        this.fundInfoMapper = fundInfoMapper;
+    }
+
+    /**
+     * Parses a report by calling the Python service.
+     *
+     * @param params file id, path, name and register number of the report to parse
+     * @return the parsed report data, or {@code null} when the Python parser is not enabled
+     * @throws ReportParseException when the response carries no status or a status other than 1
+     * @throws IOException          declared by the method signature; nothing in this method throws it directly
+     */
+    @Override
+    public T parse(ReportParserParams params) throws IOException, ReportParseException {
+        // Boolean.TRUE.equals treats an unset (null) switch as "disabled" instead of throwing an NPE
+        if (!Boolean.TRUE.equals(this.properties.getEnablePyParser())) {
+            this.logger.error("The python report parser is unavailable!");
+            return null;
+        }
+        String pyBaseUrl = this.properties.getPyBaseUrl();
+        ReportType reportType = this.getReportType();
+        String registerNumber = params.getRegisterNumber();
+        String api = "/api/v1/parse/amac_report";
+        Map<String, Object> param = MapUtil.newHashMap(16);
+        param.put("file_id", params.getFileId());
+        param.put("file_path", params.getFilepath());
+        param.put("register_number", registerNumber);
+        param.put("file_type", reportType.getType());
+        param.put("file_name", params.getFilename());
+        if (StrUtil.isNotBlank(registerNumber)) {
+            // enrich the request with fund / company names when the fund is known
+            FundAndCompanyInfoDO info = this.fundInfoMapper.queryFundAndTrustByRegisterNumber(registerNumber);
+            if (info != null) {
+                param.put("fund_name", info.getFundName());
+                param.put("trust_name", info.getCompanyName());
+            }
+        }
+        // send the request map built above (not the raw params object), so the snake_case keys,
+        // the file type and the fund / trust names actually reach the service
+        String body = HttpUtil.post(pyBaseUrl + api, JSONUtil.toJsonStr(param));
+        ParseResult<T> result = PythonReportConverter.convert(JSONUtil.parseObj(body), reportType);
+        if (result.getStatus() == null) {
+            throw new ReportParseException(ReportParseStatus.PARSE_FAIL, "资源文件不存在");
+        }
+        if (!Objects.equals(1, result.getStatus())) {
+            this.logger.error("报告{} 解析失败:{}", params, result.getMsg());
+            throw new ReportParseException(result.getStatus(), result.getMsg());
+        }
+        return result.getData();
+    }
+
+    /**
+     * Returns the report type this parser handles; its type code is sent as {@code file_type}
+     * and it is passed to the response converter.
+     *
+     * @return the report type
+     */
+    protected abstract ReportType getReportType();
+}

+ 25 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/py/PythonAnnuallyReportParser.java

@@ -0,0 +1,25 @@
+package com.simuwang.daq.components.report.parser.py;
+
+import com.simuwang.base.common.enums.ReportType;
+import com.simuwang.base.config.DaqProperties;
+import com.simuwang.base.mapper.FundInfoMapper;
+import com.simuwang.base.pojo.dto.report.AnnuallyReportData;
+import com.simuwang.daq.components.report.parser.ReportParserConstant;
+import org.springframework.stereotype.Component;
+
+/**
+ * Python-backed parser for annual reports, registered under
+ * {@link ReportParserConstant#PARSER_PYTHON_ANNUALLY}.
+ */
+@Component(ReportParserConstant.PARSER_PYTHON_ANNUALLY)
+public class PythonAnnuallyReportParser extends AbstractPyReportParser<AnnuallyReportData> {
+    public PythonAnnuallyReportParser(DaqProperties properties, FundInfoMapper fundInfoMapper) {
+        super(properties, fundInfoMapper);
+    }
+
+    /**
+     * @return the parser name, identical to the component name
+     */
+    @Override
+    public String getParser() {
+        return ReportParserConstant.PARSER_PYTHON_ANNUALLY;
+    }
+
+    /**
+     * @return {@link ReportType#ANNUALLY}
+     */
+    @Override
+    protected ReportType getReportType() {
+        return ReportType.ANNUALLY;
+    }
+}

+ 25 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/py/PythonMonthlyReportParser.java

@@ -0,0 +1,25 @@
+package com.simuwang.daq.components.report.parser.py;
+
+import com.simuwang.base.common.enums.ReportType;
+import com.simuwang.base.config.DaqProperties;
+import com.simuwang.base.mapper.FundInfoMapper;
+import com.simuwang.base.pojo.dto.report.MonthlyReportData;
+import com.simuwang.daq.components.report.parser.ReportParserConstant;
+import org.springframework.stereotype.Component;
+
+/**
+ * Python-backed parser for monthly reports, registered under
+ * {@link ReportParserConstant#PARSER_PYTHON_MONTHLY}.
+ */
+@Component(ReportParserConstant.PARSER_PYTHON_MONTHLY)
+public class PythonMonthlyReportParser extends AbstractPyReportParser<MonthlyReportData> {
+    public PythonMonthlyReportParser(DaqProperties properties, FundInfoMapper fundInfoMapper) {
+        super(properties, fundInfoMapper);
+    }
+
+    /**
+     * @return the parser name, identical to the component name
+     */
+    @Override
+    public String getParser() {
+        return ReportParserConstant.PARSER_PYTHON_MONTHLY;
+    }
+
+    /**
+     * @return {@link ReportType#MONTHLY}
+     */
+    @Override
+    protected ReportType getReportType() {
+        return ReportType.MONTHLY;
+    }
+}

+ 25 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/parser/py/PythonQuarterlyReportParser.java

@@ -0,0 +1,25 @@
+package com.simuwang.daq.components.report.parser.py;
+
+import com.simuwang.base.common.enums.ReportType;
+import com.simuwang.base.config.DaqProperties;
+import com.simuwang.base.mapper.FundInfoMapper;
+import com.simuwang.base.pojo.dto.report.QuarterlyReportData;
+import com.simuwang.daq.components.report.parser.ReportParserConstant;
+import org.springframework.stereotype.Component;
+
+/**
+ * Python-backed parser for quarterly reports, registered under
+ * {@link ReportParserConstant#PARSER_PYTHON_QUARTERLY}.
+ */
+@Component(ReportParserConstant.PARSER_PYTHON_QUARTERLY)
+public class PythonQuarterlyReportParser extends AbstractPyReportParser<QuarterlyReportData> {
+    public PythonQuarterlyReportParser(DaqProperties properties, FundInfoMapper fundInfoMapper) {
+        super(properties, fundInfoMapper);
+    }
+
+    /**
+     * @return the parser name, identical to the component name
+     */
+    @Override
+    public String getParser() {
+        return ReportParserConstant.PARSER_PYTHON_QUARTERLY;
+    }
+
+    /**
+     * @return {@link ReportType#QUARTERLY}
+     */
+    @Override
+    protected ReportType getReportType() {
+        return ReportType.QUARTERLY;
+    }
+}

+ 56 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/writer/AbstractReportWriter.java

@@ -0,0 +1,56 @@
+package com.simuwang.daq.components.report.writer;
+
+import com.simuwang.base.mapper.report.ReportBaseInfoMapper;
+import com.simuwang.base.mapper.report.ReportFundInfoMapper;
+import com.simuwang.base.pojo.dos.report.ReportBaseInfoDO;
+import com.simuwang.base.pojo.dos.report.ReportFundInfoDO;
+import com.simuwang.base.pojo.dto.report.ReportBaseInfoDTO;
+import com.simuwang.base.pojo.dto.report.ReportData;
+import com.simuwang.base.pojo.dto.report.ReportFundInfoDTO;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.transaction.annotation.Transactional;
+
+/**
+ * Skeleton for report writers: stores the report base information and the fund information,
+ * then hands over to the subclass for the report-type-specific data.
+ * <p>
+ * Failures are not caught here; any exception from a mapper or from {@link #writeExtData}
+ * propagates to the caller of {@link #write}.
+ */
+public abstract class AbstractReportWriter<T extends ReportData> implements ReportWriter<T> {
+    private final Logger logger = LoggerFactory.getLogger(this.getClass());
+
+    private final ReportBaseInfoMapper baseInfoMapper;
+    private final ReportFundInfoMapper fundInfoMapper;
+
+    public AbstractReportWriter(ReportBaseInfoMapper baseInfoMapper, ReportFundInfoMapper fundInfoMapper) {
+        this.baseInfoMapper = baseInfoMapper;
+        this.fundInfoMapper = fundInfoMapper;
+    }
+
+    /**
+     * Writes the report: base info first, then fund info, then the subclass-specific data.
+     * A {@code null} report is logged and ignored. The method is annotated to roll back on any
+     * {@link Exception}.
+     *
+     * @param reportData the parsed report, may be {@code null}
+     */
+    @Override
+    @Transactional(rollbackFor = Exception.class)
+    public void write(T reportData) {
+        if (reportData != null) {
+            // base information + fund information
+            this.saveBaseInfo(reportData);
+            this.saveFundInfo(reportData);
+            // everything else is report-type specific
+            this.writeExtData(reportData);
+        } else {
+            this.logger.error("The report no result!");
+        }
+    }
+
+    /** Inserts the report base information, if the report carries any. */
+    private void saveBaseInfo(T reportData) {
+        ReportBaseInfoDTO dto = reportData.getBaseInfo();
+        if (dto == null) {
+            return;
+        }
+        ReportBaseInfoDO row = dto.toEntity();
+        this.baseInfoMapper.insert(row);
+    }
+
+    /** Inserts the fund information, if the report carries any. */
+    private void saveFundInfo(T reportData) {
+        ReportFundInfoDTO dto = reportData.getFundInfo();
+        if (dto == null) {
+            return;
+        }
+        ReportFundInfoDO row = dto.toEntity();
+        this.fundInfoMapper.insert(row);
+    }
+
+    /**
+     * Stores the data specific to the concrete report type; called after base and fund info
+     * have been saved.
+     *
+     * @param reportData the parsed report, never {@code null} when called from {@link #write}
+     */
+    protected abstract void writeExtData(T reportData);
+}

+ 1 - 1
service-daq/src/main/java/com/simuwang/daq/components/writer/AnnuallyReportWriter.java

@@ -1,4 +1,4 @@
-package com.simuwang.daq.components.writer;
+package com.simuwang.daq.components.report.writer;
 
 
 import com.simuwang.base.mapper.report.*;
 import com.simuwang.base.mapper.report.*;
 import com.simuwang.base.pojo.dto.report.AnnuallyReportData;
 import com.simuwang.base.pojo.dto.report.AnnuallyReportData;

+ 1 - 1
service-daq/src/main/java/com/simuwang/daq/components/writer/MonthlyReportWriter.java

@@ -1,4 +1,4 @@
-package com.simuwang.daq.components.writer;
+package com.simuwang.daq.components.report.writer;
 
 
 import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.collection.CollUtil;
 import com.simuwang.base.mapper.report.ReportBaseInfoMapper;
 import com.simuwang.base.mapper.report.ReportBaseInfoMapper;

+ 1 - 1
service-daq/src/main/java/com/simuwang/daq/components/writer/QuarterlyReportWriter.java

@@ -1,4 +1,4 @@
-package com.simuwang.daq.components.writer;
+package com.simuwang.daq.components.report.writer;
 
 
 import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.collection.CollUtil;
 import com.simuwang.base.mapper.report.*;
 import com.simuwang.base.mapper.report.*;

+ 12 - 0
service-daq/src/main/java/com/simuwang/daq/components/report/writer/ReportWriter.java

@@ -0,0 +1,12 @@
+package com.simuwang.daq.components.report.writer;
+
+import com.simuwang.base.pojo.dto.report.ReportData;
+
+/**
+ * Service contract for storing parsed report data (could be extended to support saving to a
+ * local cache or to a file).
+ *
+ * @author wangzaijun
+ * @date 2024/9/29 14:06
+ */
+public interface ReportWriter<T extends ReportData> {
+    /**
+     * Stores the given parsed report data.
+     *
+     * @param reportData the parsed report data to store
+     */
+    void write(T reportData);
+}

+ 1 - 1
service-daq/src/main/java/com/simuwang/daq/components/writer/ReportWriterConstant.java

@@ -1,4 +1,4 @@
-package com.simuwang.daq.components.writer;
+package com.simuwang.daq.components.report.writer;
 
 
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.map.MapUtil;
 import com.simuwang.base.common.enums.ReportType;
 import com.simuwang.base.common.enums.ReportType;

+ 1 - 1
service-daq/src/main/java/com/simuwang/daq/components/writer/ReportWriterFactory.java

@@ -1,4 +1,4 @@
-package com.simuwang.daq.components.writer;
+package com.simuwang.daq.components.report.writer;
 
 
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.map.MapUtil;
 import com.simuwang.base.common.enums.ReportType;
 import com.simuwang.base.common.enums.ReportType;

+ 0 - 73
service-daq/src/main/java/com/simuwang/daq/components/writer/AbstractReportWriter.java

@@ -1,73 +0,0 @@
-package com.simuwang.daq.components.writer;
-
-import cn.hutool.core.exceptions.ExceptionUtil;
-import com.simuwang.base.mapper.report.ReportBaseInfoMapper;
-import com.simuwang.base.mapper.report.ReportFundInfoMapper;
-import com.simuwang.base.pojo.dos.report.ReportBaseInfoDO;
-import com.simuwang.base.pojo.dos.report.ReportFundInfoDO;
-import com.simuwang.base.pojo.dto.report.ReportBaseInfoDTO;
-import com.simuwang.base.pojo.dto.report.ReportData;
-import com.simuwang.base.pojo.dto.report.ReportFundInfoDTO;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.springframework.transaction.annotation.Transactional;
-import org.springframework.util.StopWatch;
-
-public abstract class AbstractReportWriter<T extends ReportData> implements ReportWriter<T> {
-    private final Logger logger = LoggerFactory.getLogger(this.getClass());
-
-    private final ReportBaseInfoMapper baseInfoMapper;
-    private final ReportFundInfoMapper fundInfoMapper;
-
-    public AbstractReportWriter(ReportBaseInfoMapper baseInfoMapper, ReportFundInfoMapper fundInfoMapper) {
-        this.baseInfoMapper = baseInfoMapper;
-        this.fundInfoMapper = fundInfoMapper;
-    }
-
-    @Override
-    @Transactional(rollbackFor = Exception.class)
-    public void write(T reportData) {
-        StopWatch stopWatch = new StopWatch();
-        stopWatch.start();
-        // 基本信息+基金信息保存
-        this.saveBaseInfo(reportData);
-        this.saveFundInfo(reportData);
-        try {
-            // 其他信息保存
-            this.writeExtData(reportData);
-        } catch (Exception e) {
-            this.logger.error("报告解析结果之类型特有数据保存报错\n{}", ExceptionUtil.stacktraceToString(e));
-        }
-        stopWatch.stop();
-        long totalTimeMillis = stopWatch.getTotalTimeMillis();
-        if (this.logger.isInfoEnabled()) {
-            this.logger.info("报告解析结果保存成功,耗时:{}ms", totalTimeMillis);
-        }
-    }
-
-    private void saveBaseInfo(T reportData) {
-        try {
-            ReportBaseInfoDTO baseInfo = reportData.getBaseInfo();
-            if (baseInfo != null) {
-                ReportBaseInfoDO entity = baseInfo.toEntity();
-                this.baseInfoMapper.insert(entity);
-            }
-        } catch (Exception e) {
-            this.logger.error("报告解析结果之报告基本信息保存报错\n{}", ExceptionUtil.stacktraceToString(e));
-        }
-    }
-
-    private void saveFundInfo(T reportData) {
-        try {
-            ReportFundInfoDTO fundInfo = reportData.getFundInfo();
-            if (fundInfo != null) {
-                ReportFundInfoDO entity = fundInfo.toEntity();
-                this.fundInfoMapper.insert(entity);
-            }
-        } catch (Exception e) {
-            this.logger.error("报告解析结果之基金信息保存报错\n{}", ExceptionUtil.stacktraceToString(e));
-        }
-    }
-
-    protected abstract void writeExtData(T reportData);
-}

+ 0 - 7
service-daq/src/main/java/com/simuwang/daq/components/writer/ReportWriter.java

@@ -1,7 +0,0 @@
-package com.simuwang.daq.components.writer;
-
-import com.simuwang.base.pojo.dto.report.ReportData;
-
-public interface ReportWriter<T extends ReportData> {
-    void write(T reportData);
-}

+ 0 - 43
service-daq/src/main/java/com/simuwang/daq/dto/MonthlyReportNavInfo.java

@@ -1,43 +0,0 @@
-package com.simuwang.daq.dto;
-
-public class MonthlyReportNavInfo extends ReportExtInfo {
-    private String valuationDate;
-
-    private String nav;
-
-    private String endTotalShares;
-
-    private String fundAssetSize;
-
-    public String getValuationDate() {
-        return valuationDate;
-    }
-
-    public void setValuationDate(String valuationDate) {
-        this.valuationDate = valuationDate;
-    }
-
-    public String getNav() {
-        return nav;
-    }
-
-    public void setNav(String nav) {
-        this.nav = nav;
-    }
-
-    public String getEndTotalShares() {
-        return endTotalShares;
-    }
-
-    public void setEndTotalShares(String endTotalShares) {
-        this.endTotalShares = endTotalShares;
-    }
-
-    public String getFundAssetSize() {
-        return fundAssetSize;
-    }
-
-    public void setFundAssetSize(String fundAssetSize) {
-        this.fundAssetSize = fundAssetSize;
-    }
-}

+ 0 - 13
service-daq/src/main/java/com/simuwang/daq/dto/ReportExtInfo.java

@@ -1,13 +0,0 @@
-package com.simuwang.daq.dto;
-
-public class ReportExtInfo {
-    private Integer fileId;
-
-    public Integer getFileId() {
-        return fileId;
-    }
-
-    public void setFileId(Integer fileId) {
-        this.fileId = fileId;
-    }
-}

+ 0 - 18
service-daq/src/main/java/com/simuwang/daq/dto/ReportFileType.java

@@ -1,18 +0,0 @@
-package com.simuwang.daq.dto;
-
-public enum ReportFileType {
-    PDF("pdf"),
-    DOCX("docx"),
-    DOC("doc"),
-    XLSX("xlsx");
-
-    private final String suffix;
-
-    ReportFileType(String suffix) {
-        this.suffix = suffix;
-    }
-
-    public String getSuffix() {
-        return suffix;
-    }
-}

+ 0 - 291
service-daq/src/main/java/com/simuwang/daq/dto/ReportFundInfo.java

@@ -1,291 +0,0 @@
-package com.simuwang.daq.dto;
-
-/**
- * @author wangzaijun
- * @date 2024/9/12 15:34
- * @description 报告解析的基金信息
- */
-public class ReportFundInfo {
-    private String fundName;
-    private String cFundName;
-    /**
-     * 是否分级基金
-     */
-    private Integer istiered;
-    /**
-     * 备案编码
-     */
-    private String registerNumber;
-    private String trustName;
-    private String custodianName;
-    private String advisorName;
-    /**
-     * 运作方式 开放式或封闭式
-     */
-    private String operationType;
-    private String fundType;
-    /**
-     * 成立日期
-     */
-    private String inceptionDate;
-    private String sharePerAsset;
-    private String investmentObjective;
-    private String fundStrategyDescription;
-    private String secondaryBenchmark;
-    private String riskReturnDesc;
-    private String realizedIncome;
-    private String profit;
-    private String fundAssetSize;
-    private String nav;
-    private String initTotalShares;
-    private String subscription;
-    private String redemption;
-    private String split;
-    /**
-     * 杠杆信息描述
-     */
-    private String leverageNote;
-    /**
-     * 杠杆比例
-     */
-    private String leverage;
-    private String remark;
-    private String industryTrend;
-    private String fundManager;
-    /**
-     * 是否托管复核
-     */
-    private String reviewed;
-
-    public String getFundName() {
-        return fundName;
-    }
-
-    public void setFundName(String fundName) {
-        this.fundName = fundName;
-    }
-
-    public String getcFundName() {
-        return cFundName;
-    }
-
-    public void setcFundName(String cFundName) {
-        this.cFundName = cFundName;
-    }
-
-    public Integer getIstiered() {
-        return istiered;
-    }
-
-    public void setIstiered(Integer istiered) {
-        this.istiered = istiered;
-    }
-
-    public String getRegisterNumber() {
-        return registerNumber;
-    }
-
-    public void setRegisterNumber(String registerNumber) {
-        this.registerNumber = registerNumber;
-    }
-
-    public String getTrustName() {
-        return trustName;
-    }
-
-    public void setTrustName(String trustName) {
-        this.trustName = trustName;
-    }
-
-    public String getCustodianName() {
-        return custodianName;
-    }
-
-    public void setCustodianName(String custodianName) {
-        this.custodianName = custodianName;
-    }
-
-    public String getAdvisorName() {
-        return advisorName;
-    }
-
-    public void setAdvisorName(String advisorName) {
-        this.advisorName = advisorName;
-    }
-
-    public String getOperationType() {
-        return operationType;
-    }
-
-    public void setOperationType(String operationType) {
-        this.operationType = operationType;
-    }
-
-    public String getFundType() {
-        return fundType;
-    }
-
-    public void setFundType(String fundType) {
-        this.fundType = fundType;
-    }
-
-    public String getInceptionDate() {
-        return inceptionDate;
-    }
-
-    public void setInceptionDate(String inceptionDate) {
-        this.inceptionDate = inceptionDate;
-    }
-
-    public String getSharePerAsset() {
-        return sharePerAsset;
-    }
-
-    public void setSharePerAsset(String sharePerAsset) {
-        this.sharePerAsset = sharePerAsset;
-    }
-
-    public String getInvestmentObjective() {
-        return investmentObjective;
-    }
-
-    public void setInvestmentObjective(String investmentObjective) {
-        this.investmentObjective = investmentObjective;
-    }
-
-    public String getFundStrategyDescription() {
-        return fundStrategyDescription;
-    }
-
-    public void setFundStrategyDescription(String fundStrategyDescription) {
-        this.fundStrategyDescription = fundStrategyDescription;
-    }
-
-    public String getSecondaryBenchmark() {
-        return secondaryBenchmark;
-    }
-
-    public void setSecondaryBenchmark(String secondaryBenchmark) {
-        this.secondaryBenchmark = secondaryBenchmark;
-    }
-
-    public String getRiskReturnDesc() {
-        return riskReturnDesc;
-    }
-
-    public void setRiskReturnDesc(String riskReturnDesc) {
-        this.riskReturnDesc = riskReturnDesc;
-    }
-
-    public String getRealizedIncome() {
-        return realizedIncome;
-    }
-
-    public void setRealizedIncome(String realizedIncome) {
-        this.realizedIncome = realizedIncome;
-    }
-
-    public String getProfit() {
-        return profit;
-    }
-
-    public void setProfit(String profit) {
-        this.profit = profit;
-    }
-
-    public String getFundAssetSize() {
-        return fundAssetSize;
-    }
-
-    public void setFundAssetSize(String fundAssetSize) {
-        this.fundAssetSize = fundAssetSize;
-    }
-
-    public String getNav() {
-        return nav;
-    }
-
-    public void setNav(String nav) {
-        this.nav = nav;
-    }
-
-    public String getInitTotalShares() {
-        return initTotalShares;
-    }
-
-    public void setInitTotalShares(String initTotalShares) {
-        this.initTotalShares = initTotalShares;
-    }
-
-    public String getSubscription() {
-        return subscription;
-    }
-
-    public void setSubscription(String subscription) {
-        this.subscription = subscription;
-    }
-
-    public String getRedemption() {
-        return redemption;
-    }
-
-    public void setRedemption(String redemption) {
-        this.redemption = redemption;
-    }
-
-    public String getSplit() {
-        return split;
-    }
-
-    public void setSplit(String split) {
-        this.split = split;
-    }
-
-    public String getLeverageNote() {
-        return leverageNote;
-    }
-
-    public void setLeverageNote(String leverageNote) {
-        this.leverageNote = leverageNote;
-    }
-
-    public String getLeverage() {
-        return leverage;
-    }
-
-    public void setLeverage(String leverage) {
-        this.leverage = leverage;
-    }
-
-    public String getRemark() {
-        return remark;
-    }
-
-    public void setRemark(String remark) {
-        this.remark = remark;
-    }
-
-    public String getIndustryTrend() {
-        return industryTrend;
-    }
-
-    public void setIndustryTrend(String industryTrend) {
-        this.industryTrend = industryTrend;
-    }
-
-    public String getFundManager() {
-        return fundManager;
-    }
-
-    public void setFundManager(String fundManager) {
-        this.fundManager = fundManager;
-    }
-
-    public String getReviewed() {
-        return reviewed;
-    }
-
-    public void setReviewed(String reviewed) {
-        this.reviewed = reviewed;
-    }
-}

+ 0 - 54
service-daq/src/main/java/com/simuwang/daq/dto/ReportInfo.java

@@ -1,54 +0,0 @@
-package com.simuwang.daq.dto;
-
-/**
- * @author wangzaijun
- * @date 2024/9/11 17:57
- * @description 报告基本信息
- */
-public class ReportInfo {
-    private Integer fileId;
-    /**
-     * 报告名称
-     */
-    private String reportName;
-    /**
-     * 报告类型(月、季、年)
-     */
-    private String reportType;
-    /**
-     * 报告日期
-     */
-    private String reportDate;
-
-    public Integer getFileId() {
-        return fileId;
-    }
-
-    public void setFileId(Integer fileId) {
-        this.fileId = fileId;
-    }
-
-    public String getReportName() {
-        return reportName;
-    }
-
-    public void setReportName(String reportName) {
-        this.reportName = reportName;
-    }
-
-    public String getReportType() {
-        return reportType;
-    }
-
-    public void setReportType(String reportType) {
-        this.reportType = reportType;
-    }
-
-    public String getReportDate() {
-        return reportDate;
-    }
-
-    public void setReportDate(String reportDate) {
-        this.reportDate = reportDate;
-    }
-}

+ 97 - 71
service-daq/src/main/java/com/simuwang/daq/service/EmailParseService.java

@@ -8,9 +8,10 @@ import cn.hutool.core.date.DateUtil;
 import cn.hutool.core.exceptions.ExceptionUtil;
 import cn.hutool.core.exceptions.ExceptionUtil;
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.util.StrUtil;
 import cn.hutool.core.util.StrUtil;
-import cn.hutool.http.HttpUtil;
-import cn.hutool.json.JSONUtil;
 import com.simuwang.base.common.conts.*;
 import com.simuwang.base.common.conts.*;
+import com.simuwang.base.common.enums.ReportParserFileType;
+import com.simuwang.base.common.enums.ReportType;
+import com.simuwang.base.common.exception.ReportParseException;
 import com.simuwang.base.common.util.EmailUtil;
 import com.simuwang.base.common.util.EmailUtil;
 import com.simuwang.base.common.util.ExcelUtil;
 import com.simuwang.base.common.util.ExcelUtil;
 import com.simuwang.base.common.util.FileUtil;
 import com.simuwang.base.common.util.FileUtil;
@@ -21,11 +22,15 @@ import com.simuwang.base.pojo.dos.*;
 import com.simuwang.base.pojo.dto.EmailContentInfoDTO;
 import com.simuwang.base.pojo.dto.EmailContentInfoDTO;
 import com.simuwang.base.pojo.dto.EmailFundNavDTO;
 import com.simuwang.base.pojo.dto.EmailFundNavDTO;
 import com.simuwang.base.pojo.dto.MailboxInfoDTO;
 import com.simuwang.base.pojo.dto.MailboxInfoDTO;
-import com.simuwang.base.pojo.dto.report.PythonResult;
+import com.simuwang.base.pojo.dto.report.ParseResult;
 import com.simuwang.base.pojo.dto.report.ReportData;
 import com.simuwang.base.pojo.dto.report.ReportData;
+import com.simuwang.base.pojo.dto.report.ReportParseStatus;
+import com.simuwang.base.pojo.dto.report.ReportParserParams;
 import com.simuwang.base.pojo.valuation.CmValuationTableAttribute;
 import com.simuwang.base.pojo.valuation.CmValuationTableAttribute;
-import com.simuwang.daq.components.PythonReportConverter;
-import com.simuwang.daq.components.writer.ReportWriterFactory;
+import com.simuwang.daq.components.report.parser.ReportParser;
+import com.simuwang.daq.components.report.parser.ReportParserFactory;
+import com.simuwang.daq.components.report.writer.ReportWriter;
+import com.simuwang.daq.components.report.writer.ReportWriterFactory;
 import jakarta.mail.*;
 import jakarta.mail.*;
 import jakarta.mail.internet.MimeMessage;
 import jakarta.mail.internet.MimeMessage;
 import jakarta.mail.internet.MimeMultipart;
 import jakarta.mail.internet.MimeMultipart;
@@ -37,6 +42,7 @@ import org.slf4j.LoggerFactory;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;
 import org.springframework.stereotype.Service;
+import org.springframework.util.StopWatch;
 
 
 import java.io.File;
 import java.io.File;
 import java.math.BigDecimal;
 import java.math.BigDecimal;
@@ -53,10 +59,8 @@ import java.util.stream.Collectors;
 @Service
 @Service
 public class EmailParseService {
 public class EmailParseService {
 
 
+    public static final int stepSize = 10000;
     private static final Logger log = LoggerFactory.getLogger(EmailParseService.class);
     private static final Logger log = LoggerFactory.getLogger(EmailParseService.class);
-
-    private final String pyBaseUrl;
-
     private final EmailTypeRuleMapper emailTypeRuleMapper;
     private final EmailTypeRuleMapper emailTypeRuleMapper;
     private final EmailRuleConfig emailRuleConfig;
     private final EmailRuleConfig emailRuleConfig;
     private final EmailFieldMappingMapper emailFieldMapper;
     private final EmailFieldMappingMapper emailFieldMapper;
@@ -75,19 +79,22 @@ public class EmailParseService {
 
 
     @Value("${email.file.path}")
     @Value("${email.file.path}")
     private String path;
     private String path;
+
     @Autowired
     @Autowired
-    private FundInfoMapper fundInfoMapper;
+    private DaqProperties properties;
+
+    /* 报告解析和入库的方法 */
+    @Autowired
+    private ReportParserFactory reportParserFactory;
     @Autowired
     @Autowired
     private ReportWriterFactory reportWriterFactory;
     private ReportWriterFactory reportWriterFactory;
 
 
-    public static final int stepSize = 10000;
-
     public EmailParseService(EmailTypeRuleMapper emailTypeRuleMapper, EmailRuleConfig emailRuleConfig,
     public EmailParseService(EmailTypeRuleMapper emailTypeRuleMapper, EmailRuleConfig emailRuleConfig,
                              EmailFieldMappingMapper emailFieldMapper, EmailParserFactory emailParserFactory,
                              EmailFieldMappingMapper emailFieldMapper, EmailParserFactory emailParserFactory,
                              EmailParseInfoMapper emailParseInfoMapper, EmailFileInfoMapper emailFileInfoMapper,
                              EmailParseInfoMapper emailParseInfoMapper, EmailFileInfoMapper emailFileInfoMapper,
                              EmailFundNavMapper emailFundNavMapper, EmailFundAssetMapper emailFundAssetMapper,
                              EmailFundNavMapper emailFundNavMapper, EmailFundAssetMapper emailFundAssetMapper,
                              AssetMapper assetMapper, NavMapper navMapper, FundService fundService,
                              AssetMapper assetMapper, NavMapper navMapper, FundService fundService,
-                             FundAliasMapper fundAliasMapper, DaqProperties properties,
+                             FundAliasMapper fundAliasMapper,
                              ValuationTableMapper valuationTableMapper, ValuationTableAttributeMapper valuationTableAttributeMapper,
                              ValuationTableMapper valuationTableMapper, ValuationTableAttributeMapper valuationTableAttributeMapper,
                              FundPositionDetailMapper fundPositionDetailMapper) {
                              FundPositionDetailMapper fundPositionDetailMapper) {
         this.emailTypeRuleMapper = emailTypeRuleMapper;
         this.emailTypeRuleMapper = emailTypeRuleMapper;
@@ -103,7 +110,6 @@ public class EmailParseService {
         this.fundService = fundService;
         this.fundService = fundService;
         this.fundAliasMapper = fundAliasMapper;
         this.fundAliasMapper = fundAliasMapper;
 
 
-        this.pyBaseUrl = properties.getPyBaseUrl();
         this.valuationTableMapper = valuationTableMapper;
         this.valuationTableMapper = valuationTableMapper;
         this.valuationTableAttributeMapper = valuationTableAttributeMapper;
         this.valuationTableAttributeMapper = valuationTableAttributeMapper;
         this.fundPositionDetailMapper = fundPositionDetailMapper;
         this.fundPositionDetailMapper = fundPositionDetailMapper;
@@ -175,7 +181,7 @@ public class EmailParseService {
         emailId = saveEmailParseInfo(emailParseInfoDO);
         emailId = saveEmailParseInfo(emailParseInfoDO);
 
 
         // python 报告解析接口结果
         // python 报告解析接口结果
-        List<ReportData> dataList = ListUtil.list(false);
+        List<ParseResult<ReportData>> dataList = ListUtil.list(false);
         for (Map.Entry<EmailContentInfoDTO, List<EmailFundNavDTO>> fileNameNavEntry : fileNameNavMap.entrySet()) {
         for (Map.Entry<EmailContentInfoDTO, List<EmailFundNavDTO>> fileNameNavEntry : fileNameNavMap.entrySet()) {
             // 保存邮件文件表
             // 保存邮件文件表
             EmailContentInfoDTO emailContentInfoDTO = fileNameNavEntry.getKey();
             EmailContentInfoDTO emailContentInfoDTO = fileNameNavEntry.getKey();
@@ -193,12 +199,10 @@ public class EmailParseService {
             if (CollUtil.isEmpty(fundNavDTOList) && !Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)) {
             if (CollUtil.isEmpty(fundNavDTOList) && !Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)) {
                 continue;
                 continue;
             }
             }
-            // python接口解析结果
-            ReportData data = this.requestPyAndResult(fileId, emailContentInfoDTO);
-            if (data != null) {
-                // 保存报告解析数据
-                this.reportWriterFactory.getInstance(data.getReportType()).write(data);
-                dataList.add(data);
+            if (Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)) {
+                // 解析结果(可以从python获取或者自行解析)并保存报告
+                ParseResult<ReportData> parseResult = this.parseReportAndHandleResult(fileId, emailContentInfoDTO);
+                dataList.add(parseResult);
             }
             }
             for (EmailFundNavDTO fundNavDTO : fundNavDTOList) {
             for (EmailFundNavDTO fundNavDTO : fundNavDTOList) {
                 // 设置净值数据的解析状态
                 // 设置净值数据的解析状态
@@ -212,11 +216,6 @@ public class EmailParseService {
         // 更新邮件解析结果 -> 当【净值日期】和【备案编码/基金名称】能正常解读,即识别为【成功】
         // 更新邮件解析结果 -> 当【净值日期】和【备案编码/基金名称】能正常解读,即识别为【成功】
         long successNavCount = fileNameNavMap.values().stream().flatMap(List::stream).filter(e -> e != null && StrUtil.isBlank(e.getFailReason())).count();
         long successNavCount = fileNameNavMap.values().stream().flatMap(List::stream).filter(e -> e != null && StrUtil.isBlank(e.getFailReason())).count();
         emailParseStatus = successNavCount >= 1 ? EmailParseStatusConst.SUCCESS : EmailParseStatusConst.FAIL;
         emailParseStatus = successNavCount >= 1 ? EmailParseStatusConst.SUCCESS : EmailParseStatusConst.FAIL;
-        // 报告邮件有一条成功就表示整体成功
-        if (Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType) && CollUtil.isNotEmpty(dataList)) {
-            long count = dataList.size();
-            emailParseStatus = count >= 1 ? EmailParseStatusConst.SUCCESS : EmailParseStatusConst.FAIL;
-        }
         String failReason = null;
         String failReason = null;
         if (emailParseStatus == EmailParseStatusConst.FAIL) {
         if (emailParseStatus == EmailParseStatusConst.FAIL) {
             // 邮件解析失败时 -> 保存失败原因
             // 邮件解析失败时 -> 保存失败原因
@@ -224,6 +223,14 @@ public class EmailParseService {
             List<EmailFundNavDTO> navDTOList = fileNameNavMap.values().stream().flatMap(List::stream).toList();
             List<EmailFundNavDTO> navDTOList = fileNameNavMap.values().stream().flatMap(List::stream).toList();
             failReason = hasPdfFile == 1 && CollUtil.isEmpty(navDTOList) ? "无法从pdf文件中获取到数据" : navDTOList.stream().map(EmailFundNavDTO::getFailReason).distinct().collect(Collectors.joining("/"));
             failReason = hasPdfFile == 1 && CollUtil.isEmpty(navDTOList) ? "无法从pdf文件中获取到数据" : navDTOList.stream().map(EmailFundNavDTO::getFailReason).distinct().collect(Collectors.joining("/"));
         }
         }
+        // 报告邮件有一条失败就表示整个邮件解析失败
+        if (Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType) && CollUtil.isNotEmpty(dataList)) {
+            failReason = dataList.stream().filter(e -> !Objects.equals(1, e.getStatus()))
+                    .findFirst().map(ParseResult::getMsg).orElse(null);
+            if (failReason != null) {
+                emailParseStatus = EmailParseStatusConst.FAIL;
+            }
+        }
         emailParseInfoMapper.updateParseStatus(emailId, emailParseStatus, failReason);
         emailParseInfoMapper.updateParseStatus(emailId, emailParseStatus, failReason);
     }
     }
 
 
@@ -361,61 +368,80 @@ public class EmailParseService {
         }).collect(Collectors.toList());
         }).collect(Collectors.toList());
     }
     }
 
 
-    private ReportData requestPyAndResult(int fileId, EmailContentInfoDTO emailContentInfoDTO) {
+    private ParseResult<ReportData> parseReportAndHandleResult(int fileId, EmailContentInfoDTO emailContentInfoDTO) {
+        ParseResult<ReportData> result = new ParseResult<>();
         String fileName = emailContentInfoDTO.getFileName();
         String fileName = emailContentInfoDTO.getFileName();
         Integer emailType = emailContentInfoDTO.getEmailType();
         Integer emailType = emailContentInfoDTO.getEmailType();
+        if (!Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType) || StrUtil.isBlank(fileName)) {
+            result.setStatus(ReportParseStatus.NOT_A_REPORT.getCode());
+            result.setMsg(ReportParseStatus.NOT_A_REPORT.getMsg());
+            return result;
+        }
+        Pattern pattern = Pattern.compile("S(?:[A-Z]{0}[0-9]{5}|[A-Z][0-9]{4}|[A-Z]{2}[0-9]{3}|[A-Z]{3}[0-9]{2})");
+        Matcher matcher = pattern.matcher(fileName);
+        String registerNumber = null;
+        if (matcher.find()) {
+            registerNumber = matcher.group();
+        }
+        // 类型识别---先识别季度报告,没有季度再识别年度报告,最后识别月报
+        ReportType reportType = ReportType.MONTHLY;
+        if (StrUtil.containsAny(fileName, ReportType.QUARTERLY.getPatterns())) {
+            reportType = ReportType.QUARTERLY;
+        } else if (StrUtil.containsAny(fileName, ReportType.ANNUALLY.getPatterns())) {
+            reportType = ReportType.ANNUALLY;
+        }
+        // 解析器--如果开启python解析则直接调用python接口,否则根据文件后缀获取对应解析器
+        ReportParserFileType fileType;
+        if (Objects.equals(Boolean.TRUE, this.properties.getEnablePyParser())) {
+            fileType = ReportParserFileType.PYTHON;
+        } else {
+            String fileSuffix = StrUtil.subAfter(fileName, ".", true);
+            fileType = ReportParserFileType.getBySuffix(fileSuffix);
+        }
+        // 解析报告
+        ReportParserParams params = null;
         ReportData reportData = null;
         ReportData reportData = null;
-        if (Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)) {
-            if (StrUtil.isBlank(fileName)) {
-                return null;
-            }
-            Pattern pattern = Pattern.compile("S(?:[A-Z]{0}[0-9]{5}|[A-Z][0-9]{4}|[A-Z]{2}[0-9]{3}|[A-Z]{3}[0-9]{2})");
-            Matcher matcher = pattern.matcher(fileName);
-            String registerNumber = null;
-            if (matcher.find()) {
-                registerNumber = matcher.group();
-            }
-            int type = 0;
-            if (fileName.contains("季报") || fileName.contains("季度")) {
-                type = 1;
-            } else if (fileName.contains("年报") || fileName.contains("年度")) {
-                type = 2;
-            }
-            String api = "/api/v1/parse/amac_report";
-            Map<String, Object> params = MapUtil.newHashMap(16);
-            params.put("file_id", fileId);
-            params.put("file_path", emailContentInfoDTO.getFilePath());
-            params.put("register_number", registerNumber);
-            params.put("file_type", type);
-            params.put("file_name", fileName);
-            if (StrUtil.isNotBlank(registerNumber)) {
-                FundAndCompanyInfoDO info = this.fundInfoMapper.queryFundAndTrustByRegisterNumber(registerNumber);
-                if (info != null) {
-                    params.put("fund_name", info.getFundName());
-                    params.put("trust_name", info.getCompanyName());
-                }
+        StopWatch parserWatch = new StopWatch();
+        parserWatch.start();
+        try {
+            params = ReportParserParams.builder().fileId(fileId).filename(fileName)
+                    .filepath(emailContentInfoDTO.getFilePath()).registerNumber(registerNumber).build();
+            ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, fileType);
+            reportData = instance.parse(params);
+            result.setStatus(1);
+            result.setMsg("报告解析成功");
+            result.setData(reportData);
+        } catch (ReportParseException e) {
+            log.error("报告{}解析失败\n{}", params, e.getMsg());
+            result.setStatus(e.getCode());
+            result.setMsg(e.getMsg());
+        } catch (Exception e) {
+            log.error("报告{}解析失败\n{}", params, ExceptionUtil.stacktraceToString(e));
+            result.setStatus(ReportParseStatus.PARSE_FAIL.getCode());
+            result.setMsg(StrUtil.format(ReportParseStatus.PARSE_FAIL.getMsg(), e.getMessage()));
+        } finally {
+            parserWatch.stop();
+            if (log.isInfoEnabled()) {
+                log.info("报告{}解析结果为{},耗时{}ms", params, reportData, parserWatch.getTotalTimeMillis());
             }
             }
-            long millis = System.currentTimeMillis();
+        }
+        // 保存报告解析结果
+        if (reportData != null) {
+            StopWatch writeWatch = new StopWatch();
+            writeWatch.start();
             try {
             try {
-                String body = HttpUtil.post(this.pyBaseUrl + api, JSONUtil.toJsonStr(params));
-                PythonResult<?> result = PythonReportConverter.convert(JSONUtil.parseObj(body), type);
-                if (!Objects.equals(1, result.getStatus())) {
-                    log.warn("报告{} 解析失败:{}", params, result.getMsg());
-                    return null;
-                }
-                reportData = result.getData();
-                if (log.isInfoEnabled()) {
-                    log.info("报告{}结果为:\n{}", params, reportData);
-                }
+                ReportWriter<ReportData> instance = this.reportWriterFactory.getInstance(reportType);
+                instance.write(reportData);
             } catch (Exception e) {
             } catch (Exception e) {
-                log.error("请求python的报告解析接口报错\n{}", ExceptionUtil.stacktraceToString(e));
+                log.error("报告{}结果保存失败\n{}", params, ExceptionUtil.stacktraceToString(e));
             } finally {
             } finally {
+                writeWatch.stop();
                 if (log.isInfoEnabled()) {
                 if (log.isInfoEnabled()) {
-                    log.info("当前报告{}解析完成,总计耗时{}ms", params, (System.currentTimeMillis() - millis));
+                    log.info("报告{}解析结果保存完成,耗时{}ms", params, writeWatch.getTotalTimeMillis());
                 }
                 }
             }
             }
         }
         }
-        return reportData;
+        return result;
     }
     }
 
 
     private void saveNavAndAssetNet(Integer fileId, List<EmailFundNavDTO> fundNavDTOList, Date parseDate) {
     private void saveNavAndAssetNet(Integer fileId, List<EmailFundNavDTO> fundNavDTOList, Date parseDate) {
@@ -829,8 +855,8 @@ public class EmailParseService {
                     emailContentInfoDTOList.add(emailContentInfoDTO);
                     emailContentInfoDTOList.add(emailContentInfoDTO);
                 }
                 }
                 if (CollUtil.isNotEmpty(emailContentInfoDTOList)) {
                 if (CollUtil.isNotEmpty(emailContentInfoDTOList)) {
-                    // 估值表邮件不展示正文html文件
-                    if (emailType.equals(EmailTypeConst.VALUATION_EMAIL_TYPE)) {
+                    // 估值表或定期报告邮件不展示正文html文件
+                    if (emailType.equals(EmailTypeConst.VALUATION_EMAIL_TYPE) || emailType.equals(EmailTypeConst.REPORT_EMAIL_TYPE)) {
                         emailContentInfoDTOList = emailContentInfoDTOList.stream().filter(e -> !ExcelUtil.isHTML(e.getFilePath())).toList();
                         emailContentInfoDTOList = emailContentInfoDTOList.stream().filter(e -> !ExcelUtil.isHTML(e.getFilePath())).toList();
                     }
                     }
                     emailContentInfoDTOList.forEach(e -> {
                     emailContentInfoDTOList.forEach(e -> {

+ 4 - 2
service-daq/src/main/java/com/simuwang/daq/service/ReportEmailParser.java

@@ -4,6 +4,7 @@ import cn.hutool.core.collection.ListUtil;
 import com.simuwang.base.common.conts.EmailTypeConst;
 import com.simuwang.base.common.conts.EmailTypeConst;
 import com.simuwang.base.pojo.dto.EmailContentInfoDTO;
 import com.simuwang.base.pojo.dto.EmailContentInfoDTO;
 import com.simuwang.base.pojo.dto.EmailFundNavDTO;
 import com.simuwang.base.pojo.dto.EmailFundNavDTO;
+import com.simuwang.daq.components.report.parser.pdf.AbstractPDReportParser;
 import org.springframework.stereotype.Component;
 import org.springframework.stereotype.Component;
 
 
 import java.util.List;
 import java.util.List;
@@ -12,7 +13,8 @@ import java.util.Map;
 /**
 /**
  * @author wangzaijun
  * @author wangzaijun
  * @date 2024/9/25 14:52
  * @date 2024/9/25 14:52
- * @description 报告的解析逻辑,目前先调用python接口
+ * @description 报告的解析逻辑
+ * @see com.simuwang.daq.components.report.parser.ReportParser,com.simuwang.daq.components.report.parser.py.AbstractPyReportParser, AbstractPDReportParser
  */
  */
 @Component
 @Component
 public class ReportEmailParser extends AbstractEmailParser {
 public class ReportEmailParser extends AbstractEmailParser {
@@ -24,7 +26,7 @@ public class ReportEmailParser extends AbstractEmailParser {
 
 
     @Override
     @Override
     public List<EmailFundNavDTO> parse(EmailContentInfoDTO emailContentInfoDTO, Map<String, List<String>> emailFieldMap) {
     public List<EmailFundNavDTO> parse(EmailContentInfoDTO emailContentInfoDTO, Map<String, List<String>> emailFieldMap) {
-        // 目前啥也不做,调用python的逻辑在EmailParseService里写死,等java的逻辑完成后注释掉python逻辑
+        // 目前啥也不做,但是要返回空集合并且支持报告解析
         return ListUtil.empty();
         return ListUtil.empty();
     }
     }
 }
 }

+ 0 - 20
service-daq/src/main/java/com/simuwang/daq/service/ReportParseService.java

@@ -1,20 +0,0 @@
-package com.simuwang.daq.service;
-
-import com.simuwang.daq.components.ReportParser;
-import org.springframework.stereotype.Service;
-
-@Service
-public class ReportParseService {
-    private final ReportParser parser;
-
-    public ReportParseService(ReportParser parser) {
-        this.parser = parser;
-    }
-
-    public void parse() {
-//        this.parser.parse(1, "D:\\Documents\\workspace\\idea\\smppw\\data-daq\\service-daq\\src\\main\\java\\com\\simuwang\\daq\\utils\\12931.pdf", "幻方量化1000指数专享1号5期私募证券投资基金宁波幻方量化投资管理合伙企业(有限合伙)");
-        this.parser.parse(1,
-                "D:\\Documents\\workspace\\idea\\smppw\\data-daq\\service-daq\\src\\main\\java\\com\\simuwang\\daq\\utils\\12931.pdf",
-                "古曲泉发一号私募证券投资基金上海古曲私募基金管理有限公司");
-    }
-}

+ 269 - 255
service-daq/src/main/java/com/simuwang/daq/utils/ReportParseUtil.java

@@ -1,50 +1,63 @@
-package com.simuwang.daq.utils;
-
-import cn.hutool.core.map.MapUtil;
-import cn.hutool.core.util.StrUtil;
-import cn.hutool.http.HttpUtil;
-import cn.hutool.json.JSONObject;
-import cn.hutool.json.JSONUtil;
-import com.simuwang.base.pojo.dto.report.PythonResult;
-import com.simuwang.daq.components.PythonReportConverter;
-import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.PDResources;
-import org.apache.pdfbox.pdmodel.common.PDStream;
-import org.apache.pdfbox.pdmodel.graphics.PDXObject;
-import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
-
-import java.io.IOException;
-import java.util.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-public class ReportParseUtil {
-    public static void main(String[] args) throws IOException {
-        String fileName = "SJM970_排排精选进取一号私募证券投资基金_2022年第4季度报告.pdf";
-        Pattern pattern = Pattern.compile("S(?:[A-Z]{0}[0-9]{5}|[A-Z][0-9]{4}|[A-Z]{2}[0-9]{3}|[A-Z]{3}[0-9]{2})");
-        Matcher matcher = pattern.matcher(fileName);
-        String registerNumber = null;
-        if (matcher.find()) {
-            registerNumber = matcher.group();
-        }
-
-        int type = 1;
-        String baseUrl = "http://192.168.0.81:8088";
-        String api = "/api/v1/parse/amac_report";
-        Map<String, Object> params = MapUtil.newHashMap(16);
-        params.put("file_id", 111112);
-        params.put("file_path", "E:/workproject/fastparse/src/fastparse/static/reports/quarterly_report/13445.pdf");
-        params.put("register_number", registerNumber);
-        params.put("file_type", type);
-        params.put("file_name", fileName);
-        params.put("fund_name", null);
-        params.put("trust_name", null);
-        String body = HttpUtil.post(baseUrl + api, JSONUtil.toJsonStr(params));
-        JSONObject obj = JSONUtil.parseObj(body);
-        PythonResult<?> result = PythonReportConverter.convert(obj, type);
-        System.out.println(result);
-
+//package com.simuwang.daq.utils;
+//
+//import cn.hutool.core.collection.ListUtil;
+//import cn.hutool.core.map.MapUtil;
+//import cn.hutool.core.util.ReflectUtil;
+//import cn.hutool.core.util.StrUtil;
+//import cn.hutool.http.HttpUtil;
+//import cn.hutool.json.JSONObject;
+//import cn.hutool.json.JSONUtil;
+//import com.simuwang.base.common.conts.Constants;
+//import com.simuwang.base.pojo.dto.report.PythonResult;
+//import com.simuwang.base.pojo.dto.report.ReportFundInfoDTO;
+//import com.simuwang.daq.components.CustomPDFTextStripper;
+//import com.simuwang.daq.components.PythonReportConverter;
+//import com.smppw.common.pojo.ValueLabelVO;
+//import org.apache.pdfbox.Loader;
+//import org.apache.pdfbox.cos.COSName;
+//import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
+//import org.apache.pdfbox.pdmodel.PDDocument;
+//import org.apache.pdfbox.pdmodel.PDPage;
+//import org.apache.pdfbox.pdmodel.PDResources;
+//import org.apache.pdfbox.pdmodel.common.PDStream;
+//import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+//import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+//import org.apache.pdfbox.text.PDFTextStripper;
+//import technology.tabula.*;
+//import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
+//
+//import java.io.IOException;
+//import java.util.*;
+//import java.util.regex.Matcher;
+//import java.util.regex.Pattern;
+//import java.util.stream.Collectors;
+//
+//public class ReportParseUtil {
+//    public static void main(String[] args) throws IOException {
+////        String fileName = "SJM970_排排精选进取一号私募证券投资基金_2022年第4季度报告.pdf";
+////        Pattern pattern = Pattern.compile("S(?:[A-Z]{0}[0-9]{5}|[A-Z][0-9]{4}|[A-Z]{2}[0-9]{3}|[A-Z]{3}[0-9]{2})");
+////        Matcher matcher = pattern.matcher(fileName);
+////        String registerNumber = null;
+////        if (matcher.find()) {
+////            registerNumber = matcher.group();
+////        }
+////
+////        int type = 1;
+////        String baseUrl = "http://192.168.0.81:8088";
+////        String api = "/api/v1/parse/amac_report";
+////        Map<String, Object> params = MapUtil.newHashMap(16);
+////        params.put("file_id", 111112);
+////        params.put("file_path", "E:/workproject/fastparse/src/fastparse/static/reports/quarterly_report/13445.pdf");
+////        params.put("register_number", registerNumber);
+////        params.put("file_type", type);
+////        params.put("file_name", fileName);
+////        params.put("fund_name", null);
+////        params.put("trust_name", null);
+////        String body = HttpUtil.post(baseUrl + api, JSONUtil.toJsonStr(params));
+////        JSONObject obj = JSONUtil.parseObj(body);
+////        PythonResult<?> result = PythonReportConverter.convert(obj, type);
+////        System.out.println(result);
+//
 //        List<ValueLabelVO> fieldMapper = ListUtil.list(false);
 //        List<ValueLabelVO> fieldMapper = ListUtil.list(false);
 //        fieldMapper.add(new ValueLabelVO("fundName", "基金名称"));
 //        fieldMapper.add(new ValueLabelVO("fundName", "基金名称"));
 //        fieldMapper.add(new ValueLabelVO("registerNumber", "基金编码"));
 //        fieldMapper.add(new ValueLabelVO("registerNumber", "基金编码"));
@@ -56,12 +69,12 @@ public class ReportParseUtil {
 //        fieldMapper.add(new ValueLabelVO("advisorName", "投资顾问"));
 //        fieldMapper.add(new ValueLabelVO("advisorName", "投资顾问"));
 //        fieldMapper.add(new ValueLabelVO("reviewed", "复核"));
 //        fieldMapper.add(new ValueLabelVO("reviewed", "复核"));
 //
 //
-//        Map<String, List<String>> watermarkMap = generateWatermarkListMap("幻方量化1000指数专享1号5期私募证券投资基金", "宁波幻方量化投资管理合伙企业(有限合伙)", null);
-//        List<String> watermarks = watermarkMap.get("less");
+////        Map<String, List<String>> watermarkMap = generateWatermarkListMap("幻方量化1000指数专享1号5期私募证券投资基金", "宁波幻方量化投资管理合伙企业(有限合伙)", null);
+////        List<String> watermarks = watermarkMap.get("less");
 //
 //
 ////        System.out.println(watermarks);
 ////        System.out.println(watermarks);
 ////        try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile("D:\\Documents\\workspace\\idea\\smppw\\data-daq\\service-daq\\src\\main\\java\\com\\simuwang\\daq\\utils\\12931.pdf"))) {
 ////        try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile("D:\\Documents\\workspace\\idea\\smppw\\data-daq\\service-daq\\src\\main\\java\\com\\simuwang\\daq\\utils\\12931.pdf"))) {
-//        try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile("D:\\Documents\\workspace\\idea\\smppw\\data-daq\\service-daq\\src\\main\\java\\com\\simuwang\\daq\\utils\\2061834.pdf"))) {
+//        try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile("C:\\Users\\Administrator\\Desktop\\self\\新报告解析\\基协报告\\季报\\SVP311_私募基金季报PDF_国恩回报6号增强私募证券投资基金_2024年06月30日.pdf"))) {
 ////            PDFTextStripper stripper = new PDFTextStripper();
 ////            PDFTextStripper stripper = new PDFTextStripper();
 ////            stripper.setSortByPosition(true);
 ////            stripper.setSortByPosition(true);
 ////            String allText = stripper.getText(document);
 ////            String allText = stripper.getText(document);
@@ -71,8 +84,9 @@ public class ReportParseUtil {
 //            PDFTextStripper textStripper = new CustomPDFTextStripper();
 //            PDFTextStripper textStripper = new CustomPDFTextStripper();
 //            textStripper.setSortByPosition(true);
 //            textStripper.setSortByPosition(true);
 //            String text1 = textStripper.getText(document);
 //            String text1 = textStripper.getText(document);
-//            text1 = text1.replace("+\r\n", "").replace("+","");
-//            List<String> textList = StrUtil.split(text1, "\r\n");
+//            text1 = text1.replace(Constants.WATERMARK_REPLACE, Constants.EMPTY);
+//            List<String> textList = StrUtil.split(text1, System.lineSeparator());
+//            textList.removeIf(StrUtil::isBlank);
 //            System.out.println(textList.get(0));
 //            System.out.println(textList.get(0));
 //
 //
 ////            for (PDPage page : document.getPages()) {
 ////            for (PDPage page : document.getPages()) {
@@ -115,7 +129,7 @@ public class ReportParseUtil {
 //                                baseInfoMap.put(cols.get(j * 2).getText(), cols.get(j * 2 + 1).getText());
 //                                baseInfoMap.put(cols.get(j * 2).getText(), cols.get(j * 2 + 1).getText());
 //                            }
 //                            }
 //                        }
 //                        }
-//                        ReportFundInfo reportFundInfo = new ReportFundInfo();
+//                        ReportFundInfoDTO reportFundInfo = new ReportFundInfoDTO();
 //                        baseInfoMap.forEach((k, v) -> {
 //                        baseInfoMap.forEach((k, v) -> {
 //                            for (ValueLabelVO vo : fieldMapper) {
 //                            for (ValueLabelVO vo : fieldMapper) {
 //                                String fieldName = vo.getValue();
 //                                String fieldName = vo.getValue();
@@ -137,220 +151,220 @@ public class ReportParseUtil {
 //                }
 //                }
 //            }
 //            }
 //        }
 //        }
-    }
-
-    /**
-     * 找图片水印
-     *
-     * @param page
-     * @return
-     * @throws IOException
-     */
-    public static Map<COSName, PDImageXObject> findImageWatermark(PDPage page) throws IOException {
-        Map<COSName, PDImageXObject> watermarkMap = MapUtil.newHashMap();
-        PDResources resources = page.getResources();
-        Iterable<COSName> xObjectNames = resources.getXObjectNames();
-        for (COSName xObjectName : xObjectNames) {
-            PDXObject xObject = resources.getXObject(xObjectName);
-            PDStream stream = xObject.getStream();
-            PDImageXObject imageXObject = null;
-            try {
-                imageXObject = new PDImageXObject(stream, resources);
-            } catch (Exception e) {
-                e.printStackTrace();
-            }
-            if (imageXObject != null) {
-                watermarkMap.put(xObjectName, imageXObject);
-            }
-        }
-        return watermarkMap;
-    }
-
-    private static Map<String, List<String>> generateWatermarkListMap(String fundName, String trustName, String registerNumber) {
-        Map<String, List<String>> result = MapUtil.newHashMap(32);
-        // 生成水印列表
-
-        fundName = StrUtil.isNotBlank(fundName) ? fundName : "私募证券投资基金";
-        trustName = StrUtil.isNotBlank(trustName) ? trustName : "资产管理有限公司";
-        registerNumber = StrUtil.isNotBlank(registerNumber) ? registerNumber : "";
-        String text = fundName + trustName + registerNumber;
-        text = text.replaceAll("[()]", ""); // 移除括号
-        List<String> textList = new ArrayList<>(new HashSet<>(convertStringToList(text)));
-        Collections.reverse(textList);
-        StringBuilder sb = new StringBuilder(textList.size());
-        for (String ch : textList) {
-            sb.append(ch);
-        }
-        String joinedText = sb.toString();
-
-        // 基本水印列表
-        List<String> wkList = new ArrayList<>();
-        for (String ch : textList) {
-            wkList.add(ch + "\r\n");
-            wkList.add("\r\n" + ch);
-        }
-
-        // 查找数字
-        List<String> matches = findDigits(fundName);
-        if (!matches.isEmpty()) {
-            for (String match : matches) {
-                wkList.add("\r\n" + match);
-                wkList.add(match + "\r\n");
-            }
-        }
-        wkList.add("-");
-        wkList.add("【");
-        wkList.add("】");
-        wkList.add("\r");
-        wkList.add("\r\n");
-
-        String noNumberText = removeDigits(joinedText);
-
-        // 生成不同字段的水印列表
-        result.put("report_name", new ArrayList<>(wkList));
-        result.get("report_name").addAll(convertStringToList("有限公司"));
-
-        result.put("less", new ArrayList<>(wkList));
-
-        result.put("more", new ArrayList<>(wkList));
-        result.get("more").addAll(convertStringToList(noNumberText));
-
-        result.put("leverage", new ArrayList<>(wkList));
-        result.get("leverage").addAll(convertStringToList(removeKeywords(noNumberText, "基金资产")));
-
-        result.put("base_info", new ArrayList<>(wkList));
-        result.get("base_info").addAll(convertStringToList(removeKeywords(text, "基", "金", "投资", "管理", "有", "份", "融", "资", "产", "本", "号", "收益", "策略", "期")));
-
-        result.put("industry", new ArrayList<>(wkList));
-        result.get("industry").addAll(convertStringToList(removeKeywords(noNumberText, "基金融公产")));
-
-        result.put("market_value", new ArrayList<>(Collections.singletonList("\n")));
-        return result;
-    }
-
-    private static List<String> findDigits(String text) {
-        List<String> digits = new ArrayList<>();
-        Pattern pattern = Pattern.compile("\\d");
-        Matcher matcher = pattern.matcher(text);
-        while (matcher.find()) {
-            digits.add(matcher.group());
-        }
-        return digits;
-    }
-
-    private static String removeDigits(String text) {
-        return text.replaceAll("\\d", "");
-    }
-
-    private static String removeKeywords(String text, String... keywords) {
-        for (String keyword : keywords) {
-            text = text.replaceAll(keyword, "");
-        }
-        return text;
-    }
-
-    private static List<String> convertStringToList(String text) {
-        List<String> charList = new ArrayList<>();
-        for (char c : text.toCharArray()) {
-            charList.add(c + "");
-        }
-        return charList;
-    }
-
-    public static String processString(List<String> wmList, String string) {
-        // 生成正则表达式模式
-        String pat = String.join("|", wmList);
-        // 使用正则表达式移除wmList中的元素
-        string = removeMatches(string, pat);
-        // 替换中文括号为英文括号
-        string = string.replace("(", "(").replace(")", ")");
-        // 移除空格
-        string = string.replace(" ", "");
-        // 如果字符串以括号开头,则移除第一个字符
-        if (startsWithParenthesis(string)) {
-            string = string.substring(1);
-        }
-
-        return string;
-    }
-
-    private static String removeMatches(String input, String pattern) {
-        // 编译正则表达式
-        Pattern compiledPattern = Pattern.compile(pattern);
-        // 创建Matcher对象
-        Matcher matcher = compiledPattern.matcher(input);
-        // 使用replaceAll方法替换所有匹配到的字符为空字符串
-        return matcher.replaceAll("");
-    }
-
-    private static boolean startsWithParenthesis(String input) {
-        // 匹配以括号开头的字符串
-        Pattern pattern = Pattern.compile("^[()].*");
-        Matcher matcher = pattern.matcher(input);
-        return matcher.find();
-    }
-
-//    public static void removeTextWatermark(PDPage page) throws IOException {
+//    }
+//
+//    /**
+//     * 找图片水印
+//     *
+//     * @param page
+//     * @return
+//     * @throws IOException
+//     */
+//    public static Map<COSName, PDImageXObject> findImageWatermark(PDPage page) throws IOException {
+//        Map<COSName, PDImageXObject> watermarkMap = MapUtil.newHashMap();
 //        PDResources resources = page.getResources();
 //        PDResources resources = page.getResources();
-////        if (StrUtil.isAllBlank(fundName, trustName)) {
-////            return;
-////        }
-//        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
-//        stripper.setSortByPosition(true);
-//        stripper.addRegion("watermark", new Rectangle2D.Float(0, 0, page.getMediaBox().getWidth(), page.getMediaBox().getHeight()));
-//        stripper.extractRegions(page);
+//        Iterable<COSName> xObjectNames = resources.getXObjectNames();
+//        for (COSName xObjectName : xObjectNames) {
+//            PDXObject xObject = resources.getXObject(xObjectName);
+//            PDStream stream = xObject.getStream();
+//            PDImageXObject imageXObject = null;
+//            try {
+//                imageXObject = new PDImageXObject(stream, resources);
+//            } catch (Exception e) {
+//                e.printStackTrace();
+//            }
+//            if (imageXObject != null) {
+//                watermarkMap.put(xObjectName, imageXObject);
+//            }
+//        }
+//        return watermarkMap;
+//    }
 //
 //
-//        PDFStreamEngine engine = new PDFTextStripper();
-//        engine.addOperator(new SetMatrix(stripper));
+//    private static Map<String, List<String>> generateWatermarkListMap(String fundName, String trustName, String registerNumber) {
+//        Map<String, List<String>> result = MapUtil.newHashMap(32);
+//        // 生成水印列表
 //
 //
-//    }
+//        fundName = StrUtil.isNotBlank(fundName) ? fundName : "私募证券投资基金";
+//        trustName = StrUtil.isNotBlank(trustName) ? trustName : "资产管理有限公司";
+//        registerNumber = StrUtil.isNotBlank(registerNumber) ? registerNumber : "";
+//        String text = fundName + trustName + registerNumber;
+//        text = text.replaceAll("[()]", ""); // 移除括号
+//        List<String> textList = new ArrayList<>(new HashSet<>(convertStringToList(text)));
+//        Collections.reverse(textList);
+//        StringBuilder sb = new StringBuilder(textList.size());
+//        for (String ch : textList) {
+//            sb.append(ch);
+//        }
+//        String joinedText = sb.toString();
 //
 //
-//    private static void processResources(PDResources resources) throws IOException {
-//        for (COSName name : resources.getXObjectNames()) {
-//            PDXObject xobject = resources.getXObject(name);
-//            if (xobject instanceof PDFormXObject) {
-//                PDFormXObject formXObject = (PDFormXObject) xobject;
-//                writeTokensToStream(formXObject.getContentStream(),
-//                        createTokensWithoutText(formXObject));
-//                processResources(formXObject.getResources());
-//            }
+//        // 基本水印列表
+//        List<String> wkList = new ArrayList<>();
+//        for (String ch : textList) {
+//            wkList.add(ch + "\r\n");
+//            wkList.add("\r\n" + ch);
 //        }
 //        }
-//        for (COSName name : resources.getPatternNames()) {
-//            PDAbstractPattern pattern = resources.getPattern(name);
-//            if (pattern instanceof PDTilingPattern) {
-//                PDTilingPattern tilingPattern = (PDTilingPattern) pattern;
-//                writeTokensToStream(tilingPattern.getContentStream(),
-//                        createTokensWithoutText(tilingPattern));
-//                processResources(tilingPattern.getResources());
+//
+//        // 查找数字
+//        List<String> matches = findDigits(fundName);
+//        if (!matches.isEmpty()) {
+//            for (String match : matches) {
+//                wkList.add("\r\n" + match);
+//                wkList.add(match + "\r\n");
 //            }
 //            }
 //        }
 //        }
+//        wkList.add("-");
+//        wkList.add("【");
+//        wkList.add("】");
+//        wkList.add("\r");
+//        wkList.add("\r\n");
+//
+//        String noNumberText = removeDigits(joinedText);
+//
+//        // 生成不同字段的水印列表
+//        result.put("report_name", new ArrayList<>(wkList));
+//        result.get("report_name").addAll(convertStringToList("有限公司"));
+//
+//        result.put("less", new ArrayList<>(wkList));
+//
+//        result.put("more", new ArrayList<>(wkList));
+//        result.get("more").addAll(convertStringToList(noNumberText));
+//
+//        result.put("leverage", new ArrayList<>(wkList));
+//        result.get("leverage").addAll(convertStringToList(removeKeywords(noNumberText, "基金资产")));
+//
+//        result.put("base_info", new ArrayList<>(wkList));
+//        result.get("base_info").addAll(convertStringToList(removeKeywords(text, "基", "金", "投资", "管理", "有", "份", "融", "资", "产", "本", "号", "收益", "策略", "期")));
+//
+//        result.put("industry", new ArrayList<>(wkList));
+//        result.get("industry").addAll(convertStringToList(removeKeywords(noNumberText, "基金融公产")));
+//
+//        result.put("market_value", new ArrayList<>(Collections.singletonList("\n")));
+//        return result;
 //    }
 //    }
 //
 //
-//    private static void writeTokensToStream(PDStream newContents, List<Object> newTokens) throws IOException {
-//        try (OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE)) {
-//            ContentStreamWriter writer = new ContentStreamWriter(out);
-//            writer.writeTokens(newTokens);
+//    private static List<String> findDigits(String text) {
+//        List<String> digits = new ArrayList<>();
+//        Pattern pattern = Pattern.compile("\\d");
+//        Matcher matcher = pattern.matcher(text);
+//        while (matcher.find()) {
+//            digits.add(matcher.group());
 //        }
 //        }
+//        return digits;
 //    }
 //    }
 //
 //
-//    private static List<Object> createTokensWithoutText(PDContentStream contentStream) throws IOException {
-//        PDFStreamParser parser = new PDFStreamParser(contentStream);
-//        Object token = parser.parseNextToken();
-//        List<Object> newTokens = new ArrayList<>();
-//        while (token != null) {
-//            if (token instanceof Operator op) {
-//                String opName = op.getName();
-//                if (OperatorName.SET_MATRIX.equals(opName)) {
-//                    // remove the argument to this operator
-//                    newTokens.remove(newTokens.size() - 1);
+//    private static String removeDigits(String text) {
+//        return text.replaceAll("\\d", "");
+//    }
 //
 //
-//                    token = parser.parseNextToken();
-//                    continue;
-//                }
-//            }
-//            newTokens.add(token);
-//            token = parser.parseNextToken();
+//    private static String removeKeywords(String text, String... keywords) {
+//        for (String keyword : keywords) {
+//            text = text.replaceAll(keyword, "");
+//        }
+//        return text;
+//    }
+//
+//    private static List<String> convertStringToList(String text) {
+//        List<String> charList = new ArrayList<>();
+//        for (char c : text.toCharArray()) {
+//            charList.add(c + "");
+//        }
+//        return charList;
+//    }
+//
+//    public static String processString(List<String> wmList, String string) {
+//        // 生成正则表达式模式
+//        String pat = String.join("|", wmList);
+//        // 使用正则表达式移除wmList中的元素
+//        string = removeMatches(string, pat);
+//        // 替换中文括号为英文括号
+//        string = string.replace("(", "(").replace(")", ")");
+//        // 移除空格
+//        string = string.replace(" ", "");
+//        // 如果字符串以括号开头,则移除第一个字符
+//        if (startsWithParenthesis(string)) {
+//            string = string.substring(1);
 //        }
 //        }
-//        return newTokens;
+//
+//        return string;
+//    }
+//
+//    private static String removeMatches(String input, String pattern) {
+//        // 编译正则表达式
+//        Pattern compiledPattern = Pattern.compile(pattern);
+//        // 创建Matcher对象
+//        Matcher matcher = compiledPattern.matcher(input);
+//        // 使用replaceAll方法替换所有匹配到的字符为空字符串
+//        return matcher.replaceAll("");
 //    }
 //    }
-}
+//
+//    private static boolean startsWithParenthesis(String input) {
+//        // 匹配以括号开头的字符串
+//        Pattern pattern = Pattern.compile("^[()].*");
+//        Matcher matcher = pattern.matcher(input);
+//        return matcher.find();
+//    }
+//
+////    public static void removeTextWatermark(PDPage page) throws IOException {
+////        PDResources resources = page.getResources();
+//////        if (StrUtil.isAllBlank(fundName, trustName)) {
+//////            return;
+//////        }
+////        PDFTextStripperByArea stripper = new PDFTextStripperByArea();
+////        stripper.setSortByPosition(true);
+////        stripper.addRegion("watermark", new Rectangle2D.Float(0, 0, page.getMediaBox().getWidth(), page.getMediaBox().getHeight()));
+////        stripper.extractRegions(page);
+////
+////        PDFStreamEngine engine = new PDFTextStripper();
+////        engine.addOperator(new SetMatrix(stripper));
+////
+////    }
+////
+////    private static void processResources(PDResources resources) throws IOException {
+////        for (COSName name : resources.getXObjectNames()) {
+////            PDXObject xobject = resources.getXObject(name);
+////            if (xobject instanceof PDFormXObject) {
+////                PDFormXObject formXObject = (PDFormXObject) xobject;
+////                writeTokensToStream(formXObject.getContentStream(),
+////                        createTokensWithoutText(formXObject));
+////                processResources(formXObject.getResources());
+////            }
+////        }
+////        for (COSName name : resources.getPatternNames()) {
+////            PDAbstractPattern pattern = resources.getPattern(name);
+////            if (pattern instanceof PDTilingPattern) {
+////                PDTilingPattern tilingPattern = (PDTilingPattern) pattern;
+////                writeTokensToStream(tilingPattern.getContentStream(),
+////                        createTokensWithoutText(tilingPattern));
+////                processResources(tilingPattern.getResources());
+////            }
+////        }
+////    }
+////
+////    private static void writeTokensToStream(PDStream newContents, List<Object> newTokens) throws IOException {
+////        try (OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE)) {
+////            ContentStreamWriter writer = new ContentStreamWriter(out);
+////            writer.writeTokens(newTokens);
+////        }
+////    }
+////
+////    private static List<Object> createTokensWithoutText(PDContentStream contentStream) throws IOException {
+////        PDFStreamParser parser = new PDFStreamParser(contentStream);
+////        Object token = parser.parseNextToken();
+////        List<Object> newTokens = new ArrayList<>();
+////        while (token != null) {
+////            if (token instanceof Operator op) {
+////                String opName = op.getName();
+////                if (OperatorName.SET_MATRIX.equals(opName)) {
+////                    // remove the argument to this operator
+////                    newTokens.remove(newTokens.size() - 1);
+////
+////                    token = parser.parseNextToken();
+////                    continue;
+////                }
+////            }
+////            newTokens.add(token);
+////            token = parser.parseNextToken();
+////        }
+////        return newTokens;
+////    }
+//}

+ 61 - 0
service-daq/src/main/java/technology/tabula/CustomObjectExtractor.java

@@ -0,0 +1,61 @@
+package technology.tabula;
+
+import com.simuwang.daq.components.CustomTabulaTextStripper;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+
+import java.io.IOException;
+
+/**
+ * {@link ObjectExtractor} subclass that overrides {@code extractPage} so that a page's text
+ * elements are produced by {@link CustomTabulaTextStripper}.
+ * <p>
+ * Author's note (translated): custom PDF table extraction; the method is overridden so that the
+ * custom watermark-removing text extraction tool takes effect.
+ * <p>
+ * NOTE(review): the class is declared in the {@code technology.tabula} package, presumably to
+ * reach members of that library that are not public - confirm before moving it.
+ *
+ * @author wangzaijun
+ * @date 2024/9/30 18:08
+ * @description Custom PDF table extraction; overridden so the custom watermark-removing text extraction tool takes effect
+ * @see CustomTabulaTextStripper
+ */
+public class CustomObjectExtractor extends ObjectExtractor {
+    private final PDDocument pdfDocument; // own reference to the document, read by extractPage
+
+    public CustomObjectExtractor(PDDocument pdfDocument) {
+        super(pdfDocument);
+        this.pdfDocument = pdfDocument;
+    }
+
+    @Override
+    protected Page extractPage(Integer pageNumber) throws IOException { // pageNumber is 1-based
+        if (pageNumber > pdfDocument.getNumberOfPages() || pageNumber < 1) { // reject numbers outside 1..page count
+            throw new java.lang.IndexOutOfBoundsException("Page number does not exist.");
+        }
+        PDPage page = pdfDocument.getPage(pageNumber - 1); // convert to the zero-based index used by getPage
+
+        ObjectExtractorStreamEngine streamEngine = new ObjectExtractorStreamEngine(page);
+        streamEngine.processPage(page); // fills streamEngine.rulings, which is passed to the builder below
+
+        CustomTabulaTextStripper textStripper = new CustomTabulaTextStripper(pdfDocument, pageNumber);
+        textStripper.process(); // text elements, min char sizes and spatial index are read from it afterwards
+
+        Utils.sort(textStripper.getTextElements(), Rectangle.ILL_DEFINED_ORDER); // return value unused; presumably sorts the list in place
+
+        float width, height;
+        int rotation = page.getRotation();
+        if (Math.abs(rotation) == 90 || Math.abs(rotation) == 270) { // quarter-turn rotation: swap crop-box width and height
+            width = page.getCropBox().getHeight();
+            height = page.getCropBox().getWidth();
+        } else {
+            width = page.getCropBox().getWidth();
+            height = page.getCropBox().getHeight();
+        }
+
+        return Page.Builder.newInstance() // assemble the page from the rulings and the custom stripper's output
+                .withPageDims(PageDims.of(0, 0, width, height))
+                .withRotation(rotation)
+                .withNumber(pageNumber)
+                .withPdPage(page)
+                .withPdDocument(pdfDocument)
+                .withRulings(streamEngine.rulings)
+                .withTextElements(textStripper.getTextElements())
+                .withMinCharWidth(textStripper.getMinCharWidth())
+                .withMinCharHeight(textStripper.getMinCharHeight())
+                .withIndex(textStripper.getSpatialIndex())
+                .build();
+    }
+}

+ 0 - 12
service-deploy/pom.xml

@@ -58,18 +58,6 @@
                 </exclusion>
                 </exclusion>
             </exclusions>
             </exclusions>
         </dependency>
         </dependency>
-
-        <dependency>
-            <groupId>org.springframework.boot</groupId>
-            <artifactId>spring-boot-devtools</artifactId>
-            <scope>runtime</scope>
-            <optional>true</optional>
-        </dependency>
-        <dependency>
-            <groupId>org.springframework.boot</groupId>
-            <artifactId>spring-boot-configuration-processor</artifactId>
-            <optional>true</optional>
-        </dependency>
     </dependencies>
     </dependencies>
 
 
     <build>
     <build>

+ 2 - 0
service-deploy/src/main/resources/application.yml

@@ -81,6 +81,8 @@ simuwang:
   # token过期时间,单位:分钟
   # token过期时间,单位:分钟
   token-expire: 1440
   token-expire: 1440
   token-secret: qwertyuiopasdfghjklzxcvbnm1234567890qwertyuiopasdfghjklzxcvbnm12
   token-secret: qwertyuiopasdfghjklzxcvbnm1234567890qwertyuiopasdfghjklzxcvbnm12
+  # 是否开启python的报告解析功能,开启后报告全部用python接口来解析;当开启时要配置如下python解析地址
+  enable-py-parser: false
   py-base-url: "http://192.168.1.224:8088"
   py-base-url: "http://192.168.1.224:8088"
   # rsa 公钥私钥配置
   # rsa 公钥私钥配置
   security-rsa:
   security-rsa:

+ 21 - 9
service-deploy/src/test/java/com/simuwang/ApplicationTest.java

@@ -25,13 +25,7 @@ public class ApplicationTest {
 
 
     @Test
     @Test
     public void test() {
     public void test() {
-        MailboxInfoDTO emailInfoDTO = new MailboxInfoDTO();
-        emailInfoDTO.setUserId(2395446);
-        emailInfoDTO.setAccount("mozuwen@simuwang.com");
-        emailInfoDTO.setPassword("Mzw@0306");
-        emailInfoDTO.setHost("imap.exmail.qq.com");
-        emailInfoDTO.setPort("993");
-        emailInfoDTO.setProtocol("imap");
+        MailboxInfoDTO emailInfoDTO = this.buildMailbox();
 //
 //
 //        emailInfoDTO.setAccount("jjpj_test");
 //        emailInfoDTO.setAccount("jjpj_test");
 //        emailInfoDTO.setPassword("shzq#919");
 //        emailInfoDTO.setPassword("shzq#919");
@@ -49,8 +43,15 @@ public class ApplicationTest {
     }
     }
 
 
     @Test
     @Test
-    public void pyTest() {
-
+    public void reportTest() { // runs emailParseService.parseEmail over a fixed time window
+        MailboxInfoDTO emailInfoDTO = this.buildMailbox();
+        Date startDate = DateUtil.parse("2024-10-11 08:30:30", DateConst.YYYY_MM_DD_HH_MM_SS); // window start
+        Date endDate = DateUtil.parse("2024-10-11 09:59:30", DateConst.YYYY_MM_DD_HH_MM_SS); // window end
+        try {
+            emailParseService.parseEmail(emailInfoDTO, startDate, endDate);
+        } catch (Exception e) {
+            throw new RuntimeException(e); // rethrow unchecked, keeping the cause, so the test fails
+        }
     }
     }
 
 
     @Test
     @Test
@@ -72,4 +73,15 @@ public class ApplicationTest {
             System.out.println(dateString + ": -> " + date);
             System.out.println(dateString + ": -> " + date);
         }
         }
     }
     }
+
+    private MailboxInfoDTO buildMailbox() { // mailbox fixture used by test() and reportTest()
+        MailboxInfoDTO emailInfoDTO = new MailboxInfoDTO();
+        emailInfoDTO.setUserId(1);
+        emailInfoDTO.setAccount("*"); // placeholder; supply a real account locally and do not commit it
+        emailInfoDTO.setPassword("*"); // placeholder; supply a real password locally and do not commit it
+        emailInfoDTO.setHost("imap.exmail.qq.com");
+        emailInfoDTO.setPort("993");
+        emailInfoDTO.setProtocol("imap");
+        return emailInfoDTO;
+    }
 }
 }

+ 0 - 21
service-manage/src/main/java/com/simuwang/manage/api/test/ReportParseTestApi.java

@@ -1,21 +0,0 @@
-package com.simuwang.manage.api.test;
-
-import com.simuwang.daq.service.ReportParseService;
-import org.springframework.web.bind.annotation.GetMapping;
-import org.springframework.web.bind.annotation.RequestMapping;
-import org.springframework.web.bind.annotation.RestController;
-
-@RestController
-@RequestMapping("/v1/test/parse")
-public class ReportParseTestApi {
-    private final ReportParseService service;
-
-    public ReportParseTestApi(ReportParseService service) {
-        this.service = service;
-    }
-
-    @GetMapping("monthly")
-    public void monthly() {
-        this.service.parse();
-    }
-}