Преглед на файлове

feat:定期报告解析完成季报、年报的公共解析逻辑

wangzaijun преди 6 месеца
родител
ревизия
48e5cea06a
променени са 30 файла, в които са добавени 975 реда и са изтрити 263 реда
  1. 18 6
      service-base/src/main/java/com/simuwang/base/common/enums/ReportType.java
  2. 2 1
      service-base/src/main/java/com/simuwang/base/mapper/EmailFieldMappingMapper.java
  3. 4 0
      service-base/src/main/java/com/simuwang/base/pojo/dos/EmailFieldMappingDO.java
  4. 3 3
      service-base/src/main/java/com/simuwang/base/pojo/dos/report/ReportAssetAllocationDO.java
  5. 52 2
      service-base/src/main/java/com/simuwang/base/pojo/dos/report/ReportFundInfoDO.java
  6. 4 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/AnnuallyReportData.java
  7. 57 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/BaseReportDTO.java
  8. 32 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/BaseReportLevelDTO.java
  9. 4 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/MonthlyReportData.java
  10. 5 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/PythonResult.java
  11. 4 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/QuarterlyReportData.java
  12. 16 9
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportAssetAllocationDTO.java
  13. 9 2
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportBaseInfoDTO.java
  14. 5 0
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportData.java
  15. 40 22
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportFinancialIndicatorsDTO.java
  16. 66 9
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportFundInfoDTO.java
  17. 14 8
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportInvestmentIndustryDTO.java
  18. 29 20
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportNetReportDTO.java
  19. 3 1
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportParserParams.java
  20. 25 20
      service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportShareChangeDTO.java
  21. 4 0
      service-base/src/main/resources/mapper/EmailFieldMappingMapper.xml
  22. 9 9
      service-daq/src/main/java/com/simuwang/daq/components/CustomPDFTextStripper.java
  23. 6 6
      service-daq/src/main/java/com/simuwang/daq/components/PythonReportConverter.java
  24. 31 6
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/AbstractReportParser.java
  25. 116 57
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/AbstractPDReportParser.java
  26. 204 9
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDAnnuallyReportParser.java
  27. 21 42
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDMonthlyReportParser.java
  28. 173 8
      service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDQuarterlyReportParser.java
  29. 17 21
      service-daq/src/main/java/com/simuwang/daq/service/EmailParseService.java
  30. 2 2
      service-deploy/src/test/java/com/simuwang/ApplicationTest.java

+ 18 - 6
service-base/src/main/java/com/simuwang/base/common/enums/ReportType.java

@@ -2,19 +2,31 @@ package com.simuwang.base.common.enums;
 
 import lombok.Getter;
 
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
 @Getter
 public enum ReportType {
-    MONTHLY(0, "月报", "月"),
-    QUARTERLY(1, "季报", "季度"),
-    ANNUALLY(2, "年报", "年度");
+    MONTHLY(0, "月报", new String[]{"月", "月度", "月报"}),
+    QUARTERLY(1, "季报", new String[]{"季", "季度", "季报"}),
+    ANNUALLY(2, "年报", new String[]{"年", "年度", "年报"});
 
     private final int type;
     private final String label;
-    private final String pattern;
+    private final String[] patterns;
 
-    ReportType(int type, String label, String pattern) {
+    ReportType(int type, String label, String[] patterns) {
         this.type = type;
         this.label = label;
-        this.pattern = pattern;
+        this.patterns = patterns;
+    }
+
+    /**
+     * Joins the keywords of every report type into one string separated by "|".
+     *
+     * @return the elements of {@link #patterns()} joined with "|"
+     */
+    public static String getAllPatterns() {
+        return String.join("|", patterns());
+    }
+
+    /**
+     * Flattens the keyword arrays of all enum constants into a single list,
+     * following the declaration order of the constants.
+     *
+     * @return every keyword declared by the report types
+     */
+    public static List<String> patterns() {
+        return Arrays.stream(ReportType.values()).flatMap(e -> Arrays.stream(e.getPatterns())).collect(Collectors.toList());
     }
 }

+ 2 - 1
service-base/src/main/java/com/simuwang/base/mapper/EmailFieldMappingMapper.java

@@ -11,8 +11,9 @@ public interface EmailFieldMappingMapper {
     /**
      * 获取净值文件字段识别映射配置
      *
+     * @param type 0-公共的字段,1-净值和估值表解析的字段,3-定期报告解析的字段
      * @return 净值文件字段识别映射配置
      */
-    List<EmailFieldMappingDO> getEmailFieldMapping();
+    List<EmailFieldMappingDO> getEmailFieldMapping(Integer type);
 
 }

+ 4 - 0
service-base/src/main/java/com/simuwang/base/pojo/dos/EmailFieldMappingDO.java

@@ -26,6 +26,10 @@ public class EmailFieldMappingDO {
     @TableField(value = "name")
     private String name;
     /**
+     * 1-净值或估值表,3-定期报告,0-表示共用的,默认0
+     */
+    private Integer type;
+    /**
      * 记录的有效性;1-有效;0-无效;
      */
     @TableField(value = "isvalid")

+ 3 - 3
service-base/src/main/java/com/simuwang/base/pojo/dos/report/ReportAssetAllocationDO.java

@@ -16,15 +16,15 @@ import java.math.BigDecimal;
 @TableName("amac_report_asset_allocation")
 public class ReportAssetAllocationDO extends BaseReportDO {
     /**
-     * 资产类
+     * 资产
      */
     private String assetType;
     /**
-     * 资产类别
+     * 资产明细
      */
     private String columnName;
     /**
-     * 资产类别
+     * 市值
      */
     private BigDecimal marketValue;
     /**

+ 52 - 2
service-base/src/main/java/com/simuwang/base/pojo/dos/report/ReportFundInfoDO.java

@@ -17,23 +17,73 @@ import java.util.Date;
 @Getter
 @TableName("amac_report_fund_info")
 public class ReportFundInfoDO extends BaseReportDO {
+    /**
+     * 投资顾问
+     */
     private String advisorName;
+    /**
+     * 基金托管人
+     */
     private String custodianName;
+    /**
+     * 基金经理描述
+     */
     private String fundManager;
+    /**
+     * 基金名称
+     */
     private String fundName;
+    /**
+     * 投资策略
+     */
     private String fundStrategyDescription;
+    /**
+     * 基金成立日期
+     */
     private Date inceptionDate;
+    /**
+     * 行业趋势
+     */
     private String industryTrend;
+    /**
+     * 投资目标
+     */
     private String investmentObjective;
+    /**
+     * 杠杆比例
+     */
     private BigDecimal leverage;
+    /**
+     * 杠杆比例描述
+     */
     private String leverageNote;
+    /**
+     * 基金运作方式
+     */
     private String operationType;
+    /**
+     * 备案编码
+     */
     private String registerNumber;
+    /**
+     * 风险收益特征
+     */
     private String riskReturnDesc;
+    /**
+     * 业绩比较基准
+     */
     private String secondaryBenchmark;
+    /**
+     * 基金管理人
+     */
     private String trustName;
-
+    /**
+     * 基金到期日期
+     */
     private Date dueDate;
+    /**
+     * 信息披露报告是否经托管机构复核
+     */
     @TableField(value = "reviewed")
-    private Integer isReviewed;
+    private Integer reviewed;
 }

+ 4 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/AnnuallyReportData.java

@@ -7,6 +7,10 @@ import lombok.Setter;
 @Setter
 @Getter
 public class AnnuallyReportData extends QuarterlyReportData {
+    public AnnuallyReportData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo) {
+        super(baseInfo, fundInfo);
+    }
+
     @Override
     public ReportType getReportType() {
         return ReportType.ANNUALLY;

+ 57 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/BaseReportDTO.java

@@ -1,18 +1,75 @@
 package com.simuwang.base.pojo.dto.report;
 
+import cn.hutool.core.date.DatePattern;
+import cn.hutool.core.date.DateUtil;
+import cn.hutool.core.util.StrUtil;
 import com.simuwang.base.pojo.dos.report.BaseReportDO;
 import lombok.Getter;
 import lombok.Setter;
 
+import java.math.BigDecimal;
+import java.util.Date;
+
+/**
+ * @author wangzaijun
+ * @date 2024/10/9 11:08
+ * @description 抽象的报告数据父类,全部字段用string传递
+ */
 @Setter
 @Getter
 public abstract class BaseReportDTO<T extends BaseReportDO> {
     private Integer fileId;
 
+    public BaseReportDTO() {
+    }
+
+    public BaseReportDTO(Integer fileId) {
+        this.fileId = fileId;
+    }
+
     public abstract T toEntity();
 
     @Override
     public String toString() {
         return "fileId=" + fileId;
     }
+
+    /**
+     * Converts a string to a date.
+     *
+     * @param input the string to convert
+     * @return the parsed date, or {@code null} when the input is blank or cannot be parsed
+     */
+    protected Date toDate(String input) {
+        if (StrUtil.isBlank(input)) {
+            return null;
+        }
+        try {
+            // Three formats are supported: yyyy年MM月dd日, yyyy-MM-dd and yyyy/MM/dd
+            return DateUtil.parse(input.trim(),
+                    DatePattern.CHINESE_DATE_PATTERN, DatePattern.NORM_DATE_PATTERN, "yyyy/MM/dd");
+        } catch (Exception ignored) {
+            // Best effort: a value that cannot be parsed is reported as null below
+        }
+        return null;
+    }
+
+    /**
+     * Converts a string to a number.
+     * <p>
+     * Every character other than digits and "." is removed before parsing (thousands
+     * separators, currency symbols, units and so on). A leading "-" is honoured so that
+     * negative amounts keep their sign instead of being silently turned positive.
+     *
+     * @param input the string to convert
+     * @return the parsed value, or {@code null} when the input is blank or holds no parsable number
+     */
+    protected BigDecimal toBigDecimal(String input) {
+        if (StrUtil.isBlank(input)) {
+            return null;
+        }
+        try {
+            String trimmed = input.trim();
+            // Remember the sign first: the cleanup below strips "-" together with the other symbols
+            boolean negative = trimmed.startsWith("-");
+            // Remove everything that is not a digit or "."
+            String cleanedInput = trimmed.replaceAll("[^\\d.]", "");
+            BigDecimal value = new BigDecimal(cleanedInput);
+            return negative ? value.negate() : value;
+        } catch (NumberFormatException ignored) {
+            // Best effort: a value with no parsable number is reported as null below
+        }
+        return null;
+    }
 }

+ 32 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/BaseReportLevelDTO.java

@@ -0,0 +1,32 @@
+package com.simuwang.base.pojo.dto.report;
+
+import com.simuwang.base.pojo.dos.report.BaseReportDO;
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * Base class for report DTOs that carry a fund level in addition to the file id.
+ *
+ * @param <T> the entity type this DTO converts to
+ */
+@Setter
+@Getter
+public abstract class BaseReportLevelDTO<T extends BaseReportDO> extends BaseReportDTO<T> {
+    /**
+     * Fund level (tranche)
+     */
+    private String level;
+
+    /**
+     * Creates a DTO with neither file id nor level set.
+     */
+    public BaseReportLevelDTO() {
+        super();
+    }
+
+    /**
+     * Creates a DTO for the given file; the level is left unset.
+     *
+     * @param fileId id passed on to the parent constructor
+     */
+    public BaseReportLevelDTO(Integer fileId) {
+        super(fileId);
+    }
+
+    /**
+     * Creates a DTO for the given file and fund level.
+     *
+     * @param fileId id passed on to the parent constructor
+     * @param level  fund level stored on this DTO
+     */
+    public BaseReportLevelDTO(Integer fileId, String level) {
+        super(fileId);
+        this.level = level;
+    }
+
+    /**
+     * Appends the level to the parent's string form.
+     */
+    @Override
+    public String toString() {
+        return super.toString() + ", level='" + this.level + "'";
+    }
+}

+ 4 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/MonthlyReportData.java

@@ -11,6 +11,10 @@ import java.util.List;
 public class MonthlyReportData extends ReportData {
     private List<ReportNetReportDTO> netReport;
 
+    public MonthlyReportData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo) {
+        super(baseInfo, fundInfo);
+    }
+
     @Override
     public ReportType getReportType() {
         return ReportType.MONTHLY;

+ 5 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/PythonResult.java

@@ -3,6 +3,11 @@ package com.simuwang.base.pojo.dto.report;
 import lombok.Getter;
 import lombok.Setter;
 
+/**
+ * @author wangzaijun
+ * @date 2024/10/10 14:08
+ * @description python接口请求的返回结构
+ */
 @Setter
 @Getter
 public class PythonResult<T extends ReportData> {

+ 4 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/QuarterlyReportData.java

@@ -19,6 +19,10 @@ public class QuarterlyReportData extends ReportData {
     private List<ReportInvestmentIndustryDTO> investmentIndustry;
     private List<ReportShareChangeDTO> shareChange;
 
+    public QuarterlyReportData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo) {
+        super(baseInfo, fundInfo);
+    }
+
     @Override
     public ReportType getReportType() {
         return ReportType.QUARTERLY;

+ 16 - 9
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportAssetAllocationDTO.java

@@ -4,8 +4,6 @@ import com.simuwang.base.pojo.dos.report.ReportAssetAllocationDO;
 import lombok.Getter;
 import lombok.Setter;
 
-import java.math.BigDecimal;
-
 /**
  * @author wangzaijun
  * @date 2024/9/26 16:43
@@ -15,28 +13,37 @@ import java.math.BigDecimal;
 @Getter
 public class ReportAssetAllocationDTO extends BaseReportDTO<ReportAssetAllocationDO> {
     /**
-     * 资产类
+     * 资产
      */
     private String assetType;
     /**
-     * 资产类别
+     * 资产明细
      */
-    private String columnName;
+    private String assetDetails;
     /**
-     * 资产类别
+     * 市值
      */
-    private BigDecimal marketValue;
+    private String marketValue;
     /**
      * 备注
      */
     private String remark;
 
+    public ReportAssetAllocationDTO() {
+        super();
+    }
+
+    public ReportAssetAllocationDTO(Integer fileId) {
+        super(fileId);
+    }
+
     @Override
     public ReportAssetAllocationDO toEntity() {
         ReportAssetAllocationDO entity = new ReportAssetAllocationDO();
         entity.setFileId(this.getFileId());
         entity.setAssetType(this.assetType);
-        entity.setMarketValue(this.marketValue);
+        entity.setColumnName(this.assetDetails);
+        entity.setMarketValue(this.toBigDecimal(this.marketValue));
         entity.setRemark(this.remark);
         return entity;
     }
@@ -46,7 +53,7 @@ public class ReportAssetAllocationDTO extends BaseReportDTO<ReportAssetAllocatio
         return "{" +
                 super.toString() +
                 ", assetType='" + assetType + '\'' +
-                ", columnName='" + columnName + '\'' +
+                ", assetDetails='" + assetDetails + '\'' +
                 ", marketValue=" + marketValue +
                 ", remark='" + remark + '\'' +
                 '}';

+ 9 - 2
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportBaseInfoDTO.java

@@ -1,6 +1,5 @@
 package com.simuwang.base.pojo.dto.report;
 
-import cn.hutool.core.date.DateUtil;
 import com.simuwang.base.pojo.dos.report.ReportBaseInfoDO;
 import lombok.Getter;
 import lombok.Setter;
@@ -26,11 +25,19 @@ public class ReportBaseInfoDTO extends BaseReportDTO<ReportBaseInfoDO> {
      */
     private String reportType;
 
+    public ReportBaseInfoDTO() {
+        super();
+    }
+
+    public ReportBaseInfoDTO(Integer fileId) {
+        super(fileId);
+    }
+
     @Override
     public ReportBaseInfoDO toEntity() {
         ReportBaseInfoDO entity = new ReportBaseInfoDO();
         entity.setFileId(this.getFileId());
-        entity.setReportDate(this.reportDate == null ? null : DateUtil.parseDate(this.reportDate));
+        entity.setReportDate(this.toDate(this.reportDate));
         entity.setReportName(this.reportName);
         entity.setReportType(this.reportType);
         return entity;

+ 5 - 0
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportData.java

@@ -21,6 +21,11 @@ public abstract class ReportData {
      */
     private ReportFundInfoDTO fundInfo;
 
+    public ReportData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo) {
+        this.baseInfo = baseInfo;
+        this.fundInfo = fundInfo;
+    }
+
     public abstract ReportType getReportType();
 
     @Override

+ 40 - 22
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportFinancialIndicatorsDTO.java

@@ -4,38 +4,57 @@ import com.simuwang.base.pojo.dos.report.ReportFinancialIndicatorsDO;
 import lombok.Getter;
 import lombok.Setter;
 
-import java.math.BigDecimal;
-
 @Setter
 @Getter
-public class ReportFinancialIndicatorsDTO extends BaseReportDTO<ReportFinancialIndicatorsDO> {
-    private String level;
-
+public class ReportFinancialIndicatorsDTO extends BaseReportLevelDTO<ReportFinancialIndicatorsDO> {
     /**
      * 年度
      */
-    private Integer endDate;
-
-    private BigDecimal fundAssetSize;
-    private BigDecimal nav;
-    private BigDecimal profit;
-    private BigDecimal realizedIncome;
+    private Integer yearly;
+    /**
+     * 期末基金净资产
+     */
+    private String assetNet;
+    /**
+     * 报告期期末单位净值
+     */
+    private String nav;
+    /**
+     * 本期利润
+     */
+    private String profit;
+    /**
+     * 本期已实现收益
+     */
+    private String realizedIncome;
     /**
      * 期末可供分配利润
      */
-    private BigDecimal undistributedProfit;
+    private String undistributedProfit;
+
+    public ReportFinancialIndicatorsDTO() {
+        super();
+    }
+
+    public ReportFinancialIndicatorsDTO(Integer fileId) {
+        super(fileId);
+    }
+
+    public ReportFinancialIndicatorsDTO(Integer fileId, String level) {
+        super(fileId, level);
+    }
 
     @Override
     public ReportFinancialIndicatorsDO toEntity() {
         ReportFinancialIndicatorsDO entity = new ReportFinancialIndicatorsDO();
         entity.setFileId(this.getFileId());
-        entity.setLevel(this.level);
-        entity.setEndDate(this.endDate);
-        entity.setFundAssetSize(this.fundAssetSize);
-        entity.setNav(this.nav);
-        entity.setProfit(this.profit);
-        entity.setRealizedIncome(this.realizedIncome);
-        entity.setUndistributedProfit(this.undistributedProfit);
+        entity.setLevel(this.getLevel());
+        entity.setEndDate(this.yearly);
+        entity.setFundAssetSize(this.toBigDecimal(this.assetNet));
+        entity.setNav(this.toBigDecimal(this.nav));
+        entity.setProfit(this.toBigDecimal(this.profit));
+        entity.setRealizedIncome(this.toBigDecimal(this.realizedIncome));
+        entity.setUndistributedProfit(this.toBigDecimal(this.undistributedProfit));
         return entity;
     }
 
@@ -43,9 +62,8 @@ public class ReportFinancialIndicatorsDTO extends BaseReportDTO<ReportFinancialI
     public String toString() {
         return "{" +
                 super.toString() +
-                ", level='" + level + '\'' +
-                ", endDate=" + endDate +
-                ", fundAssetSize=" + fundAssetSize +
+                ", yearly=" + yearly +
+                ", assetNet=" + assetNet +
                 ", nav=" + nav +
                 ", profit=" + profit +
                 ", undistributedProfit=" + undistributedProfit +

+ 66 - 9
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportFundInfoDTO.java

@@ -1,11 +1,10 @@
 package com.simuwang.base.pojo.dto.report;
 
-import cn.hutool.core.date.DateUtil;
 import com.simuwang.base.pojo.dos.report.ReportFundInfoDO;
 import lombok.Getter;
 import lombok.Setter;
 
-import java.math.BigDecimal;
+import java.util.Objects;
 
 /**
  * @author wangzaijun
@@ -15,24 +14,82 @@ import java.math.BigDecimal;
 @Setter
 @Getter
 public class ReportFundInfoDTO extends BaseReportDTO<ReportFundInfoDO> {
+    /**
+     * 投资顾问
+     */
     private String advisorName;
+    /**
+     * 基金托管人
+     */
     private String custodianName;
+    /**
+     * 基金经理描述
+     */
     private String fundManager;
+    /**
+     * 基金名称
+     */
     private String fundName;
+    /**
+     * 投资策略
+     */
     private String fundStrategyDescription;
+    /**
+     * 基金成立日期
+     */
     private String inceptionDate;
+    /**
+     * 行业趋势
+     */
     private String industryTrend;
+    /**
+     * 投资目标
+     */
     private String investmentObjective;
-    private BigDecimal leverage;
+    /**
+     * 杠杆比例
+     */
+    private String leverage;
+    /**
+     * 杠杆比例描述
+     */
     private String leverageNote;
+    /**
+     * 基金运作方式
+     */
     private String operationType;
+    /**
+     * 备案编码
+     */
     private String registerNumber;
+    /**
+     * 风险收益特征
+     */
     private String riskReturnDesc;
+    /**
+     * 业绩比较基准
+     */
     private String secondaryBenchmark;
+    /**
+     * 基金管理人
+     */
     private String trustName;
-
+    /**
+     * 基金到期日期
+     */
     private String dueDate;
-    private Integer isReviewed;
+    /**
+     * 信息披露报告是否经托管机构复核
+     */
+    private String isReviewed;
+
+    public ReportFundInfoDTO() {
+        super();
+    }
+
+    public ReportFundInfoDTO(Integer fileId) {
+        super(fileId);
+    }
 
     @Override
     public ReportFundInfoDO toEntity() {
@@ -43,18 +100,18 @@ public class ReportFundInfoDTO extends BaseReportDTO<ReportFundInfoDO> {
         entity.setFundManager(this.fundManager);
         entity.setFundName(this.fundName);
         entity.setFundStrategyDescription(this.fundStrategyDescription);
-        entity.setInceptionDate(this.inceptionDate == null ? null : DateUtil.parseDate(this.inceptionDate));
+        entity.setInceptionDate(this.toDate(this.inceptionDate));
         entity.setIndustryTrend(this.industryTrend);
         entity.setInvestmentObjective(this.investmentObjective);
-        entity.setLeverage(this.leverage);
+        entity.setLeverage(this.toBigDecimal(this.leverage));
         entity.setLeverageNote(this.leverageNote);
         entity.setOperationType(this.operationType);
         entity.setRegisterNumber(this.registerNumber);
         entity.setRiskReturnDesc(this.riskReturnDesc);
         entity.setSecondaryBenchmark(this.secondaryBenchmark);
         entity.setTrustName(this.trustName);
-        entity.setDueDate(this.dueDate == null ? null : DateUtil.parseDate(this.dueDate));
-        entity.setIsReviewed(this.isReviewed);
+        entity.setDueDate(this.toDate(this.dueDate));
+        entity.setReviewed(Objects.equals("是", this.isReviewed) ? 1 : 0);
         return entity;
     }
 

+ 14 - 8
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportInvestmentIndustryDTO.java

@@ -4,8 +4,6 @@ import com.simuwang.base.pojo.dos.report.ReportInvestmentIndustryDO;
 import lombok.Getter;
 import lombok.Setter;
 
-import java.math.BigDecimal;
-
 /**
  * @author wangzaijun
  * @date 2024/9/26 16:49
@@ -31,13 +29,21 @@ public class ReportInvestmentIndustryDTO extends BaseReportDTO<ReportInvestmentI
      */
     private String isbCode;
     /**
-     * 公允价值
+     * 公允价值,市值
      */
-    private BigDecimal marketValue;
+    private String marketValue;
     /**
-     * 占基金资产净值的比例
+     * 占基金资产净值的比例,占净值比,权重
      */
-    private BigDecimal ratio;
+    private String ratio;
+
+    public ReportInvestmentIndustryDTO() {
+        super();
+    }
+
+    public ReportInvestmentIndustryDTO(Integer fileId) {
+        super(fileId);
+    }
 
     @Override
     public ReportInvestmentIndustryDO toEntity() {
@@ -47,8 +53,8 @@ public class ReportInvestmentIndustryDTO extends BaseReportDTO<ReportInvestmentI
         entity.setIndustryName(this.industryName);
         entity.setInvestType(this.investType);
         entity.setIsbCode(this.isbCode);
-        entity.setMarketValue(this.marketValue);
-        entity.setRatio(this.ratio);
+        entity.setMarketValue(this.toBigDecimal(this.marketValue));
+        entity.setRatio(this.toBigDecimal(this.ratio));
         return entity;
     }
 

+ 29 - 20
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportNetReportDTO.java

@@ -1,12 +1,9 @@
 package com.simuwang.base.pojo.dto.report;
 
-import cn.hutool.core.date.DateUtil;
 import com.simuwang.base.pojo.dos.report.ReportNetReportDO;
 import lombok.Getter;
 import lombok.Setter;
 
-import java.math.BigDecimal;
-
 /**
  * @author wangzaijun
  * @date 2024/9/26 16:53
@@ -14,37 +11,50 @@ import java.math.BigDecimal;
  */
 @Setter
 @Getter
-public class ReportNetReportDTO extends BaseReportDTO<ReportNetReportDO> {
-    private String level;
+public class ReportNetReportDTO extends BaseReportLevelDTO<ReportNetReportDO> {
+    /**
+     * 估值日期
+     */
     private String valuationDate;
-
     /**
      * 累计净值
      */
-    private BigDecimal cumulativeNav;
+    private String cumulativeNavWithdrawal;
     /**
      * 基金份额总额
      */
-    private BigDecimal endTotalShares;
+    private String assetShare;
     /**
      * 基金资产净值
      */
-    private BigDecimal fundAssetSize;
+    private String assetNet;
     /**
      * 单位净值
      */
-    private BigDecimal nav;
+    private String nav;
+
+    public ReportNetReportDTO() {
+        super();
+    }
+
+    public ReportNetReportDTO(Integer fileId) {
+        super(fileId);
+    }
+
+    public ReportNetReportDTO(Integer fileId, String level) {
+        super(fileId, level);
+    }
 
     @Override
     public ReportNetReportDO toEntity() {
         ReportNetReportDO entity = new ReportNetReportDO();
         entity.setFileId(this.getFileId());
-        entity.setLevel(this.level);
-        entity.setValuationDate(this.valuationDate == null ? null : DateUtil.parseDate(this.valuationDate));
-        entity.setCumulativeNav(this.cumulativeNav);
-        entity.setEndTotalShares(this.endTotalShares);
-        entity.setFundAssetSize(this.fundAssetSize);
-        entity.setNav(this.nav);
+        entity.setLevel(this.getLevel());
+        entity.setValuationDate(this.toDate(this.valuationDate));
+        entity.setCumulativeNav(this.toBigDecimal(this.cumulativeNavWithdrawal));
+        entity.setEndTotalShares(this.toBigDecimal(this.assetShare));
+        entity.setFundAssetSize(this.toBigDecimal(this.assetNet));
+        entity.setNav(this.toBigDecimal(this.nav));
         return entity;
     }
 
@@ -52,11 +62,10 @@ public class ReportNetReportDTO extends BaseReportDTO<ReportNetReportDO> {
     public String toString() {
         return "{" +
                 super.toString() +
-                ", level='" + level + '\'' +
                 ", valuationDate='" + valuationDate + '\'' +
-                ", cumulativeNav=" + cumulativeNav +
-                ", endTotalShares=" + endTotalShares +
-                ", fundAssetSize=" + fundAssetSize +
+                ", cumulativeNavWithdrawal=" + cumulativeNavWithdrawal +
+                ", assetShare=" + assetShare +
+                ", fundAssetSize=" + assetNet +
                 ", nav=" + nav +
                 '}';
     }

+ 3 - 1
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportParserParams.java

@@ -22,6 +22,8 @@ public class ReportParserParams {
      * 文件路径
      */
     private String filepath;
-
+    /**
+     * 备案编码
+     */
     private String registerNumber;
 }

+ 25 - 20
service-base/src/main/java/com/simuwang/base/pojo/dto/report/ReportShareChangeDTO.java

@@ -4,8 +4,6 @@ import com.simuwang.base.pojo.dos.report.ReportShareChangeDO;
 import lombok.Getter;
 import lombok.Setter;
 
-import java.math.BigDecimal;
-
 /**
  * @author wangzaijun
  * @date 2024/9/26 16:40
@@ -13,42 +11,50 @@ import java.math.BigDecimal;
  */
 @Setter
 @Getter
-public class ReportShareChangeDTO extends BaseReportDTO<ReportShareChangeDO> {
-    /**
-     * 基金分级
-     */
-    private String level;
+public class ReportShareChangeDTO extends BaseReportLevelDTO<ReportShareChangeDO> {
     /**
      * 报告期期初基金份额总额
      */
-    private BigDecimal initTotalShares;
+    private String initTotalShares;
     /**
      * 减: 报告期期间基金总赎回份额
      */
-    private BigDecimal redemption;
+    private String redemption;
     /**
      * 期末基金总份额/期末基金实缴总额
      */
-    private BigDecimal sharePerAsset;
+    private String sharePerAsset;
     /**
      * 报告期期间基金拆分变动份额
      */
-    private BigDecimal split;
+    private String splitChangeShare;
     /**
      * 报告期期间基金总申购份额
      */
-    private BigDecimal subscription;
+    private String subscription;
+
+    public ReportShareChangeDTO() {
+        super();
+    }
+
+    public ReportShareChangeDTO(Integer fileId) {
+        super(fileId);
+    }
+
+    public ReportShareChangeDTO(Integer fileId, String level) {
+        super(fileId, level);
+    }
 
     @Override
     public ReportShareChangeDO toEntity() {
         ReportShareChangeDO entity = new ReportShareChangeDO();
         entity.setFileId(this.getFileId());
-        entity.setLevel(this.level);
-        entity.setRedemption(this.redemption);
-        entity.setInitTotalShares(this.initTotalShares);
-        entity.setSharePerAsset(this.sharePerAsset);
-        entity.setSplit(this.split);
-        entity.setSubscription(this.subscription);
+        entity.setLevel(this.getLevel());
+        entity.setRedemption(this.toBigDecimal(this.redemption));
+        entity.setInitTotalShares(this.toBigDecimal(this.initTotalShares));
+        entity.setSharePerAsset(this.toBigDecimal(this.sharePerAsset));
+        entity.setSplit(this.toBigDecimal(this.splitChangeShare));
+        entity.setSubscription(this.toBigDecimal(this.subscription));
         return entity;
     }
 
@@ -56,11 +62,10 @@ public class ReportShareChangeDTO extends BaseReportDTO<ReportShareChangeDO> {
     public String toString() {
         return "{" +
                 super.toString() +
-                ", level='" + level + '\'' +
                 ", initTotalShares=" + initTotalShares +
                 ", redemption=" + redemption +
                 ", sharePerAsset=" + sharePerAsset +
-                ", split=" + split +
+                ", splitChangeShare=" + splitChangeShare +
                 ", subscription=" + subscription +
                 '}';
     }

+ 4 - 0
service-base/src/main/resources/mapper/EmailFieldMappingMapper.xml

@@ -5,6 +5,7 @@
         <id column="id" property="id"/>
         <result column="code" property="code"/>
         <result column="name" property="name"/>
+        <result column="type" property="type"/>
         <result column="isvalid" property="isvalid"/>
         <result column="creatorid" property="creatorId"/>
         <result column="createtime" property="createTime"/>
@@ -16,6 +17,9 @@
         select *
         from PPW_EMAIL.email_field_mapping
         where isvalid = 1
+        <if test="type != null">
+            and (TYPE = #{type} or TYPE = 0)
+        </if>
     </select>
 
 </mapper>

+ 9 - 9
service-daq/src/main/java/com/simuwang/daq/components/CustomPDFTextStripper.java

@@ -15,22 +15,22 @@ import static com.simuwang.base.common.conts.Constants.WATERMARK_REPLACE;
 /**
  * @author wangzaijun
  * @date 2024/9/12 14:00
- * @description 自定义的文本去水印方法,发现水印基本是旋转文字并且比报告内其他文字都大,区别于表格文字去水印的实现
- * @see CustomTabulaTextStripper
+ * @description 自定义的文本去水印方法,发现水印基本是旋转文字并且比报告内其他文字都大
+ * @see CustomTabulaTextStripper 区别于表格文字去水印的实现
  */
 public class CustomPDFTextStripper extends PDFTextStripper {
     @Override
     protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
-        // 水印文字基本都是有角度的,统计有旋转角度的文字
-        List<Float> weights = textPositions.stream().filter(e -> e.getTextMatrix().getValue(0, 1) != 0.)
-                .map(TextPosition::getWidth).collect(Collectors.toList());
+        // 水印文字基本都是有角度的,统计有旋转角度的文字
+        List<Float> heights = textPositions.stream().filter(e -> e.getTextMatrix().getValue(0, 1) != 0.)
+                .map(TextPosition::getHeight).collect(Collectors.toList());
         // 集合为空表示text的内容没有水印影响,直接输出该内容
-        if (CollUtil.isEmpty(weights)) {
+        if (CollUtil.isEmpty(heights)) {
             super.writeString(text);
             return;
         }
         // 如果全是水印文字则直接去除
-        if (textPositions.size() == weights.size()) {
+        if (textPositions.size() == heights.size()) {
             super.writeString(WATERMARK_REPLACE);
             return;
         }
@@ -38,8 +38,8 @@ public class CustomPDFTextStripper extends PDFTextStripper {
         List<String> newTexts = ListUtil.list(false);
         for (TextPosition textPosition : textPositions) {
             float col = textPosition.getTextMatrix().getValue(0, 1);
-            float width = textPosition.getWidth();
-            newTexts.add(col == 0. && !weights.contains(width) ? textPosition.getUnicode() : WATERMARK_REPLACE);
+            float height = textPosition.getHeight();
+            newTexts.add(col == 0. && !heights.contains(height) ? textPosition.getUnicode() : WATERMARK_REPLACE);
         }
         super.writeString(String.join(StrUtil.EMPTY, newTexts));
     }

+ 6 - 6
service-daq/src/main/java/com/simuwang/daq/components/PythonReportConverter.java

@@ -45,17 +45,17 @@ public class PythonReportConverter {
     }
 
     private static MonthlyReportData convertMonthly(JSONObject jsonObject) {
-        MonthlyReportData reportData = new MonthlyReportData();
-        reportData.setBaseInfo(convertToObj(jsonObject, "base_info", ReportBaseInfoDTO.class));
-        reportData.setFundInfo(convertToObj(jsonObject, "fund_info", ReportFundInfoDTO.class));
+        ReportBaseInfoDTO baseInfo = convertToObj(jsonObject, "base_info", ReportBaseInfoDTO.class);
+        ReportFundInfoDTO fundInfo = convertToObj(jsonObject, "fund_info", ReportFundInfoDTO.class);
+        MonthlyReportData reportData = new MonthlyReportData(baseInfo, fundInfo);
         reportData.setNetReport(convertToList(jsonObject, "net_report", ReportNetReportDTO.class));
         return reportData;
     }
 
     private static QuarterlyReportData convertQuarterly(JSONObject jsonObject) {
-        QuarterlyReportData reportData = new QuarterlyReportData();
-        reportData.setBaseInfo(convertToObj(jsonObject, "base_info", ReportBaseInfoDTO.class));
-        reportData.setFundInfo(convertToObj(jsonObject, "fund_info", ReportFundInfoDTO.class));
+        ReportBaseInfoDTO baseInfo = convertToObj(jsonObject, "base_info", ReportBaseInfoDTO.class);
+        ReportFundInfoDTO fundInfo = convertToObj(jsonObject, "fund_info", ReportFundInfoDTO.class);
+        QuarterlyReportData reportData = new QuarterlyReportData(baseInfo, fundInfo);
         reportData.setAssetAllocation(convertToList(jsonObject, "asset_allocation", ReportAssetAllocationDTO.class));
         reportData.setFinancialIndicators(convertToList(jsonObject, "financial_indicators", ReportFinancialIndicatorsDTO.class));
         reportData.setInvestmentIndustry(convertToList(jsonObject, "investment_industry", ReportInvestmentIndustryDTO.class));

+ 31 - 6
service-daq/src/main/java/com/simuwang/daq/components/report/parser/AbstractReportParser.java

@@ -12,6 +12,8 @@ import org.slf4j.LoggerFactory;
 
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
+import java.util.regex.Pattern;
 
 /**
  * @author wangzaijun
@@ -36,7 +38,7 @@ public abstract class AbstractReportParser<T extends ReportData> implements Repo
      * 初始化数据的方法
      */
     protected void init() {
-        List<EmailFieldMappingDO> emailFieldMapping = this.fieldMappingMapper.getEmailFieldMapping();
+        List<EmailFieldMappingDO> emailFieldMapping = this.fieldMappingMapper.getEmailFieldMapping(3);
         if (CollUtil.isEmpty(emailFieldMapping)) {
             this.logger.error("未设置报告解析规则!");
             return;
@@ -64,6 +66,9 @@ public abstract class AbstractReportParser<T extends ReportData> implements Repo
      * @param info       待设置的对象
      */
     protected void buildInfo(Map<String, Object> extInfoMap, Object info) {
+        if (MapUtil.isEmpty(extInfoMap)) {
+            return;
+        }
         for (Map.Entry<String, Object> entry : extInfoMap.entrySet()) {
             String k = this.cleaningValue(entry.getKey());
             String fieldValue = this.cleaningValue(entry.getValue());
@@ -79,13 +84,33 @@ public abstract class AbstractReportParser<T extends ReportData> implements Repo
         }
     }
 
-    private String cleaningValue(Object value) {
+    protected String cleaningValue(Object value) {
+        return this.cleaningValue(value, true);
+    }
+
+    /**
+     * 数据简单清洗,并全部转为字符串类型
+     *
+     * @param value              待清洗的数据
+     * @param replaceParentheses 是否替换圆括号
+     * @return /
+     */
+    protected String cleaningValue(Object value, boolean replaceParentheses) {
         String fieldValue = StrUtil.toStringOrNull(value);
-        if (fieldValue.startsWith("-") || fieldValue.endsWith("-")) {
-            fieldValue = null;
+        if (!StrUtil.isNullOrUndefined(fieldValue)) {
+            // 特殊字符替换,空格替换为空字符
+            fieldValue = fieldValue
+                    .replace("\r", StrUtil.EMPTY)
+                    .replace(";", ";")
+                    .replaceAll(" ", StrUtil.EMPTY);
+            if (replaceParentheses) {
+                // 正则表达式匹配中文括号及其内容,并替换为空字符串
+                fieldValue = Pattern.compile("[(|(][^)]*[)|)]").matcher(fieldValue).replaceAll(StrUtil.EMPTY);
+            }
         }
-        if (fieldValue != null) {
-            fieldValue = fieldValue.replace("\r", StrUtil.EMPTY);
+        // 如果仅有 “-” 该字段值为null
+        if (Objects.equals("-", fieldValue)) {
+            fieldValue = null;
         }
         return StrUtil.isBlank(fieldValue) ? null : fieldValue;
     }

+ 116 - 57
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/AbstractPDReportParser.java

@@ -1,15 +1,13 @@
 package com.simuwang.daq.components.report.parser.pdf;
 
-import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.util.StrUtil;
 import com.simuwang.base.common.conts.Constants;
+import com.simuwang.base.common.enums.ReportType;
 import com.simuwang.base.common.exception.APIException;
 import com.simuwang.base.mapper.EmailFieldMappingMapper;
-import com.simuwang.base.pojo.dto.report.ReportBaseInfoDTO;
-import com.simuwang.base.pojo.dto.report.ReportData;
-import com.simuwang.base.pojo.dto.report.ReportFundInfoDTO;
-import com.simuwang.base.pojo.dto.report.ReportParserParams;
+import com.simuwang.base.pojo.dto.report.*;
 import com.simuwang.daq.components.CustomPDFTextStripper;
 import com.simuwang.daq.components.report.parser.AbstractReportParser;
 import org.apache.pdfbox.Loader;
@@ -24,6 +22,9 @@ import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
 import java.io.IOException;
 import java.util.Calendar;
 import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.function.Function;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
@@ -35,6 +36,10 @@ import java.util.stream.Collectors;
  */
 public abstract class AbstractPDReportParser<T extends ReportData> extends AbstractReportParser<T> {
     /**
+     * 基金信息表格
+     */
+    protected Table fundInfoTable;
+    /**
      * 去除了水印的所有文本内容
      */
     protected List<String> textList;
@@ -45,22 +50,17 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
 
     @Override
     public T parse(ReportParserParams params) throws IOException {
+        // 初始化
         this.init();
-        // 解析报告名称和表格
-        String reportName = null;
+        // 解析报告和表格
         try (PDDocument document = Loader.loadPDF(new RandomAccessReadBufferedFile(params.getFilepath()))) {
+            // 识别所有文字(去水印后的)
             CustomPDFTextStripper stripper = new CustomPDFTextStripper();
             stripper.setSortByPosition(true);
-            String text = stripper.getText(document).replace(Constants.WATERMARK_REPLACE, System.lineSeparator());
+            String text = stripper.getText(document).replace(Constants.WATERMARK_REPLACE, StrUtil.EMPTY);
             this.textList = StrUtil.split(text, System.lineSeparator());
             this.textList.removeIf(StrUtil::isBlank);
-            if (CollUtil.isNotEmpty(this.textList)) {
-                reportName = this.matchReportName(this.textList.get(0));
-                if (StrUtil.isBlank(reportName)) {
-                    throw new APIException("未匹配到报告名称");
-                }
-            }
-            // 解析所有表格
+            // 解析所有表格(单元格字符去水印)
             List<Table> tables = ListUtil.list(true);
             SpreadsheetExtractionAlgorithm extractionAlgorithm = new SpreadsheetExtractionAlgorithm();
             // 自定义表格提取工具,去除单元格中的水印文字
@@ -71,20 +71,55 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
             }
             this.initTableInfo(tables);
         }
+        // 报告基本信息
+        ReportBaseInfoDTO reportInfo = this.buildReportInfo(params);
         // 解析报告中主体基金的基本信息
-        ReportFundInfoDTO reportFundInfo = this.parseFundInfo(params);
+        ReportFundInfoDTO reportFundInfo = this.buildFundInfo(params);
         // 解析其他表格信息并且设置结果字段
-        T reportData = this.parseExtInfoAndSetData(reportName, reportFundInfo, params);
+        T reportData = this.parseExtInfoAndSetData(reportInfo, reportFundInfo);
         // 数据清洗后返回
         this.cleaningReportData(reportData);
         return reportData;
     }
 
+    /**
+     * 初始化解析所有表格数据
+     *
+     * @param tables 按固定的表格模式划分到不同的对象中
+     */
     protected abstract void initTableInfo(List<Table> tables);
 
-    protected abstract ReportFundInfoDTO parseFundInfo(ReportParserParams params);
+    /**
+     * Builds the fund basic-info DTO from the shared {@code fundInfoTable} field.
+     * Per the original note, the annual-report parser is expected to override this logic.
+     *
+     * @param params parser parameters; only {@code getFileId()} is read here
+     * @return the DTO produced by {@code buildDto}
+     *         (NOTE(review): {@code buildDto} returns null when it catches an exception, so callers may receive null)
+     * @throws APIException when {@code fundInfoTable} is null, i.e. no basic-info table was captured
+     */
+    protected ReportFundInfoDTO buildFundInfo(ReportParserParams params) {
+        Table fundInfoTable = this.fundInfoTable;
+        if (fundInfoTable == null) {
+            throw new APIException("未解析到基本信息表格");
+        }
+        // Map the table's key/value pairs onto the fund basic-info DTO
+        return this.buildDto(params.getFileId(), fundInfoTable, ReportFundInfoDTO.class, this::parseFundInfo);
+    }
 
-    protected abstract T parseExtInfoAndSetData(String reportName, ReportFundInfoDTO fundInfo, ReportParserParams params);
+    /**
+     * Parses the fund basic-info table into a raw key/value map.
+     *
+     * @param fundInfoTable the table holding the fund's basic information
+     * @return map of extracted entries; in the implementations visible in this change the key is a
+     *         label cell's text and the value is the adjacent cell's text
+     */
+    protected abstract Map<String, Object> parseFundInfo(Table fundInfoTable);
+
+    /**
+     * Parses the remaining report sections and sets them on the report data object.
+     *
+     * @param reportInfo basic report information
+     * @param fundInfo   basic information of the fund described in the report
+     * @return the assembled report data
+     */
+    protected abstract T parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo);
 
     @Override
     protected void cleaningReportData(T reportData) {
@@ -94,13 +129,13 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
     /**
      * 构建报告基本信息
      *
-     * @param fileId     文件id
-     * @param reportName 报告名称
+     * @param params /
      * @return /
      */
-    protected ReportBaseInfoDTO buildReportInfo(Integer fileId, String reportName) {
-        ReportBaseInfoDTO reportInfo = new ReportBaseInfoDTO();
-        reportInfo.setFileId(fileId);
+    private ReportBaseInfoDTO buildReportInfo(ReportParserParams params) {
+        Integer fileId = params.getFileId();
+        String reportName = params.getFilename();
+        ReportBaseInfoDTO reportInfo = new ReportBaseInfoDTO(fileId);
         reportInfo.setReportName(reportName);
         reportInfo.setReportType(this.matchReportType(reportName));
         reportInfo.setReportDate(this.matchReportDate(reportName));
@@ -108,6 +143,56 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
     }
 
     /**
+     * Builds one DTO per non-null table and labels each DTO with a fund level
+     * (original note: intended for two-column tables when the fund has tiered share classes).
+     * <p>
+     * Level names come from {@code matchTieredFund} applied to the comma-joined {@code textList},
+     * with "母基金" (parent fund) inserted at index 0; DTO {@code i} receives level {@code i},
+     * and DTOs beyond the number of known levels are left without a level.
+     * <p>
+     * NOTE(review): {@code buildDto} returns null after swallowing an exception, so the
+     * collected list may contain null and {@code dtos.get(i).setLevel(...)} would then throw
+     * a NullPointerException. {@code levels.add(0, ...)} also presumes {@code matchTieredFund}
+     * returns a mutable list; confirm.
+     *
+     * @param <DTO>    concrete DTO type
+     * @param fileId   file id set on every DTO
+     * @param tables   tables to convert; null entries are filtered out
+     * @param clazz    DTO class, instantiated by {@code buildDto}
+     * @param function converts a table into the key/value map passed to {@code buildInfo}
+     * @return the DTOs in table order
+     */
+    protected <DTO extends BaseReportLevelDTO<?>> List<DTO> buildLevelDto(Integer fileId, List<Table> tables, Class<DTO> clazz,
+                                                                          Function<Table, Map<String, Object>> function) {
+        // Convert each table to a DTO
+        List<DTO> dtos = tables.stream().filter(Objects::nonNull)
+                .map(e -> this.buildDto(fileId, e, clazz, function)).collect(Collectors.toList());
+        // Match tiered-fund level names from the report text
+        List<String> levels = this.matchTieredFund(String.join(",", this.textList));
+        levels.add(0, "母基金");
+        for (int i = 0; i < dtos.size(); i++) {
+            if (levels.size() <= i) {
+                continue;
+            }
+            dtos.get(i).setLevel(levels.get(i));
+        }
+        return dtos;
+    }
+
+    /**
+     * Builds a single DTO from one table (original note: for two-column tables).
+     * <p>
+     * NOTE(review): any exception raised while converting the table, instantiating the DTO or
+     * populating it is caught and discarded without logging, and null is returned instead;
+     * consider at least logging it.
+     *
+     * @param <DTO>    concrete DTO type; instantiated through its no-argument constructor
+     * @param fileId   file id set on the DTO
+     * @param table    table to convert
+     * @param clazz    DTO class
+     * @param function converts the table into a key/value map; when null an empty map is used
+     * @return the populated DTO, or null if an exception occurred
+     */
+    private <DTO extends BaseReportDTO<?>> DTO buildDto(Integer fileId, Table table, Class<DTO> clazz,
+                                                        Function<Table, Map<String, Object>> function) {
+        try {
+            Map<String, Object> extInfoMap = function == null ? MapUtil.empty() : function.apply(table);
+            DTO dto = clazz.getDeclaredConstructor().newInstance();
+            dto.setFileId(fileId);
+            this.buildInfo(extInfoMap, dto);
+            return dto;
+        } catch (Exception ignored) { // exception is discarded; control falls through to return null
+        }
+        return null;
+    }
+
+    /**
      * 匹配分级基金名称
      *
      * @param text 文本内容
@@ -135,38 +220,6 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
     }
 
     /**
-     * 匹配报告名称
-     *
-     * @param text 文本内容
-     * @return /
-     */
-    private String matchReportName(String text) {
-        if (StrUtil.isBlank(text)) {
-            return null;
-        }
-        // 编译正则表达式模式
-        Pattern pat1 = Pattern.compile(".+?报([告表])?\\d{4}(\\.?\\d{1,2}(\\.?\\d{2})?)?");
-        Pattern pat2 = Pattern.compile("私募.*披露年度报[告表]((\\d{4}-\\d{2}-\\d{2}至\\d{4}-\\d{2}-\\d{2}))?");
-        Pattern pat3 = Pattern.compile(".+?报([告表])?\\d{4}-\\d{2}-\\d{2}至\\d{4}-\\d{2}-\\d{2}?");
-        // 创建Matcher对象
-        Matcher matcher1 = pat1.matcher(text);
-        Matcher matcher2 = pat2.matcher(text);
-        Matcher matcher3 = pat3.matcher(text);
-        // 尝试匹配
-        String reportName;
-        if (matcher1.find()) {
-            reportName = matcher1.group();
-        } else if (matcher2.find()) {
-            reportName = matcher2.group();
-        } else if (matcher3.find()) {
-            reportName = matcher3.group();
-        } else {
-            reportName = text;
-        }
-        return reportName.replace("(", "(").replace(")", ")").trim();
-    }
-
-    /**
      * 匹配报告日期
      *
      * @param string 文本内容
@@ -181,11 +234,13 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
         Pattern pat2 = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");  // 2023-12-31
         Pattern pat3 = Pattern.compile("(2\\d{3})年年度");  // 2023年年度
         Pattern pat4 = Pattern.compile("(\\d{4})年(\\d{1,2})月");  // 2023年12月
+        Pattern pat5 = Pattern.compile("\\d{4}\\d{2}\\d{2}");  // 20231231
         // 创建Matcher对象
         Matcher matcher1 = pat1.matcher(string);
         Matcher matcher2 = pat2.matcher(string);
         Matcher matcher3 = pat3.matcher(string);
         Matcher matcher4 = pat4.matcher(string);
+        Matcher matcher5 = pat5.matcher(string);
         // 尝试匹配
         if (matcher1.find()) {
             String year = matcher1.group(1);
@@ -199,6 +254,8 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
             };
         } else if (matcher2.find()) {
             return matcher2.group();
+        } else if (matcher5.find()) {
+            return matcher5.group();
         } else if (matcher3.find()) {
             return matcher3.group(1) + "-12-31";
         } else if (matcher4.find()) {
@@ -221,8 +278,10 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
         if (string == null) {
             return null;
         }
+        // 所有报告的正则识别方式
+        String patterns = ReportType.getAllPatterns();
         // 编译正则表达式模式
-        Pattern pattern = Pattern.compile("月|季度|年度");
+        Pattern pattern = Pattern.compile(patterns);
         // 创建Matcher对象
         Matcher matcher = pattern.matcher(string);
         // 尝试匹配

+ 204 - 9
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDAnnuallyReportParser.java

@@ -1,17 +1,35 @@
 package com.simuwang.daq.components.report.parser.pdf;
 
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.map.MapUtil;
+import cn.hutool.core.util.StrUtil;
 import com.simuwang.base.mapper.EmailFieldMappingMapper;
-import com.simuwang.base.pojo.dto.report.AnnuallyReportData;
-import com.simuwang.base.pojo.dto.report.ReportFundInfoDTO;
-import com.simuwang.base.pojo.dto.report.ReportParserParams;
+import com.simuwang.base.pojo.dto.report.*;
 import com.simuwang.daq.components.report.parser.ReportParserConstant;
 import org.springframework.stereotype.Component;
+import technology.tabula.RectangularTextContainer;
 import technology.tabula.Table;
 
+import java.awt.geom.Rectangle2D;
+import java.util.Comparator;
 import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
 
+/**
+ * @author wangzaijun
+ * @date 2024/10/10 17:34
+ * @description 年报解析逻辑:基本信息被拆分为多个表格,财务报表未解析
+ */
 @Component(ReportParserConstant.PARSER_PDF_ANNUALLY)
 public class PDAnnuallyReportParser extends AbstractPDReportParser<AnnuallyReportData> {
+    private final List<Table> fundInfoTables = ListUtil.list(true);
+    private final List<Table> shareChangeTables = ListUtil.list(true);
+    private final List<Table> assetAllocationTables = ListUtil.list(true);
+    private final List<Table> investmentIndustryTables = ListUtil.list(true);
+    private final List<Table> financialIndicatorsTables = ListUtil.list(true);
+
     public PDAnnuallyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
         super(fieldMappingMapper);
     }
@@ -23,18 +41,195 @@ public class PDAnnuallyReportParser extends AbstractPDReportParser<AnnuallyRepor
 
     @Override
     protected void initTableInfo(List<Table> tables) {
-        for (Table table : tables) {
-            System.out.println(table.getColCount() + "," + table.getRowCount());
+        for (int i = 0; i < tables.size(); i++) {
+            Table table = tables.get(i);
+            if (i <= 1) {
+                this.fundInfoTables.add(table);
+                continue;
+            }
+            int colCount = table.getColCount();
+            if (colCount == 2) {
+                // 用表格的第二行第一列的数据判断是否份额变动记录
+                String text = this.cleaningValue(table.getCell(1, 0).getText());
+                if (StrUtil.contains(text, "份额")) {
+                    this.shareChangeTables.add(table);
+                }
+            } else if (colCount == 4) {
+                // 用表格的第一行第一列的数据判断是否主要财务指标数据
+                String text = this.cleaningValue(table.getCell(0, 0).getText());
+                if (StrUtil.contains(text, "期间数据和指标")) {
+                    this.financialIndicatorsTables.add(table);
+                    continue;
+                }
+                // 用表格的第一行第二列的数据判断是否行业配置数据
+                text = this.cleaningValue(table.getCell(0, 1).getText());
+                if (StrUtil.contains(text, "行业类别")) {
+                    this.investmentIndustryTables.add(table);
+                }
+            } else if (colCount == 3) {
+                // 用表格的第一行第一列的数据判断是否行业配置数据
+                String text = this.cleaningValue(table.getCell(0, 0).getText());
+                if (StrUtil.contains(text, "行业类别")) {
+                    this.investmentIndustryTables.add(table);
+                    continue;
+                }
+                // 资产配置表格识别(兼容跨页的表格)获取表格中第二列的所有文字,判断所有文字中包含"股权投资"等字符串
+                List<String> details = ListUtil.list(false);
+                for (@SuppressWarnings("all") List<RectangularTextContainer> row : table.getRows()) {
+                    String detail = this.cleaningValue(row.get(1).getText(), false);
+                    if (StrUtil.isNotBlank(detail)) {
+                        details.add(detail);
+                    }
+                }
+                if (CollUtil.containsAny(details, ListUtil.of("股权投资", "股票投资", "债券投资"))) {
+                    this.assetAllocationTables.add(table);
+                }
+            }
+        }
+    }
+
+    @Override
+    protected ReportFundInfoDTO buildFundInfo(ReportParserParams params) { // merges every table in fundInfoTables into a single fund-info DTO
+        Map<String, Object> fundInfoMap = MapUtil.newHashMap(32);
+        for (Table table : this.fundInfoTables) {
+            Map<String, Object> temp = this.parseFundInfo(table);
+            fundInfoMap.putAll(temp); // on duplicate keys, entries from later tables overwrite earlier ones
         }
+        ReportFundInfoDTO info = new ReportFundInfoDTO(params.getFileId());
+        this.buildInfo(fundInfoMap, info); // copies the merged entries onto the DTO
+        return info;
     }
 
     @Override
-    protected ReportFundInfoDTO parseFundInfo(ReportParserParams params) {
-        return null;
+    protected Map<String, Object> parseFundInfo(Table fundInfoTable) {
+        // 季报和年报的基金基本信息是两列的表格
+        Map<String, Object> baseInfoMap = MapUtil.newHashMap(32);
+        for (int i = 0; i < fundInfoTable.getRows().size(); i++) {
+            @SuppressWarnings("all")
+            List<RectangularTextContainer> cols = fundInfoTable.getRows().get(i);
+            for (int j = 0; j < 1; j++) {
+                baseInfoMap.put(cols.get(j).getText(), cols.get(j + 1).getText());
+            }
+        }
+        return baseInfoMap;
     }
 
     @Override
-    protected AnnuallyReportData parseExtInfoAndSetData(String reportName, ReportFundInfoDTO fundInfo, ReportParserParams params) {
-        return null;
+    protected AnnuallyReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo) {
+        Integer fileId = reportInfo.getFileId();
+        // 表格转换数据获取函数
+        Function<Table, Map<String, Object>> function = t -> {
+            Map<String, Object> extInfoMap = MapUtil.newHashMap(16);
+            for (int i = 0; i < t.getRowCount(); i++) {
+                String key = t.getCell(i, 0).getText();
+                String value = t.getCell(i, 1).getText();
+                extInfoMap.put(key, value);
+            }
+            return extInfoMap;
+        };
+        // 份额变动
+        List<ReportShareChangeDTO> shareChanges = this.buildLevelDto(fileId, this.shareChangeTables,
+                ReportShareChangeDTO.class, function);
+//        // 主要财务指标
+//        List<ReportFinancialIndicatorsDTO> financialIndicators = this.buildLevelDto(fileId, this.financialIndicatorsTables,
+//                ReportFinancialIndicatorsDTO.class, function);
+        // 资产配置
+        List<ReportAssetAllocationDTO> assetAllocations = this.buildAssetAllocationInfo(fileId);
+        // 行业配置
+        List<ReportInvestmentIndustryDTO> investmentIndustries = this.buildInvestmentIndustryInfo(fileId);
+        // 返回数据构建
+        AnnuallyReportData reportData = new AnnuallyReportData(reportInfo, fundInfo);
+        reportData.setShareChange(shareChanges);
+        reportData.setFinancialIndicators(null); // todo 财务指标
+        reportData.setAssetAllocation(assetAllocations);
+        reportData.setInvestmentIndustry(investmentIndustries);
+        return reportData;
+    }
+
+    @Override
+    protected void cleaningReportData(AnnuallyReportData reportData) {
+        // TODO data cleaning is not implemented yet; this override is currently a no-op
+    }
+
+    /**
+     * Builds the industry-allocation DTOs from {@code investmentIndustryTables}.
+     * <p>
+     * Four-column tables are tagged invest type 1 and read from column 1 onward; any other
+     * column count is tagged invest type 2 and read from column 0. Rows whose first cell
+     * contains "序号" (serial number) or "行业类别" (industry category), presumably header
+     * rows, are skipped.
+     *
+     * @param fileId file id set on every DTO
+     * @return one DTO per remaining row, in table and row order
+     */
+    private List<ReportInvestmentIndustryDTO> buildInvestmentIndustryInfo(Integer fileId) {
+        List<ReportInvestmentIndustryDTO> dtos = ListUtil.list(false);
+        for (Table table : this.investmentIndustryTables) {
+            int colCount = table.getColCount();
+            // Investment region: 1 = domestic, 2 = Hong Kong Stock Connect
+            int investType = colCount == 4 ? 1 : 2;
+            int j = colCount == 4 ? 1 : 0;
+            // Iterate row by row
+            for (int i = 0; i < table.getRowCount(); i++) {
+                String text = this.cleaningValue(table.getCell(i, 0).getText());
+                if (StrUtil.containsAny(text, "序号", "行业类别")) {
+                    continue;
+                }
+                ReportInvestmentIndustryDTO dto = new ReportInvestmentIndustryDTO(fileId);
+                dto.setInvestType(investType);
+                dto.setIndustryName(this.cleaningValue(table.getCell(i, j).getText()));
+                dto.setMarketValue(this.cleaningValue(table.getCell(i, j + 1).getText()));
+                dto.setRatio(this.cleaningValue(table.getCell(i, j + 2).getText()));
+                dtos.add(dto);
+            }
+        }
+        return dtos;
+    }
+
+    /**
+     * Builds the asset-allocation DTOs from {@code assetAllocationTables}.
+     * <p>
+     * The asset type (first cell) is carried forward across rows, and across tables, whenever
+     * a row's first cell is blank. Rows with a blank third cell, or seen before any asset type
+     * is known, are skipped. A third cell containing "#" is split on ";" and each piece on "#"
+     * into remark (first part) and market value (second part), producing one DTO per piece.
+     * <p>
+     * NOTE(review): a ";"-separated piece that carries no "#" would presumably make
+     * {@code mrs.get(1)} throw IndexOutOfBoundsException; confirm the input format.
+     * Each row list is also sorted in place.
+     *
+     * @param fileId file id set on every DTO
+     * @return the DTOs in table and row order
+     */
+    private List<ReportAssetAllocationDTO> buildAssetAllocationInfo(Integer fileId) {
+        List<ReportAssetAllocationDTO> dtos = ListUtil.list(false);
+        String assetType = null;
+        for (Table table : this.assetAllocationTables) {
+            // Iterate row by row
+            for (@SuppressWarnings("all") List<RectangularTextContainer> row : table.getRows()) {
+                // Sort cells by ascending x coordinate (guards against rows whose cells arrive out of order)
+                row.sort(Comparator.comparing(Rectangle2D.Float::getX));
+                // Asset category
+                String type = this.cleaningValue(row.get(0).getText());
+                if (StrUtil.isNotBlank(type)) {
+                    assetType = type;
+                }
+                // Amount / market value; sometimes formatted as "remark#amount"
+                String marketValueAndRemark = this.cleaningValue(row.get(2).getText());
+                if (StrUtil.isBlank(marketValueAndRemark) || StrUtil.isBlank(assetType)) {
+                    continue;
+                }
+                // Asset detail
+                String detail = this.cleaningValue(row.get(1).getText(), false);
+                if (StrUtil.contains(marketValueAndRemark, "#")) {
+                    // A "#" means a remark is present; there may be several, separated by semicolons
+                    List<String> marketValueAndRemarks = StrUtil.split(marketValueAndRemark, ";");
+                    for (String mr : marketValueAndRemarks) {
+                        if (StrUtil.isBlank(mr)) {
+                            continue;
+                        }
+                        List<String> mrs = StrUtil.split(mr, "#");
+                        ReportAssetAllocationDTO dto = new ReportAssetAllocationDTO(fileId);
+                        dto.setAssetType(assetType);
+                        dto.setAssetDetails(detail);
+                        dto.setMarketValue(mrs.get(1));
+                        dto.setRemark(mrs.get(0));
+                        dtos.add(dto);
+                    }
+                } else {
+                    ReportAssetAllocationDTO dto = new ReportAssetAllocationDTO(fileId);
+                    dto.setAssetType(assetType);
+                    dto.setAssetDetails(detail);
+                    dto.setMarketValue(marketValueAndRemark);
+                    dtos.add(dto);
+                }
+            }
+        }
+        return dtos;
     }
 }

+ 21 - 42
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDMonthlyReportParser.java

@@ -2,12 +2,11 @@ package com.simuwang.daq.components.report.parser.pdf;
 
 import cn.hutool.core.collection.ListUtil;
 import cn.hutool.core.map.MapUtil;
-import com.simuwang.base.common.exception.APIException;
 import com.simuwang.base.mapper.EmailFieldMappingMapper;
 import com.simuwang.base.pojo.dto.report.MonthlyReportData;
+import com.simuwang.base.pojo.dto.report.ReportBaseInfoDTO;
 import com.simuwang.base.pojo.dto.report.ReportFundInfoDTO;
 import com.simuwang.base.pojo.dto.report.ReportNetReportDTO;
-import com.simuwang.base.pojo.dto.report.ReportParserParams;
 import com.simuwang.daq.components.report.parser.ReportParserConstant;
 import org.springframework.stereotype.Component;
 import technology.tabula.RectangularTextContainer;
@@ -24,7 +23,6 @@ import java.util.Map;
 @Component(ReportParserConstant.PARSER_PDF_MONTHLY)
 public class PDMonthlyReportParser extends AbstractPDReportParser<MonthlyReportData> {
     private final List<Table> extNavTables = ListUtil.list(true);
-    private Table fundInfoTable = null;
 
     public PDMonthlyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
         super(fieldMappingMapper);
@@ -40,6 +38,10 @@ public class PDMonthlyReportParser extends AbstractPDReportParser<MonthlyReportD
         // 一般月报是固定的模板,4列表格是基金基本信息,其他5列的表格是月净值
         for (Table table : tables) {
             int colCount = table.getColCount();
+            int rowCount = table.getRowCount();
+            if (colCount == 0 && rowCount == 0) {
+                continue;
+            }
             if (colCount == 4) {
                 this.fundInfoTable = table;
             } else if (colCount >= 5) {
@@ -49,11 +51,8 @@ public class PDMonthlyReportParser extends AbstractPDReportParser<MonthlyReportD
     }
 
     @Override
-    protected ReportFundInfoDTO parseFundInfo(ReportParserParams params) {
-        Table fundInfoTable = this.fundInfoTable;
-        if (fundInfoTable == null) {
-            throw new APIException("未解析到基本信息表格");
-        }
+    protected Map<String, Object> parseFundInfo(Table fundInfoTable) {
+        // 月报的基金基本信息是四列的表格
         Map<String, Object> baseInfoMap = MapUtil.newHashMap(32);
         for (int i = 0; i < fundInfoTable.getRows().size(); i++) {
             @SuppressWarnings("all")
@@ -62,44 +61,24 @@ public class PDMonthlyReportParser extends AbstractPDReportParser<MonthlyReportD
                 baseInfoMap.put(cols.get(j * 2).getText(), cols.get(j * 2 + 1).getText());
             }
         }
-        // 匹配字段清洗字段
-        ReportFundInfoDTO reportFundInfo = new ReportFundInfoDTO();
-        reportFundInfo.setFileId(params.getFileId());
-        this.buildInfo(baseInfoMap, reportFundInfo);
-        return reportFundInfo;
+        return baseInfoMap;
     }
 
     @Override
-    protected MonthlyReportData parseExtInfoAndSetData(String reportName, ReportFundInfoDTO fundInfo, ReportParserParams params) {
-        Integer fileId = params.getFileId();
-        MonthlyReportData reportData = new MonthlyReportData();
-        reportData.setBaseInfo(this.buildReportInfo(fileId, reportName));
-        reportData.setFundInfo(fundInfo);
+    protected MonthlyReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo) {
+        MonthlyReportData reportData = new MonthlyReportData(reportInfo, fundInfo);
         // 母基金和分级基金的净值
-        List<ReportNetReportDTO> exts = ListUtil.list(false);
-        List<Table> extNavTables = this.extNavTables;
-        for (Table extNavTable : extNavTables) {
-            Map<String, Object> extInfoMap = MapUtil.newHashMap(16);
-            for (int i = 0; i < extNavTable.getColCount(); i++) {
-                String key = extNavTable.getCell(0, i).getText();
-                String value = extNavTable.getCell(1, i).getText();
-                extInfoMap.put(key, value);
-            }
-            ReportNetReportDTO navInfo = new ReportNetReportDTO();
-            navInfo.setFileId(fileId);
-            this.buildInfo(extInfoMap, navInfo);
-            exts.add(navInfo);
-        }
-        // 分级基金匹配
-        List<String> levels = this.matchTieredFund(String.join(",", this.textList));
-        levels.add(0, "母基金");
-        for (int i = 0; i < exts.size(); i++) {
-            if (levels.size() <= i) {
-                continue;
-            }
-            exts.get(i).setLevel(levels.get(i));
-        }
-        reportData.setNetReport(exts);
+        List<ReportNetReportDTO> dtos = this.buildLevelDto(reportInfo.getFileId(), this.extNavTables,
+                ReportNetReportDTO.class, t -> {
+                    Map<String, Object> extInfoMap = MapUtil.newHashMap(16);
+                    for (int i = 0; i < t.getColCount(); i++) {
+                        String key = t.getCell(0, i).getText();
+                        String value = t.getCell(1, i).getText();
+                        extInfoMap.put(key, value);
+                    }
+                    return extInfoMap;
+                });
+        reportData.setNetReport(dtos);
         return reportData;
     }
 

+ 173 - 8
service-daq/src/main/java/com/simuwang/daq/components/report/parser/pdf/PDQuarterlyReportParser.java

@@ -1,14 +1,20 @@
 package com.simuwang.daq.components.report.parser.pdf;
 
+import cn.hutool.core.collection.ListUtil;
+import cn.hutool.core.map.MapUtil;
+import cn.hutool.core.util.StrUtil;
 import com.simuwang.base.mapper.EmailFieldMappingMapper;
-import com.simuwang.base.pojo.dto.report.QuarterlyReportData;
-import com.simuwang.base.pojo.dto.report.ReportFundInfoDTO;
-import com.simuwang.base.pojo.dto.report.ReportParserParams;
+import com.simuwang.base.pojo.dto.report.*;
 import com.simuwang.daq.components.report.parser.ReportParserConstant;
 import org.springframework.stereotype.Component;
+import technology.tabula.RectangularTextContainer;
 import technology.tabula.Table;
 
+import java.awt.geom.Rectangle2D;
+import java.util.Comparator;
 import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
 
 /**
  * @author wangzaijun
@@ -17,6 +23,12 @@ import java.util.List;
  */
 @Component(ReportParserConstant.PARSER_PDF_QUARTERLY)
 public class PDQuarterlyReportParser extends AbstractPDReportParser<QuarterlyReportData> {
+    // Buckets filled by initTableInfo; each holds the tables classified as one report section.
+    // NOTE(review): this class is registered via @Component and the visible code only ever appends to
+    // these lists; if the bean is a singleton they would accumulate across parse calls — confirm they are reset elsewhere.
+    /** Five-column tables (NAV performance; marked as not persisted in initTableInfo). */
+    private final List<Table> navPerformanceTables = ListUtil.list(true);
+    /** Two-column tables whose second-row first cell does not contain "份额" (main financial indicators). */
+    private final List<Table> financialIndicatorsTables = ListUtil.list(true);
+    /** Two-column tables whose second-row first cell contains "份额" (share changes). */
+    private final List<Table> shareChangeTables = ListUtil.list(true);
+    /** Three-column tables whose first cell does not contain "行业类别" (asset allocation). */
+    private final List<Table> assetAllocationTables = ListUtil.list(true);
+    /** Four-column tables, plus three-column tables whose first cell contains "行业类别" (industry allocation). */
+    private final List<Table> investmentIndustryTables = ListUtil.list(true);
+
+    /**
+     * Creates the parser and hands the field-mapping mapper to the parent constructor.
+     *
+     * @param fieldMappingMapper mapper forwarded to {@code AbstractPDReportParser}
+     */
     public PDQuarterlyReportParser(EmailFieldMappingMapper fieldMappingMapper) {
         super(fieldMappingMapper);
     }
@@ -29,17 +41,170 @@ public class PDQuarterlyReportParser extends AbstractPDReportParser<QuarterlyRep
     @Override
     protected void initTableInfo(List<Table> tables) {
         for (Table table : tables) {
-            System.out.println(table.getColCount() + "," + table.getRowCount());
+            int colCount = table.getColCount();
+            int rowCount = table.getRowCount();
+            if (colCount == 0 && rowCount == 0) {
+                continue;
+            }
+            if (rowCount == 13 && colCount == 2) {
+                this.fundInfoTable = table;
+            } else if (colCount == 5) {
+                // 净值表现(未入库)
+                this.navPerformanceTables.add(table);
+            } else if (colCount == 2) {
+                // 用表格的第二行第一列的数据判断是否份额变动记录
+                String text = this.cleaningValue(table.getCell(1, 0).getText());
+                // 主要财务指标或份额变动
+                if (StrUtil.contains(text, "份额")) {
+                    this.shareChangeTables.add(table);
+                } else {
+                    this.financialIndicatorsTables.add(table);
+                }
+            } else if (colCount == 4) {
+                // 行业配置
+                this.investmentIndustryTables.add(table);
+            } else if (colCount == 3) {
+                // 用表格的第一个单元格判断是否资产配置表
+                String text = this.cleaningValue(table.getCell(0, 0).getText());
+                if (StrUtil.contains(text, "行业类别")) {
+                    this.investmentIndustryTables.add(table);
+                } else {
+                    this.assetAllocationTables.add(table);
+                }
+            }
+        }
+    }
+
+    /**
+     * Reads the fund basic-information table into a key/value map.
+     * <p>
+     * Quarterly and annual reports present the fund's basic information as a two-column table:
+     * the first cell of each row is used as the key and the second cell as the value.
+     * Rows with fewer than two cells are skipped instead of raising an index error.
+     *
+     * @param fundInfoTable the fund basic-information table
+     * @return map from the first-column text to the second-column text of every row
+     */
+    @Override
+    protected Map<String, Object> parseFundInfo(Table fundInfoTable) {
+        Map<String, Object> baseInfoMap = MapUtil.newHashMap(32);
+        for (@SuppressWarnings("all") List<RectangularTextContainer> cols : fundInfoTable.getRows()) {
+            if (cols.size() < 2) {
+                // no key/value pair can be formed from this row
+                continue;
+            }
+            baseInfoMap.put(cols.get(0).getText(), cols.get(1).getText());
         }
+        return baseInfoMap;
     }
 
     @Override
-    protected ReportFundInfoDTO parseFundInfo(ReportParserParams params) {
-        return null;
+    protected QuarterlyReportData parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo) {
+        Integer fileId = reportInfo.getFileId();
+        // 表格转换数据获取函数
+        Function<Table, Map<String, Object>> function = t -> {
+            Map<String, Object> extInfoMap = MapUtil.newHashMap(16);
+            for (int i = 0; i < t.getRowCount(); i++) {
+                String key = t.getCell(i, 0).getText();
+                String value = t.getCell(i, 1).getText();
+                extInfoMap.put(key, value);
+            }
+            return extInfoMap;
+        };
+        // 份额变动
+        List<ReportShareChangeDTO> shareChanges = this.buildLevelDto(fileId, this.shareChangeTables,
+                ReportShareChangeDTO.class, function);
+        // 主要财务指标
+        List<ReportFinancialIndicatorsDTO> financialIndicators = this.buildLevelDto(fileId, this.financialIndicatorsTables,
+                ReportFinancialIndicatorsDTO.class, function);
+        // 资产配置
+        List<ReportAssetAllocationDTO> assetAllocations = this.buildAssetAllocationInfo(fileId);
+        // 行业配置
+        List<ReportInvestmentIndustryDTO> investmentIndustries = this.buildInvestmentIndustryInfo(fileId);
+        // 返回数据构建
+        QuarterlyReportData reportData = new QuarterlyReportData(reportInfo, fundInfo);
+        reportData.setShareChange(shareChanges);
+        reportData.setFinancialIndicators(financialIndicators);
+        reportData.setAssetAllocation(assetAllocations);
+        reportData.setInvestmentIndustry(investmentIndustries);
+        return reportData;
     }
 
+    /**
+     * Data-cleaning step for the quarterly report data.
+     * Not implemented yet: the body is empty apart from a TODO.
+     *
+     * @param reportData quarterly report data to clean
+     */
     @Override
-    protected QuarterlyReportData parseExtInfoAndSetData(String reportName, ReportFundInfoDTO fundInfo, ReportParserParams params) {
-        return null;
+    protected void cleaningReportData(QuarterlyReportData reportData) {
+        // TODO: data cleaning
+    }
+
+    /**
+     * Builds the industry-allocation records from the collected industry tables.
+     * <p>
+     * Four-column tables are tagged with invest type 1 and read starting at the second column;
+     * every other table is tagged with invest type 2 and read starting at the first column.
+     * Rows whose first cell contains "序号" or "行业类别" are skipped.
+     *
+     * @param fileId file id passed to every record's constructor
+     * @return one record per remaining row, in table and row order
+     */
+    private List<ReportInvestmentIndustryDTO> buildInvestmentIndustryInfo(Integer fileId) {
+        List<ReportInvestmentIndustryDTO> records = ListUtil.list(false);
+        for (Table industryTable : this.investmentIndustryTables) {
+            // invest region: 1 = domestic, 2 = Hong Kong Stock Connect
+            int investType;
+            // index of the column holding the industry name
+            int nameCol;
+            if (industryTable.getColCount() == 4) {
+                investType = 1;
+                nameCol = 1;
+            } else {
+                investType = 2;
+                nameCol = 0;
+            }
+            // walk the table row by row
+            for (int row = 0; row < industryTable.getRowCount(); row++) {
+                String firstCell = this.cleaningValue(industryTable.getCell(row, 0).getText());
+                if (!StrUtil.containsAny(firstCell, "序号", "行业类别")) {
+                    ReportInvestmentIndustryDTO record = new ReportInvestmentIndustryDTO(fileId);
+                    record.setInvestType(investType);
+                    record.setIndustryName(this.cleaningValue(industryTable.getCell(row, nameCol).getText()));
+                    record.setMarketValue(this.cleaningValue(industryTable.getCell(row, nameCol + 1).getText()));
+                    record.setRatio(this.cleaningValue(industryTable.getCell(row, nameCol + 2).getText()));
+                    records.add(record);
+                }
+            }
+        }
+        return records;
+    }
+
+    /**
+     * Builds the asset-allocation records from the collected asset-allocation tables.
+     * <p>
+     * Each row is read as (asset type, asset detail, market value) after sorting its cells by x
+     * coordinate. A blank asset-type cell inherits the most recent non-blank one (the value is
+     * carried across rows and tables). Rows with a blank value cell, or seen before any asset
+     * type is known, are skipped, as are rows with fewer than three cells.
+     * <p>
+     * The value cell may use the "remark#amount" form, with several entries separated by ';';
+     * each non-blank entry yields its own record. An entry without '#' is stored as an amount
+     * with no remark rather than raising an index error.
+     *
+     * @param fileId file id passed to every record's constructor
+     * @return the parsed asset-allocation records, in table and row order
+     */
+    private List<ReportAssetAllocationDTO> buildAssetAllocationInfo(Integer fileId) {
+        List<ReportAssetAllocationDTO> dtos = ListUtil.list(false);
+        String assetType = null;
+        for (Table table : this.assetAllocationTables) {
+            // walk the table row by row
+            for (@SuppressWarnings("all") List<RectangularTextContainer> row : table.getRows()) {
+                if (row.size() < 3) {
+                    // type, detail and value cells are all required
+                    continue;
+                }
+                // sort cells by x coordinate (some rows come back out of order)
+                row.sort(Comparator.comparing(Rectangle2D.Float::getX));
+                // asset category; a blank cell keeps the previous category
+                String type = this.cleaningValue(row.get(0).getText());
+                if (StrUtil.isNotBlank(type)) {
+                    assetType = type;
+                }
+                // amount / market value, sometimes in "remark#amount" form
+                String marketValueAndRemark = this.cleaningValue(row.get(2).getText());
+                if (StrUtil.isBlank(marketValueAndRemark) || StrUtil.isBlank(assetType)) {
+                    continue;
+                }
+                // asset detail
+                String detail = this.cleaningValue(row.get(1).getText(), false);
+                if (!StrUtil.contains(marketValueAndRemark, "#")) {
+                    // plain amount, no remark
+                    ReportAssetAllocationDTO dto = new ReportAssetAllocationDTO(fileId);
+                    dto.setAssetType(assetType);
+                    dto.setAssetDetails(detail);
+                    dto.setMarketValue(marketValueAndRemark);
+                    dtos.add(dto);
+                    continue;
+                }
+                // '#' means remarks are present; several entries may be separated by ';'
+                for (String mr : StrUtil.split(marketValueAndRemark, ";")) {
+                    if (StrUtil.isBlank(mr)) {
+                        continue;
+                    }
+                    List<String> mrs = StrUtil.split(mr, "#");
+                    // an entry may lack '#' even though another entry in the same cell has one
+                    boolean hasRemark = mrs.size() > 1;
+                    ReportAssetAllocationDTO dto = new ReportAssetAllocationDTO(fileId);
+                    dto.setAssetType(assetType);
+                    dto.setAssetDetails(detail);
+                    dto.setMarketValue(hasRemark ? mrs.get(1) : mrs.get(0));
+                    if (hasRemark) {
+                        dto.setRemark(mrs.get(0));
+                    }
+                    dtos.add(dto);
+                }
+            }
+        }
+        return dtos;
     }
 }

+ 17 - 21
service-daq/src/main/java/com/simuwang/daq/service/EmailParseService.java

@@ -367,9 +367,9 @@ public class EmailParseService {
         }
         // 类型识别---先识别季度报告,没有季度再识别年度报告,最后识别月报
         ReportType reportType = ReportType.MONTHLY;
-        if (fileName.contains(ReportType.QUARTERLY.getPattern())) {
+        if (StrUtil.containsAny(fileName, ReportType.QUARTERLY.getPatterns())) {
             reportType = ReportType.QUARTERLY;
-        } else if (fileName.contains(ReportType.ANNUALLY.getPattern())) {
+        } else if (StrUtil.containsAny(fileName, ReportType.ANNUALLY.getPatterns())) {
             reportType = ReportType.ANNUALLY;
         }
         // 解析器--如果开启python解析则直接调用python接口,否则根据文件后缀获取对应解析器
@@ -390,32 +390,28 @@ public class EmailParseService {
                     .filepath(emailContentInfoDTO.getFilePath()).registerNumber(registerNumber).build();
             ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, fileType);
             reportData = instance.parse(params);
-            if (log.isInfoEnabled()) {
-                log.info("报告{}解析器{}的解析结果为:{}", params, instance.getParser(), reportData);
-            }
         } catch (Exception e) {
             log.error("报告{}解析失败\n{}", params, ExceptionUtil.stacktraceToString(e));
         } finally {
             parserWatch.stop();
             if (log.isInfoEnabled()) {
-                log.info("报告{}解析完成,耗时{}ms", params, parserWatch.getTotalTimeMillis());
+                log.info("报告{}解析结果为{},耗时{}ms", params, reportData, parserWatch.getTotalTimeMillis());
             }
         }
         // 保存报告解析结果
-        StopWatch writeWatch = new StopWatch();
-        writeWatch.start();
-        try {
-            ReportWriter<ReportData> instance = this.reportWriterFactory.getInstance(reportType);
-            instance.write(reportData);
-            if (log.isInfoEnabled()) {
-                log.info("报告{}结果保存成功", params);
-            }
-        } catch (Exception e) {
-            log.error("报告{}结果保存失败\n{}", params, ExceptionUtil.stacktraceToString(e));
-        } finally {
-            writeWatch.stop();
-            if (log.isInfoEnabled()) {
-                log.info("报告{}解析结果保存完成,耗时{}ms", params, writeWatch.getTotalTimeMillis());
+        if (reportData != null) {
+            StopWatch writeWatch = new StopWatch();
+            writeWatch.start();
+            try {
+                ReportWriter<ReportData> instance = this.reportWriterFactory.getInstance(reportType);
+                instance.write(reportData);
+            } catch (Exception e) {
+                log.error("报告{}结果保存失败\n{}", params, ExceptionUtil.stacktraceToString(e));
+            } finally {
+                writeWatch.stop();
+                if (log.isInfoEnabled()) {
+                    log.info("报告{}解析结果保存完成,耗时{}ms", params, writeWatch.getTotalTimeMillis());
+                }
             }
         }
         return reportData;
@@ -744,7 +740,7 @@ public class EmailParseService {
     }
 
+    /**
+     * Loads the email field mappings and indexes them by code; each mapping's name is split
+     * on commas to form the value list.
+     * NOTE(review): the meaning of the argument {@code 1} passed to the mapper is not visible here — confirm.
+     * NOTE(review): {@code Collectors.toMap} without a merge function throws on duplicate keys;
+     * presumably codes are unique — verify.
+     *
+     * @return map from mapping code to the list of comma-separated names
+     */
     public Map<String, List<String>> getEmailFieldMapping() {
-        List<EmailFieldMappingDO> emailFieldMappingDOList = emailFieldMapper.getEmailFieldMapping();
+        List<EmailFieldMappingDO> emailFieldMappingDOList = emailFieldMapper.getEmailFieldMapping(1);
         return emailFieldMappingDOList.stream()
                 .collect(Collectors.toMap(EmailFieldMappingDO::getCode, v -> Arrays.stream(v.getName().split(",")).toList()));
     }

+ 2 - 2
service-deploy/src/test/java/com/simuwang/ApplicationTest.java

@@ -45,8 +45,8 @@ public class ApplicationTest {
     @Test
     public void reportTest() {
         MailboxInfoDTO emailInfoDTO = this.buildMailbox();
-        Date startDate = DateUtil.parse("2024-09-30 08:59:30", DateConst.YYYY_MM_DD_HH_MM_SS);
-        Date endDate = DateUtil.parse("2024-09-30 09:01:00", DateConst.YYYY_MM_DD_HH_MM_SS);
+        Date startDate = DateUtil.parse("2024-10-10 16:40:30", DateConst.YYYY_MM_DD_HH_MM_SS);
+        Date endDate = DateUtil.parse("2024-10-10 19:59:30", DateConst.YYYY_MM_DD_HH_MM_SS);
         try {
             emailParseService.parseEmail(emailInfoDTO, startDate, endDate);
         } catch (Exception e) {