Kaynağa Gözat

feat:报告解析

wangzaijun 3 hafta önce
ebeveyn
işleme
5d5407f377

+ 3 - 1
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/AbstractReportParser.java

@@ -72,7 +72,9 @@ public abstract class AbstractReportParser<T extends ReportData> implements Repo
      * @param infoMap 表格转换的函数
      * @return /
      */
-    protected <DTO extends BaseReportDTO<?>> DTO buildDto(Integer fileId, Class<DTO> clazz, Map<String, Object> infoMap) {
+    protected <DTO extends BaseReportDTO<?>> DTO buildDto(Integer fileId,
+                                                          Class<DTO> clazz,
+                                                          Map<String, Object> infoMap) {
         try {
             DTO dto = clazz.getDeclaredConstructor().newInstance();
             dto.setFileId(fileId);

+ 9 - 4
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/AbstractPDReportParser.java

@@ -84,7 +84,8 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
                 if (i >= 1 && params.getReportType() == ReportType.LETTER) {
                     break;
                 }
-                Integer rows = tableList.stream().map(Table::getRowCount).filter(rowCount -> rowCount >= 1).reduce(0, Integer::sum);
+                Integer rows = tableList.stream().map(Table::getRowCount)
+                        .filter(rowCount -> rowCount >= 1).reduce(0, Integer::sum);
                 if (rows >= 1) {
                     for (Table table : tableList) {
                         int rowCount = table.getRowCount();
@@ -105,7 +106,8 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
                         String content = StrUtil.split(jsonObject.getStr("content"), "```").get(1);
                         this.aiParserContent = "{" + StrUtil.subAfter(content, "{", false) + "}";
                     } catch (Exception e) {
-                        this.logger.warn("{} ai解析失败,解析结果{},错误原因:{}", filename, body, ExceptionUtil.stacktraceToString(e));
+                        this.logger.warn("{} ai解析失败,解析结果{},错误原因:{}",
+                                filename, body, ExceptionUtil.stacktraceToString(e));
                     }
                 }
                 i++;
@@ -154,7 +156,8 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
      * @param fundInfo   报告中基金基本信息
      * @return /
      */
-    protected abstract T parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo);
+    protected abstract T parseExtInfoAndSetData(ReportBaseInfoDTO reportInfo,
+                                                ReportFundInfoDTO fundInfo);
 
     @Override
     protected void cleaningReportData(T reportData) {
@@ -181,7 +184,9 @@ public abstract class AbstractPDReportParser<T extends ReportData> extends Abstr
      * @param function 表格转换的函数
      * @return /
      */
-    protected <DTO extends BaseReportLevelDTO<?>> List<DTO> buildLevelDto(Integer fileId, List<Table> tables, Class<DTO> clazz,
+    protected <DTO extends BaseReportLevelDTO<?>> List<DTO> buildLevelDto(Integer fileId,
+                                                                          List<Table> tables,
+                                                                          Class<DTO> clazz,
                                                                           Function<Table, Map<String, Object>> function) {
         List<DTO> dtos = ListUtil.list(true);
         // 信息表格字段和值映射

+ 10 - 26
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/PDAnnuallyReportParser.java

@@ -54,34 +54,14 @@ public class PDAnnuallyReportParser extends PDQuarterlyReportParser<AnnuallyRepo
             // 用表格的第一列的数据判断是否主要财务指标数据
             List<String> texts = this.getTableColTexts(table, 0);
             if (CollUtil.containsAny(texts, ReportParseUtils.FINANCIAL_INDICATORS_COLUMN_NAMES)) {
-                if (table.getRowCount() == 10) {
-                    fi++;
-                    this.financialIndicatorsTables.add(table);
-                } else {
-                    List<Table> tempList = spanningPageFinancialIndicatorsTableMap.getOrDefault(fi, ListUtil.list(true));
-                    tempList.add(table);
-                    spanningPageFinancialIndicatorsTableMap.putIfAbsent(fi, tempList);
-                    if (tempList.size() == 2) {
-                        fi++;
-                    }
-                }
+                // splitTables returns the (possibly advanced) index; int is passed by value, so it must be reassigned
+                fi = this.splitTables(table, 10, fi, this.financialIndicatorsTables, spanningPageFinancialIndicatorsTableMap);
                 continue;
             }
             int colCount = table.getColCount();
             if (colCount == 2) {
                 // 用表格的第一列的数据判断是否份额变动记录
                 if (CollUtil.containsAny(texts, ReportParseUtils.SHARE_CHANGE_COLUMN_NAMES)) {
-                    if (table.getRowCount() == 5) {
-                        sci++;
-                        this.shareChangeTables.add(table);
-                    } else {
-                        List<Table> tempList = spanningPageShareChangeTableMap.getOrDefault(sci, ListUtil.list(true));
-                        tempList.add(table);
-                        spanningPageShareChangeTableMap.putIfAbsent(sci, tempList);
-                        if (tempList.size() == 2) {
-                            sci++;
-                        }
-                    }
+                    // splitTables returns the (possibly advanced) index; int is passed by value, so it must be reassigned
+                    sci = this.splitTables(table, 5, sci, this.shareChangeTables, spanningPageShareChangeTableMap);
                 }
             } else if (colCount == 4) {
                 // 用表格的第二列的数据判断是否行业配置数据(内地)
@@ -122,9 +102,12 @@ public class PDAnnuallyReportParser extends PDQuarterlyReportParser<AnnuallyRepo
     }
 
     @Override
-    protected AnnuallyReportData buildReportData(ReportBaseInfoDTO reportInfo, ReportFundInfoDTO fundInfo,
-                                                 List<ReportShareChangeDTO> shareChanges, List<ReportFinancialIndicatorsDTO> financialIndicators,
-                                                 List<ReportAssetAllocationDTO> assetAllocations, List<ReportInvestmentIndustryDTO> investmentIndustries) {
+    protected AnnuallyReportData buildReportData(ReportBaseInfoDTO reportInfo,
+                                                 ReportFundInfoDTO fundInfo,
+                                                 List<ReportShareChangeDTO> shareChanges,
+                                                 List<ReportFinancialIndicatorsDTO> financialIndicators,
+                                                 List<ReportAssetAllocationDTO> assetAllocations,
+                                                 List<ReportInvestmentIndustryDTO> investmentIndustries) {
         AnnuallyReportData reportData = new AnnuallyReportData(reportInfo, fundInfo);
         reportData.setShareChange(shareChanges);
         reportData.setFinancialIndicators(financialIndicators);
@@ -133,7 +116,8 @@ public class PDAnnuallyReportParser extends PDQuarterlyReportParser<AnnuallyRepo
         return reportData;
     }
 
-    protected List<ReportFinancialIndicatorsDTO> buildFinancialIndicatorsInfo(Integer fileId, Function<Table, Map<String, Object>> function) {
+    protected List<ReportFinancialIndicatorsDTO> buildFinancialIndicatorsInfo(Integer fileId,
+                                                                              Function<Table, Map<String, Object>> function) {
         List<ReportFinancialIndicatorsDTO> dtos = ListUtil.list(false);
         // 分级基金
         List<String> levels = ReportParseUtils.matchTieredFund(String.join(",", this.textList));

+ 46 - 4
mo-daq/src/main/java/com/smppw/modaq/application/components/report/parser/pdf/PDQuarterlyReportParser.java

@@ -55,6 +55,10 @@ public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends Abst
 
     @Override
     protected void initTableInfo(List<Table> tables) {
+        Map<Integer, List<Table>> spanningPageFinancialIndicatorsTableMap = MapUtil.newHashMap(8, true);
+        Map<Integer, List<Table>> spanningPageShareChangeTableMap = MapUtil.newHashMap(8, true);
+        int fi = 0;
+        int sci = 0;
         for (Table table : tables) {
             int colCount = table.getColCount();
             int rowCount = table.getRowCount();
@@ -68,9 +72,9 @@ public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends Abst
                 List<String> texts = this.getTableColTexts(table, 0);
                 // 主要财务指标或份额变动
                 if (CollUtil.containsAny(texts, ReportParseUtils.SHARE_CHANGE_COLUMN_NAMES)) {
-                    this.shareChangeTables.add(table);
+                    sci = splitTables(table, 5, sci, this.shareChangeTables, spanningPageShareChangeTableMap);
                 } else if (CollUtil.containsAny(texts, ReportParseUtils.FINANCIAL_INDICATORS_COLUMN_NAMES)) {
-                    this.financialIndicatorsTables.add(table);
+                    fi = splitTables(table, 10, fi, this.financialIndicatorsTables, spanningPageFinancialIndicatorsTableMap);
                 }
             } else if (colCount == 4) {
                 // 行业配置
@@ -89,6 +93,10 @@ public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends Abst
                 }
             }
         }
+        // 跨页的财务信息记录表(包括表头一共有10行)
+        this.handleSpanningPageTables(this.financialIndicatorsTables, spanningPageFinancialIndicatorsTableMap);
+        // 跨页的份额变动记录表(包括表头一共有5行)
+        this.handleSpanningPageTables(this.shareChangeTables, spanningPageShareChangeTableMap);
     }
 
     @Override
@@ -148,7 +156,8 @@ public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends Abst
      * @param function 字段映射关系
      * @return /
      */
-    protected List<ReportFinancialIndicatorsDTO> buildFinancialIndicatorsInfo(Integer fileId, Function<Table, Map<String, Object>> function) {
+    protected List<ReportFinancialIndicatorsDTO> buildFinancialIndicatorsInfo(Integer fileId,
+                                                                              Function<Table, Map<String, Object>> function) {
         return this.buildLevelDto(fileId, this.financialIndicatorsTables, ReportFinancialIndicatorsDTO.class, function);
     }
 
@@ -283,7 +292,40 @@ public class PDQuarterlyReportParser<T extends QuarterlyReportData> extends Abst
         return details;
     }
 
-    protected void handleSpanningPageTables(List<Table> tables, Map<Integer, List<Table>> spanningPageTableMap) {
+    /**
+     * Files one table either into the list of complete tables or into the map of fragments
+     * that still have to be merged, and returns the index to use for the next table.
+     * <p>
+     * This mainly matters for tiered-fund tables: without the grouping, data could be bound
+     * to the wrong tier.
+     *
+     * @param table                table to classify
+     * @param rowCount             number of rows a complete (non-split) table has
+     * @param index                index of the complete table currently being assembled
+     * @param tables               receives tables that need no merging
+     * @param spanningPageTableMap receives fragments that need merging, keyed by {@code index}
+     * @return {@code index + 1} when {@code table} is complete or is the second fragment
+     * collected under {@code index}; otherwise {@code index} unchanged
+     */
+    protected int splitTables(Table table, int rowCount, int index,
+                              List<Table> tables, Map<Integer, List<Table>> spanningPageTableMap) {
+        // A table with exactly the expected row count is complete and needs no merging.
+        if (table.getRowCount() == rowCount) {
+            tables.add(table);
+            return index + 1;
+        }
+        // Otherwise collect it as a fragment under the current index; the list is created on first use only.
+        List<Table> fragments = spanningPageTableMap.computeIfAbsent(index, key -> ListUtil.list(true));
+        fragments.add(table);
+        // NOTE(review): the index advances once two fragments are collected, so a table split
+        // across more than two pages would file its third fragment under the next index - confirm.
+        return fragments.size() == 2 ? index + 1 : index;
+    }
+
+    /**
+     * 把跨页的表格合并为一个并且插入到数据集合中的特定位置
+     *
+     * @param tables               数据集合
+     * @param spanningPageTableMap 跨页的表格对象
+     */
+    protected void handleSpanningPageTables(List<Table> tables,
+                                            Map<Integer, List<Table>> spanningPageTableMap) {
         // 跨页的份额变动记录表(包括表头一共有5行)
         for (Map.Entry<Integer, List<Table>> entry : spanningPageTableMap.entrySet()) {
             List<Table> spanningPageShareChangeTables = entry.getValue();

+ 1 - 1
mo-daq/src/main/java/com/smppw/modaq/domain/service/EmailParseService.java

@@ -415,7 +415,7 @@ public class EmailParseService {
     public Map<Integer, List<String>> getEmailType() {
         Map<Integer, List<String>> emailTypeMap = MapUtil.newHashMap(3, true);
         emailTypeMap.put(EmailTypeConst.REPORT_EMAIL_TYPE,
-                ListUtil.toList("月报", "月度报告", "年报", "年度报告"));
+                ListUtil.toList("月报", "月度报告", "季报", "季度报告", "年报", "年度报告"));
         emailTypeMap.put(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE,
                 ListUtil.toList("确认单", "确认函", "交易确认数据", "赎回确认", "申购确认", "分红确认", "确认表", "交易确认", "确认"));
         return emailTypeMap;