Explorar o código

feat: 数据看板接口开发

chenjianhua hai 6 meses
pai
achega
384505a701

+ 0 - 14
service-base/src/main/java/com/simuwang/base/pojo/dto/report/PythonResult.java

@@ -1,14 +0,0 @@
-package com.simuwang.base.pojo.dto.report;
-
-import lombok.Data;
-
-/**
- * FileName: PythonResult
- * Author:   chenjianhua
- * Date:     2024/10/14 15:03
- * Description: ${DESCRIPTION}
- */
-@Data
-public class PythonResult {
-
-}

+ 82 - 167
service-daq/src/main/java/com/simuwang/daq/service/EmailParseService.java

@@ -8,8 +8,6 @@ import cn.hutool.core.date.DateUtil;
 import cn.hutool.core.exceptions.ExceptionUtil;
 import cn.hutool.core.map.MapUtil;
 import cn.hutool.core.util.StrUtil;
-import cn.hutool.http.HttpUtil;
-import cn.hutool.json.JSONUtil;
 import com.simuwang.base.common.conts.*;
 import com.simuwang.base.common.enums.ReportParserFileType;
 import com.simuwang.base.common.enums.ReportType;
@@ -25,14 +23,10 @@ import com.simuwang.base.pojo.dto.EmailContentInfoDTO;
 import com.simuwang.base.pojo.dto.EmailFundNavDTO;
 import com.simuwang.base.pojo.dto.MailboxInfoDTO;
 import com.simuwang.base.pojo.dto.report.ParseResult;
-import com.simuwang.base.pojo.dto.query.DataboardQuery;
-import com.simuwang.base.pojo.dto.report.PythonResult;
 import com.simuwang.base.pojo.dto.report.ReportData;
 import com.simuwang.base.pojo.dto.report.ReportParseStatus;
 import com.simuwang.base.pojo.dto.report.ReportParserParams;
 import com.simuwang.base.pojo.valuation.CmValuationTableAttribute;
-import com.simuwang.base.pojo.vo.*;
-import com.simuwang.daq.components.PythonReportConverter;
 import com.simuwang.daq.components.report.parser.ReportParser;
 import com.simuwang.daq.components.report.parser.ReportParserFactory;
 import com.simuwang.daq.components.report.writer.ReportWriter;
@@ -82,23 +76,18 @@ public class EmailParseService {
     private final ValuationTableMapper valuationTableMapper;
     private final ValuationTableAttributeMapper valuationTableAttributeMapper;
     private final FundPositionDetailMapper fundPositionDetailMapper;
-    private final DistributionMapper distributionMapper;
-    private final CompanyInformationMapper companyInformationMapper;
 
     @Value("${email.file.path}")
     private String path;
 
     @Autowired
     private DaqProperties properties;
-    @Autowired
-    private FundInfoMapper fundInfoMapper;
 
     /* 报告解析和入库的方法 */
     @Autowired
     private ReportParserFactory reportParserFactory;
     @Autowired
     private ReportWriterFactory reportWriterFactory;
-    private String pyBaseUrl;
 
     public EmailParseService(EmailTypeRuleMapper emailTypeRuleMapper, EmailRuleConfig emailRuleConfig,
                              EmailFieldMappingMapper emailFieldMapper, EmailParserFactory emailParserFactory,
@@ -107,7 +96,7 @@ public class EmailParseService {
                              AssetMapper assetMapper, NavMapper navMapper, FundService fundService,
                              FundAliasMapper fundAliasMapper,
                              ValuationTableMapper valuationTableMapper, ValuationTableAttributeMapper valuationTableAttributeMapper,
-                             FundPositionDetailMapper fundPositionDetailMapper, DistributionMapper distributionMapper, CompanyInformationMapper companyInformationMapper) {
+                             FundPositionDetailMapper fundPositionDetailMapper) {
         this.emailTypeRuleMapper = emailTypeRuleMapper;
         this.emailRuleConfig = emailRuleConfig;
         this.emailFieldMapper = emailFieldMapper;
@@ -124,8 +113,6 @@ public class EmailParseService {
         this.valuationTableMapper = valuationTableMapper;
         this.valuationTableAttributeMapper = valuationTableAttributeMapper;
         this.fundPositionDetailMapper = fundPositionDetailMapper;
-        this.distributionMapper = distributionMapper;
-        this.companyInformationMapper = companyInformationMapper;
     }
 
     /**
@@ -194,7 +181,7 @@ public class EmailParseService {
         emailId = saveEmailParseInfo(emailParseInfoDO);
 
         // python 报告解析接口结果
-        List<ReportData> dataList = ListUtil.list(false);
+        List<ParseResult<ReportData>> dataList = ListUtil.list(false);
         for (Map.Entry<EmailContentInfoDTO, List<EmailFundNavDTO>> fileNameNavEntry : fileNameNavMap.entrySet()) {
             // 保存邮件文件表
             EmailContentInfoDTO emailContentInfoDTO = fileNameNavEntry.getKey();
@@ -212,12 +199,10 @@ public class EmailParseService {
             if (CollUtil.isEmpty(fundNavDTOList) && !Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)) {
                 continue;
             }
-            // python接口解析结果
-            ReportData data = this.requestPyAndResult(fileId, emailContentInfoDTO);
-            if (data != null) {
-                // 保存报告解析数据
-                this.reportWriterFactory.getInstance(data.getReportType()).write(data);
-                dataList.add(data);
+            if (Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)) {
+                // 解析结果(可以从python获取或者自行解析)并保存报告
+                ParseResult<ReportData> parseResult = this.parseReportAndHandleResult(fileId, emailContentInfoDTO);
+                dataList.add(parseResult);
             }
             for (EmailFundNavDTO fundNavDTO : fundNavDTOList) {
                 // 设置净值数据的解析状态
@@ -231,17 +216,22 @@ public class EmailParseService {
         // 更新邮件解析结果 -> 当【净值日期】和【备案编码/基金名称】能正常解读,即识别为【成功】
         long successNavCount = fileNameNavMap.values().stream().flatMap(List::stream).filter(e -> e != null && StrUtil.isBlank(e.getFailReason())).count();
         emailParseStatus = successNavCount >= 1 ? EmailParseStatusConst.SUCCESS : EmailParseStatusConst.FAIL;
-        // 报告邮件有一条成功就表示整体成功
-        if (Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType) && CollUtil.isNotEmpty(dataList)) {
-            long count = dataList.size();
-            emailParseStatus = count >= 1 ? EmailParseStatusConst.SUCCESS : EmailParseStatusConst.FAIL;
-        }
         String failReason = null;
         if (emailParseStatus == EmailParseStatusConst.FAIL) {
             // 邮件解析失败时 -> 保存失败原因
             int hasPdfFile = emailContentInfoDTOList.stream().map(EmailContentInfoDTO::getFilePath).anyMatch(ExcelUtil::isPdf) ? 1 : 0;
             List<EmailFundNavDTO> navDTOList = fileNameNavMap.values().stream().flatMap(List::stream).toList();
-            failReason = hasPdfFile == 1 && CollUtil.isEmpty(navDTOList) ? "无法从pdf文件中获取到数据" : navDTOList.stream().map(EmailFundNavDTO::getFailReason).distinct().collect(Collectors.joining("/"));
+            failReason = hasPdfFile == 1 && CollUtil.isEmpty(navDTOList) ? "无法从PDF文件中获取到数据" : navDTOList.stream().map(EmailFundNavDTO::getFailReason).distinct().collect(Collectors.joining("/"));
+        }
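+        // Report emails: one successfully parsed report marks the whole email as SUCCESS; only when none succeed is it FAIL, with every parse message joined as the fail reason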
+        if (Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType) && CollUtil.isNotEmpty(dataList)) {
+            long sucNum = dataList.stream().filter(e -> Objects.equals(1, e.getStatus())).count();
+            if (sucNum > 0) {
+                emailParseStatus = EmailParseStatusConst.SUCCESS;
+            } else {
+                emailParseStatus = EmailParseStatusConst.FAIL;
+                failReason = dataList.stream().map(ParseResult::getMsg).collect(Collectors.joining("/"));
+            }
         }
         emailParseInfoMapper.updateParseStatus(emailId, emailParseStatus, failReason);
     }
@@ -380,61 +370,80 @@ public class EmailParseService {
         }).collect(Collectors.toList());
     }
 
-    private ReportData requestPyAndResult(int fileId, EmailContentInfoDTO emailContentInfoDTO) {
+    private ParseResult<ReportData> parseReportAndHandleResult(int fileId, EmailContentInfoDTO emailContentInfoDTO) {
+        ParseResult<ReportData> result = new ParseResult<>();
         String fileName = emailContentInfoDTO.getFileName();
         Integer emailType = emailContentInfoDTO.getEmailType();
+        if (!Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType) || StrUtil.isBlank(fileName)) {
+            result.setStatus(ReportParseStatus.NOT_A_REPORT.getCode());
+            result.setMsg(ReportParseStatus.NOT_A_REPORT.getMsg());
+            return result;
+        }
+        Pattern pattern = Pattern.compile("S(?:[A-Z]{0}[0-9]{5}|[A-Z][0-9]{4}|[A-Z]{2}[0-9]{3}|[A-Z]{3}[0-9]{2})");
+        Matcher matcher = pattern.matcher(fileName);
+        String registerNumber = null;
+        if (matcher.find()) {
+            registerNumber = matcher.group();
+        }
+        // 类型识别---先识别季度报告,没有季度再识别年度报告,最后识别月报
+        ReportType reportType = ReportType.MONTHLY;
+        if (StrUtil.containsAny(fileName, ReportType.QUARTERLY.getPatterns())) {
+            reportType = ReportType.QUARTERLY;
+        } else if (StrUtil.containsAny(fileName, ReportType.ANNUALLY.getPatterns())) {
+            reportType = ReportType.ANNUALLY;
+        }
+        // 解析器--如果开启python解析则直接调用python接口,否则根据文件后缀获取对应解析器
+        ReportParserFileType fileType;
+        if (Objects.equals(Boolean.TRUE, this.properties.getEnablePyParser())) {
+            fileType = ReportParserFileType.PYTHON;
+        } else {
+            String fileSuffix = StrUtil.subAfter(fileName, ".", true);
+            fileType = ReportParserFileType.getBySuffix(fileSuffix);
+        }
+        // 解析报告
+        ReportParserParams params = null;
         ReportData reportData = null;
-        if (Objects.equals(EmailTypeConst.REPORT_EMAIL_TYPE, emailType)) {
-            if (StrUtil.isBlank(fileName)) {
-                return null;
-            }
-            Pattern pattern = Pattern.compile("S(?:[A-Z]{0}[0-9]{5}|[A-Z][0-9]{4}|[A-Z]{2}[0-9]{3}|[A-Z]{3}[0-9]{2})");
-            Matcher matcher = pattern.matcher(fileName);
-            String registerNumber = null;
-            if (matcher.find()) {
-                registerNumber = matcher.group();
-            }
-            int type = 0;
-            if (fileName.contains("季报") || fileName.contains("季度")) {
-                type = 1;
-            } else if (fileName.contains("年报") || fileName.contains("年度")) {
-                type = 2;
-            }
-            String api = "/api/v1/parse/amac_report";
-            Map<String, Object> params = MapUtil.newHashMap(16);
-            params.put("file_id", fileId);
-            params.put("file_path", emailContentInfoDTO.getFilePath());
-            params.put("register_number", registerNumber);
-            params.put("file_type", type);
-            params.put("file_name", fileName);
-            if (StrUtil.isNotBlank(registerNumber)) {
-                FundAndCompanyInfoDO info = this.fundInfoMapper.queryFundAndTrustByRegisterNumber(registerNumber);
-                if (info != null) {
-                    params.put("fund_name", info.getFundName());
-                    params.put("trust_name", info.getCompanyName());
-                }
+        StopWatch parserWatch = new StopWatch();
+        parserWatch.start();
+        try {
+            params = ReportParserParams.builder().fileId(fileId).filename(fileName)
+                    .filepath(emailContentInfoDTO.getFilePath()).registerNumber(registerNumber).build();
+            ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, fileType);
+            reportData = instance.parse(params);
+            result.setStatus(1);
+            result.setMsg("报告解析成功");
+            result.setData(reportData);
+        } catch (ReportParseException e) {
+            log.error("报告{}解析失败\n{}", params, e.getMsg());
+            result.setStatus(e.getCode());
+            result.setMsg(e.getMsg());
+        } catch (Exception e) {
+            log.error("报告{}解析失败\n{}", params, ExceptionUtil.stacktraceToString(e));
+            result.setStatus(ReportParseStatus.PARSE_FAIL.getCode());
+            result.setMsg(StrUtil.format(ReportParseStatus.PARSE_FAIL.getMsg(), e.getMessage()));
+        } finally {
+            parserWatch.stop();
+            if (log.isInfoEnabled()) {
+                log.info("报告{}解析结果为{},耗时{}ms", params, reportData, parserWatch.getTotalTimeMillis());
             }
-            long millis = System.currentTimeMillis();
+        }
+        // 保存报告解析结果
+        if (reportData != null) {
+            StopWatch writeWatch = new StopWatch();
+            writeWatch.start();
             try {
-                String body = HttpUtil.post(this.pyBaseUrl + api, JSONUtil.toJsonStr(params));
-                PythonResult<?> result = PythonReportConverter.convert(JSONUtil.parseObj(body), type);
-                if (!Objects.equals(1, result.getStatus())) {
-                    log.warn("报告{} 解析失败:{}", params, result.getMsg());
-                    return null;
-                }
-                reportData = result.getData();
-                if (log.isInfoEnabled()) {
-                    log.info("报告{}结果为:\n{}", params, reportData);
-                }
+                ReportWriter<ReportData> instance = this.reportWriterFactory.getInstance(reportType);
+                instance.write(reportData);
             } catch (Exception e) {
-                log.error("请求python的报告解析接口报错\n{}", ExceptionUtil.stacktraceToString(e));
+                log.error("报告{}结果保存失败\n{}", params, ExceptionUtil.stacktraceToString(e));
             } finally {
+                writeWatch.stop();
                 if (log.isInfoEnabled()) {
-                    log.info("当前报告{}解析完成,总计耗时{}ms", params, (System.currentTimeMillis() - millis));
+                    log.info("报告{}解析结果保存完成,耗时{}ms", params, writeWatch.getTotalTimeMillis());
                 }
             }
         }
-        return reportData;
+        return result;
     }
 
     private void saveNavAndAssetNet(Integer fileId, List<EmailFundNavDTO> fundNavDTOList, Date parseDate) {
@@ -848,8 +857,8 @@ public class EmailParseService {
                     emailContentInfoDTOList.add(emailContentInfoDTO);
                 }
                 if (CollUtil.isNotEmpty(emailContentInfoDTOList)) {
-                    // 估值表邮件不展示正文html文件
-                    if (emailType.equals(EmailTypeConst.VALUATION_EMAIL_TYPE)) {
+                    // 估值表或定期报告邮件不展示正文html文件
+                    if (emailType.equals(EmailTypeConst.VALUATION_EMAIL_TYPE) || emailType.equals(EmailTypeConst.REPORT_EMAIL_TYPE)) {
                         emailContentInfoDTOList = emailContentInfoDTOList.stream().filter(e -> !ExcelUtil.isHTML(e.getFilePath())).toList();
                     }
                     emailContentInfoDTOList.forEach(e -> {
@@ -898,100 +907,6 @@ public class EmailParseService {
         }
     }
 
-    public EmailParseCountBoardVO searchEmailCount(DataboardQuery databoardQuery) {
-        List<Map<String, Object>> dataList = emailParseInfoMapper.searchEmailDataBoard(databoardQuery);
-        EmailParseCountBoardVO result = new EmailParseCountBoardVO();
-        Integer total = 0;
-        for(Map<String, Object> data : dataList){
-            if(1 == ((Integer)data.get("parse_status")).intValue()){
-                result.setSuccess(((Long)data.get("total")).intValue());
-            }else{
-                result.setFail(((Long)data.get("total")).intValue());
-            }
-            total+=((Long)data.get("total")).intValue();
-        }
-        result.setTotal(total);
-        return result;
-    }
-
-    public EmailParseTypeBoardVO searchEmailTypeCount(DataboardQuery databoardQuery) {
-        //邮件类型,1-净值,2-估值表,3-定期报告
-        List<Map<String, Object>> dataList = emailParseInfoMapper.searchEmailTypeCount(databoardQuery);
-        EmailParseTypeBoardVO result = new EmailParseTypeBoardVO();
-        Integer total = 0;
-        for(Map<String, Object> data : dataList){
-            Integer emailType = (Integer) data.get("email_type");
-            Long totalType = (Long)data.get("total");
-            if(1 == emailType){
-                result.setNav(totalType.intValue());
-            }else if(2 == emailType){
-                result.setValuation(totalType.intValue());
-            }else{
-                result.setReport(totalType.intValue());
-            }
-            total+=totalType.intValue();
-        }
-        result.setTotal(total);
-        return result;
-    }
-
-    public EmailParseFailAnalysisVO parseFailAnalysis(DataboardQuery databoardQuery) {
-        EmailParseFailAnalysisVO emailParseFailAnalysisVO = new EmailParseFailAnalysisVO();
-        if(databoardQuery.getEmailType() == null || databoardQuery.getEmailType().equals(1)){
-            NavFailAnalysisVO navFailAnalysisVO = new NavFailAnalysisVO();
-            Long pdfNoData = emailParseInfoMapper.countpdfNoData(databoardQuery,"无法从PDF文件中获取到数据");
-            navFailAnalysisVO.setPdfNoData(pdfNoData);
-            Long priceDateMiss = emailParseInfoMapper.countpdfNoData(databoardQuery,"缺少净值日期");
-            navFailAnalysisVO.setPriceDateMiss(priceDateMiss);
-            Long navMiss = emailParseInfoMapper.countpdfNoData(databoardQuery,"单位净值和累计净值和资产净值均缺失");
-            navFailAnalysisVO.setNavMiss(navMiss);
-            Long fundNameNumberMiss = emailParseInfoMapper.countpdfNoData(databoardQuery,"单位净值和累计净值和资产净值均缺失");
-            navFailAnalysisVO.setFundNameNumberMiss(fundNameNumberMiss);
-            emailParseFailAnalysisVO.setNavFailAnalysisVO(navFailAnalysisVO);
-        }else if(databoardQuery.getEmailType().equals(2)){
-            ValuationFailAnalysisVO valuationFailAnalysisVO = new ValuationFailAnalysisVO();
-            Long fileTypeError = emailParseInfoMapper.countpdfNoData(databoardQuery,"文件格式错误");
-            valuationFailAnalysisVO.setFileTypeError(fileTypeError);
-            Long columnMiss = emailParseInfoMapper.countpdfNoData(databoardQuery,"无市值列或无数量列");
-            valuationFailAnalysisVO.setColumnMiss(columnMiss);
-            Long numbericMiss = emailParseInfoMapper.countpdfNoData(databoardQuery,"非数值数据");
-            valuationFailAnalysisVO.setNumbericMiss(numbericMiss);
-            Long noData = emailParseInfoMapper.countpdfNoData(databoardQuery,"无数据");
-            valuationFailAnalysisVO.setNoData(noData);
-            Long templateError = emailParseInfoMapper.countpdfNoData(databoardQuery,"模板不支持");
-            valuationFailAnalysisVO.setTemplateError(templateError);
-            emailParseFailAnalysisVO.setValuationFailAnalysisVO(valuationFailAnalysisVO);
-        }else if(databoardQuery.getEmailType().equals(3)){
-            ReportFailAnalysisVO reportFailAnalysisVO = new ReportFailAnalysisVO();
-            Long scannedFile = emailParseInfoMapper.countpdfNoData(databoardQuery,"报告为扫描件");
-            reportFailAnalysisVO.setScannedFile(scannedFile);
-            Long errorAmacFileType = emailParseInfoMapper.countpdfNoData(databoardQuery,"报告不是基协统一格式");
-            reportFailAnalysisVO.setErrorAmacFileType(errorAmacFileType);
-            Long watermarkFileError = emailParseInfoMapper.countpdfNoData(databoardQuery,"报告水印干扰导致部分没有解析");
-            reportFailAnalysisVO.setWatermarkFileError(watermarkFileError);
-            Long noReport = emailParseInfoMapper.countpdfNoData(databoardQuery,"报告不是定期报告");
-            reportFailAnalysisVO.setNoReport(noReport);
-            emailParseFailAnalysisVO.setReportFailAnalysisVO(reportFailAnalysisVO);
-        }
-        return emailParseFailAnalysisVO;
-    }
-
-    public EmailParseDataViewVO dataOverview(DataboardQuery databoardQuery) {
-        EmailParseDataViewVO dataViewVO = new EmailParseDataViewVO();
-        dataViewVO.setEmailNum(emailParseInfoMapper.countEmailTotal(null));
-        dataViewVO.setNavEmailNum(emailParseInfoMapper.countEmailTotal(1));
-        dataViewVO.setValuationEmailNum(emailParseInfoMapper.countEmailTotal(2));
-        dataViewVO.setReportEmailNum(emailParseInfoMapper.countEmailTotal(3));
-        dataViewVO.setParseNavNum(emailFundNavMapper.countEmailNavTotal());
-        dataViewVO.setParseAssetNum(emailFundAssetMapper.countEmailAssetTotal());
-        dataViewVO.setNavNum(navMapper.countNavTotal());
-        dataViewVO.setAssetNum(assetMapper.countAssetTotal());
-        dataViewVO.setDistribute(distributionMapper.countDistributionTotal());
-        dataViewVO.setFundNum(fundInfoMapper.countFundTotal());
-        dataViewVO.setCompanyNum(companyInformationMapper.countCompanyTotal());
-        return dataViewVO;
-    }
-
     private static class PythonData {
         private Integer fileId;
         private Integer status;