|
@@ -1,275 +0,0 @@
|
|
|
-package com.simuwang.daq.components.report.parser.pdf;
|
|
|
-
|
|
|
-import cn.hutool.core.collection.CollUtil;
|
|
|
-import com.simuwang.base.mapper.EmailFieldMappingMapper;
|
|
|
-import com.simuwang.base.pojo.dos.EmailFieldMappingDO;
|
|
|
-import com.simuwang.base.pojo.dto.report.ReportBaseInfoDTO;
|
|
|
-import com.simuwang.base.pojo.dto.report.ReportData;
|
|
|
-import com.simuwang.base.pojo.dto.report.ReportFundInfoDTO;
|
|
|
-import com.simuwang.base.pojo.dto.report.ReportParserParams;
|
|
|
-import com.simuwang.daq.components.report.parser.ReportParser;
|
|
|
-import com.smppw.common.pojo.ValueLabelVO;
|
|
|
-import org.slf4j.Logger;
|
|
|
-import org.slf4j.LoggerFactory;
|
|
|
-
|
|
|
-import java.io.IOException;
|
|
|
-import java.util.Calendar;
|
|
|
-import java.util.List;
|
|
|
-import java.util.regex.Matcher;
|
|
|
-import java.util.regex.Pattern;
|
|
|
-import java.util.stream.Collectors;
|
|
|
-
|
|
|
-public abstract class AbstractReportParser<T extends ReportData> implements ReportParser<T> {
|
|
|
- protected final Logger logger = LoggerFactory.getLogger(this.getClass());
|
|
|
-
|
|
|
- private final EmailFieldMappingMapper fieldMappingMapper;
|
|
|
- /**
|
|
|
- * 字段匹配规则
|
|
|
- */
|
|
|
- protected List<ValueLabelVO> fieldMapper;
|
|
|
-
|
|
|
- public AbstractReportParser(EmailFieldMappingMapper fieldMappingMapper) {
|
|
|
- this.fieldMappingMapper = fieldMappingMapper;
|
|
|
- }
|
|
|
-
|
|
|
- @Override
|
|
|
- public T parse(ReportParserParams params) throws IOException {
|
|
|
- List<EmailFieldMappingDO> emailFieldMapping = this.fieldMappingMapper.getEmailFieldMapping();
|
|
|
- if (CollUtil.isEmpty(emailFieldMapping)) {
|
|
|
- this.logger.error("未设置报告解析规则!");
|
|
|
- return null;
|
|
|
- }
|
|
|
- this.fieldMapper = emailFieldMapping.stream().map(e -> new ValueLabelVO(e.getCode(), e.getName())).collect(Collectors.toList());
|
|
|
- String reportName = this.initAndGetReportName(params);
|
|
|
- ReportBaseInfoDTO reportInfo = this.buildReportInfo(params, reportName);
|
|
|
- ReportFundInfoDTO reportFundInfo = this.parseBaseInfo(params);
|
|
|
- return this.parseExtInfoAndSetData(reportInfo, reportFundInfo, params);
|
|
|
- }
|
|
|
-
|
|
|
- protected abstract String initAndGetReportName(ReportParserParams params) throws IOException;
|
|
|
-
|
|
|
- private ReportBaseInfoDTO buildReportInfo(ReportParserParams params, String reportName) {
|
|
|
- ReportBaseInfoDTO reportInfo = new ReportBaseInfoDTO();
|
|
|
- reportInfo.setFileId(params.getFileId());
|
|
|
- reportInfo.setReportName(reportName);
|
|
|
- reportInfo.setReportType(this.matchReportType(reportName));
|
|
|
- reportInfo.setReportDate(this.matchReportDate(reportName));
|
|
|
- return reportInfo;
|
|
|
- }
|
|
|
-
|
|
|
- protected abstract ReportFundInfoDTO parseBaseInfo(ReportParserParams params);
|
|
|
-
|
|
|
- protected abstract T parseExtInfoAndSetData(ReportBaseInfoDTO baseInfo, ReportFundInfoDTO fundInfo, ReportParserParams params);
|
|
|
-
|
|
|
-// protected abstract List<EXT> parseExtInfo();
|
|
|
-
|
|
|
-// protected abstract void saveResult(ReportInfo reportInfo, ReportFundInfo reportFundInfo, List<EXT> exts);
|
|
|
-
|
|
|
-// private Map<String, List<String>> generateWatermarkMap(String watermarkName) {
|
|
|
-// Map<String, List<String>> result = MapUtil.newHashMap(32);
|
|
|
-// // 生成水印列表
|
|
|
-// String text = watermarkName;
|
|
|
-// text = text.replaceAll("[()]", ""); // 移除括号
|
|
|
-// List<String> textList = new ArrayList<>(new HashSet<>(convertStringToList(text)));
|
|
|
-// Collections.reverse(textList);
|
|
|
-// StringBuilder sb = new StringBuilder(textList.size());
|
|
|
-// for (String ch : textList) {
|
|
|
-// sb.append(ch);
|
|
|
-// }
|
|
|
-// String joinedText = sb.toString();
|
|
|
-//
|
|
|
-// // 基本水印列表
|
|
|
-// List<String> wkList = new ArrayList<>();
|
|
|
-// for (String ch : textList) {
|
|
|
-// wkList.add(ch + "\r\n");
|
|
|
-// wkList.add("\r\n" + ch);
|
|
|
-// }
|
|
|
-//
|
|
|
-// // 查找数字
|
|
|
-// List<String> matches = findDigits(watermarkName);
|
|
|
-// if (!matches.isEmpty()) {
|
|
|
-// for (String match : matches) {
|
|
|
-// wkList.add("\r\n" + match);
|
|
|
-// wkList.add(match + "\r\n");
|
|
|
-// }
|
|
|
-// }
|
|
|
-// wkList.add("-");
|
|
|
-// wkList.add("【");
|
|
|
-// wkList.add("】");
|
|
|
-// wkList.add("\r");
|
|
|
-// wkList.add("\n");
|
|
|
-// wkList.add("\r\n");
|
|
|
-//
|
|
|
-// String noNumberText = removeDigits(joinedText);
|
|
|
-//
|
|
|
-// // 生成不同字段的水印列表
|
|
|
-// result.put("report_name", new ArrayList<>(wkList));
|
|
|
-// result.get("report_name").addAll(convertStringToList("有限公司"));
|
|
|
-//
|
|
|
-// result.put("less", new ArrayList<>(wkList));
|
|
|
-//
|
|
|
-// result.put("more", new ArrayList<>(wkList));
|
|
|
-// result.get("more").addAll(convertStringToList(noNumberText));
|
|
|
-//
|
|
|
-// result.put("leverage", new ArrayList<>(wkList));
|
|
|
-// result.get("leverage").addAll(convertStringToList(removeKeywords(noNumberText, "基金资产")));
|
|
|
-//
|
|
|
-// result.put("base_info", new ArrayList<>(wkList));
|
|
|
-// result.get("base_info").addAll(convertStringToList(removeKeywords(text, "基", "金", "投资", "管理", "有", "份", "融", "资", "产", "本", "号", "收益", "策略", "期")));
|
|
|
-//
|
|
|
-// result.put("industry", new ArrayList<>(wkList));
|
|
|
-// result.get("industry").addAll(convertStringToList(removeKeywords(noNumberText, "基金融公产")));
|
|
|
-//
|
|
|
-// result.put("market_value", new ArrayList<>(Collections.singletonList("\n")));
|
|
|
-// return result;
|
|
|
-// }
|
|
|
-
|
|
|
-// private List<String> findDigits(String text) {
|
|
|
-// List<String> digits = new ArrayList<>();
|
|
|
-// Pattern pattern = Pattern.compile("\\d");
|
|
|
-// Matcher matcher = pattern.matcher(text);
|
|
|
-// while (matcher.find()) {
|
|
|
-// digits.add(matcher.group());
|
|
|
-// }
|
|
|
-// return digits;
|
|
|
-// }
|
|
|
-//
|
|
|
-// private String removeDigits(String text) {
|
|
|
-// return text.replaceAll("\\d", "");
|
|
|
-// }
|
|
|
-//
|
|
|
-// private String removeKeywords(String text, String... keywords) {
|
|
|
-// for (String keyword : keywords) {
|
|
|
-// text = text.replaceAll(keyword, "");
|
|
|
-// }
|
|
|
-// return text;
|
|
|
-// }
|
|
|
-//
|
|
|
-// private List<String> convertStringToList(String text) {
|
|
|
-// List<String> charList = new ArrayList<>();
|
|
|
-// for (char c : text.toCharArray()) {
|
|
|
-// charList.add(c + "");
|
|
|
-// }
|
|
|
-// return charList;
|
|
|
-// }
|
|
|
-
|
|
|
-// protected String processString(List<String> wmList, String string) {
|
|
|
-// if (StrUtil.isBlank(string)) {
|
|
|
-// return null;
|
|
|
-// }
|
|
|
-// // 生成正则表达式模式
|
|
|
-// String pat = String.join("|", wmList);
|
|
|
-// // 使用正则表达式移除wmList中的元素
|
|
|
-// string = removeMatches(string, pat);
|
|
|
-// // 替换中文括号为英文括号
|
|
|
-// string = string.replace("(", "(").replace(")", ")");
|
|
|
-// // 移除空格
|
|
|
-// string = string.replace(" ", "");
|
|
|
-// // 如果字符串以括号开头,则移除第一个字符
|
|
|
-// if (startsWithParenthesis(string)) {
|
|
|
-// string = string.substring(1);
|
|
|
-// }
|
|
|
-//
|
|
|
-// return string;
|
|
|
-// }
|
|
|
-
|
|
|
-// private String removeMatches(String input, String pattern) {
|
|
|
-// // 编译正则表达式
|
|
|
-// Pattern compiledPattern = Pattern.compile(pattern);
|
|
|
-// // 创建Matcher对象
|
|
|
-// Matcher matcher = compiledPattern.matcher(input);
|
|
|
-// // 使用replaceAll方法替换所有匹配到的字符为空字符串
|
|
|
-// return matcher.replaceAll("");
|
|
|
-// }
|
|
|
-//
|
|
|
-// private boolean startsWithParenthesis(String input) {
|
|
|
-// // 匹配以括号开头的字符串
|
|
|
-// Pattern pattern = Pattern.compile("^[()].*");
|
|
|
-// Matcher matcher = pattern.matcher(input);
|
|
|
-// return matcher.find();
|
|
|
-// }
|
|
|
-
|
|
|
- /**
|
|
|
- * 匹配报告日期
|
|
|
- *
|
|
|
- * @param string 文本内容
|
|
|
- * @return 报告日期
|
|
|
- */
|
|
|
- private String matchReportDate(String string) {
|
|
|
- if (string == null) {
|
|
|
- return null;
|
|
|
- }
|
|
|
-
|
|
|
- // 编译正则表达式模式
|
|
|
- Pattern pat1 = Pattern.compile("(2\\d{3}).*([一二三四1234])季度"); // 2023年XXX3季度
|
|
|
- Pattern pat2 = Pattern.compile("\\d{4}-\\d{2}-\\d{2}"); // 2023-12-31
|
|
|
- Pattern pat3 = Pattern.compile("(2\\d{3})年年度"); // 2023年年度
|
|
|
- Pattern pat4 = Pattern.compile("(\\d{4})年(\\d{1,2})月"); // 2023年12月
|
|
|
-
|
|
|
- // 创建Matcher对象
|
|
|
- Matcher matcher1 = pat1.matcher(string);
|
|
|
- Matcher matcher2 = pat2.matcher(string);
|
|
|
- Matcher matcher3 = pat3.matcher(string);
|
|
|
- Matcher matcher4 = pat4.matcher(string);
|
|
|
-
|
|
|
- // 尝试匹配
|
|
|
- if (matcher1.find()) {
|
|
|
- String year = matcher1.group(1);
|
|
|
- String quarter = matcher1.group(2);
|
|
|
- return switch (quarter) {
|
|
|
- case "一", "1" -> year + "-03-31";
|
|
|
- case "二", "2" -> year + "-06-30";
|
|
|
- case "三", "3" -> year + "-09-30";
|
|
|
- case "四", "4" -> year + "-12-31";
|
|
|
- default -> null;
|
|
|
- };
|
|
|
- } else if (matcher2.find()) {
|
|
|
- return matcher2.group();
|
|
|
- } else if (matcher3.find()) {
|
|
|
- return matcher3.group(1) + "-12-31";
|
|
|
- } else if (matcher4.find()) {
|
|
|
- String year = matcher4.group(1);
|
|
|
- String month = matcher4.group(2);
|
|
|
- int lastDayOfMonth = getLastDayOfMonth(Integer.parseInt(year), Integer.parseInt(month));
|
|
|
- return year + "-" + padZero(month) + "-" + padZero(lastDayOfMonth + "");
|
|
|
- } else {
|
|
|
- return null;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 匹配报告类型,如“季度”、“年度”
|
|
|
- *
|
|
|
- * @param string 输入字符串
|
|
|
- * @return 匹配到的报告类型子字符串,如果没有匹配到则返回null
|
|
|
- */
|
|
|
- private String matchReportType(String string) {
|
|
|
- if (string == null) {
|
|
|
- return null;
|
|
|
- }
|
|
|
-
|
|
|
- // 编译正则表达式模式
|
|
|
- Pattern pattern = Pattern.compile("月|季度|年度");
|
|
|
-
|
|
|
- // 创建Matcher对象
|
|
|
- Matcher matcher = pattern.matcher(string);
|
|
|
-
|
|
|
- // 尝试匹配
|
|
|
- if (matcher.find()) {
|
|
|
- return matcher.group();
|
|
|
- } else {
|
|
|
- return null;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- private int getLastDayOfMonth(int year, int month) {
|
|
|
- Calendar calendar = Calendar.getInstance();
|
|
|
- calendar.set(Calendar.YEAR, year);
|
|
|
- calendar.set(Calendar.MONTH, month - 1); // Calendar.MONTH 是从0开始的
|
|
|
- return calendar.getActualMaximum(Calendar.DAY_OF_MONTH);
|
|
|
- }
|
|
|
-
|
|
|
- private String padZero(String number) {
|
|
|
- return String.format("%02d", Integer.parseInt(number));
|
|
|
- }
|
|
|
-}
|