NavEmailParser.java 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651
  1. package com.simuwang.daq.service;
  2. import cn.hutool.core.collection.CollUtil;
  3. import cn.hutool.core.collection.ListUtil;
  4. import cn.hutool.core.exceptions.ExceptionUtil;
  5. import cn.hutool.core.lang.Pair;
  6. import cn.hutool.core.map.MapUtil;
  7. import cn.hutool.core.util.StrUtil;
  8. import com.simuwang.base.common.conts.EmailDataDirectionConst;
  9. import com.simuwang.base.common.conts.EmailFieldConst;
  10. import com.simuwang.base.common.conts.EmailTypeConst;
  11. import com.simuwang.base.common.util.DateUtils;
  12. import com.simuwang.base.common.util.ExcelUtil;
  13. import com.simuwang.base.common.util.StringUtil;
  14. import com.simuwang.base.pojo.dto.EmailContentInfoDTO;
  15. import com.simuwang.base.pojo.dto.EmailFundNavDTO;
  16. import com.simuwang.base.pojo.dto.FieldPositionDTO;
  17. import org.apache.poi.ss.usermodel.Cell;
  18. import org.apache.poi.ss.usermodel.Row;
  19. import org.apache.poi.ss.usermodel.Sheet;
  20. import org.slf4j.Logger;
  21. import org.slf4j.LoggerFactory;
  22. import org.springframework.beans.factory.annotation.Value;
  23. import org.springframework.stereotype.Component;
  24. import java.io.File;
  25. import java.util.*;
  26. import java.util.stream.Collectors;
  27. /**
  28. * @author mozuwen
  29. * @date 2024-09-04
  30. * @description 净值邮件解析器
  31. */
  32. @Component
  33. public class NavEmailParser extends AbstractEmailParser {
  34. private static final Logger log = LoggerFactory.getLogger(AbstractEmailParser.class);
  35. @Value("${email.file.path}")
  36. private String path;
  37. @Value("${email.parse.force-template-enable}")
  38. private boolean forceTemplateEnable;
  39. private final ValuationEmailParser valuationEmailParser;
  40. private final EmailTemplateService emailTemplateService;
  41. private static final int MAX_COLUMN = 20;
  42. private static final List<String> NOT_CONVERT_FIELD_LIST = ListUtil.toList("TA代码");
  43. public NavEmailParser(ValuationEmailParser valuationEmailParser, EmailTemplateService emailTemplateService) {
  44. this.valuationEmailParser = valuationEmailParser;
  45. this.emailTemplateService = emailTemplateService;
  46. }
  47. @Override
  48. public boolean isSupport(Integer emailType) {
  49. return EmailTypeConst.NAV_EMAIL_TYPE.equals(emailType);
  50. }
  51. @Override
  52. public List<EmailFundNavDTO> parse(EmailContentInfoDTO emailContentInfoDTO, Map<String, List<String>> emailFieldMap) {
  53. List<EmailFundNavDTO> emailFundNavDTOList = CollUtil.newArrayList();
  54. String emailContent = emailContentInfoDTO.getEmailContent();
  55. // 1.解析邮件正文
  56. if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isHTML(emailContentInfoDTO.getFileName())) {
  57. String excelFilePath = emailContentInfoDTO.getFilePath().replace(".html", ".xlsx");
  58. emailFundNavDTOList = parseEmailContent(emailContent, excelFilePath, emailFieldMap);
  59. }
  60. // 2.解析邮件excel附件
  61. if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isExcel(emailContentInfoDTO.getFileName())) {
  62. List<EmailFundNavDTO> fundNavDTOList = parseExcelFile(emailContentInfoDTO.getFilePath(), emailFieldMap);
  63. Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
  64. }
  65. // 3.解析邮件pdf附件
  66. if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && ExcelUtil.isPdf(emailContentInfoDTO.getFileName())) {
  67. String excelFilePath = path + emailContentInfoDTO.getEmailAddress() + "/" + emailContentInfoDTO.getEmailDate().substring(0, 10).replaceAll("-", "")
  68. + "/" + emailContentInfoDTO.getFileName().replace(".pdf", ".xlsx").replace(".PDF", ".xlsx");
  69. List<EmailFundNavDTO> fundNavDTOList = parsePdfFile(emailContentInfoDTO.getFilePath(), excelFilePath, emailFieldMap);
  70. Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
  71. }
  72. // 4.解析邮件zip,rar附件
  73. if (StrUtil.isNotBlank(emailContentInfoDTO.getFilePath()) && (ExcelUtil.isZip(emailContentInfoDTO.getFileName()) || ExcelUtil.isRAR(emailContentInfoDTO.getFileName()))) {
  74. List<EmailFundNavDTO> fundNavDTOList = parsePackageFile(emailContentInfoDTO, emailContentInfoDTO.getFileName(), emailContentInfoDTO.getFilePath(), emailFieldMap);
  75. Optional.ofNullable(fundNavDTOList).ifPresent(emailFundNavDTOList::addAll);
  76. }
  77. // 兼容净值邮件,但附件是估值表的情况
  78. if (CollUtil.isEmpty(emailFundNavDTOList)) {
  79. // 判断文件名是否包含"估值表"
  80. String fileName = emailContentInfoDTO.getFileName();
  81. if (StrUtil.isNotBlank(fileName) && fileName.contains("估值表")) {
  82. emailFundNavDTOList = valuationEmailParser.parse(emailContentInfoDTO, emailFieldMap);
  83. }
  84. }
  85. // email_fund_nav增加template_id字段
  86. if (CollUtil.isNotEmpty(emailFundNavDTOList)) {
  87. emailFundNavDTOList.forEach(e -> e.setTemplateId(0));
  88. }
  89. // 通用模版解析 -> 根据配置的模板进行解析
  90. long successNavCount = 0;
  91. if (CollUtil.isNotEmpty(emailFundNavDTOList)) {
  92. successNavCount = emailFundNavDTOList.stream().filter(e -> e != null && StrUtil.isBlank(e.getFailReason())).count();
  93. }
  94. //如果通用模板解析不到正确数据,就走模板解析一次
  95. if (successNavCount == 0 || forceTemplateEnable) {
  96. List<EmailFundNavDTO> templateFundNavDTOList = emailTemplateService.parseUsingTemplate(emailContentInfoDTO);
  97. if(CollUtil.isNotEmpty(templateFundNavDTOList)){
  98. emailFundNavDTOList.addAll(templateFundNavDTOList);
  99. }
  100. }
  101. return emailFundNavDTOList;
  102. }
  103. private List<EmailFundNavDTO> parsePackageFile(EmailContentInfoDTO emailContentInfoDTO, String fileName, String filePath, Map<String, List<String>> emailFieldMap) {
  104. List<EmailFundNavDTO> emailFundNavDTOList = CollUtil.newArrayList();
  105. if(ExcelUtil.isZip(filePath)){
  106. String destPath = filePath.replaceAll(".zip", "").replaceAll(".ZIP", "");
  107. log.info("压缩包地址:{},解压后文件地址:{}", filePath, destPath);
  108. List<String> dir = ExcelUtil.extractCompressedFiles(filePath, destPath);
  109. for (String zipFilePath : dir) {
  110. emailFundNavDTOList.addAll(parseZipFile(emailContentInfoDTO, zipFilePath, emailFieldMap));
  111. File file = new File(zipFilePath);
  112. if (file.isDirectory()) {
  113. for (String navFilePath : Objects.requireNonNull(file.list())) {
  114. emailFundNavDTOList.addAll(parseZipFile(emailContentInfoDTO, navFilePath, emailFieldMap));
  115. }
  116. }
  117. }
  118. }
  119. try{
  120. if(ExcelUtil.isRAR(filePath)){
  121. String destPath = filePath.replaceAll(".rar", "").replaceAll(".RAR", "");
  122. File destFile = new File(destPath);
  123. if (!destFile.exists()) {
  124. destFile.mkdirs();
  125. }
  126. List<String> rarDir = ExcelUtil.extractRar(filePath, destPath);
  127. for (String rarFilePath : rarDir) {
  128. emailFundNavDTOList.addAll(parseZipFile(emailContentInfoDTO, rarFilePath, emailFieldMap));
  129. File file = new File(rarFilePath);
  130. if (file.isDirectory()) {
  131. for (String navFilePath : Objects.requireNonNull(file.list())) {
  132. emailFundNavDTOList.addAll(parseZipFile(emailContentInfoDTO, navFilePath, emailFieldMap));
  133. }
  134. }
  135. }
  136. }
  137. }catch (Exception e){
  138. log.error(e.getMessage(),e);
  139. }
  140. return emailFundNavDTOList;
  141. }
  142. private List<EmailFundNavDTO> parseZipFile(EmailContentInfoDTO emailContentInfoDTO, String zipFilePath, Map<String, List<String>> emailFieldMap) {
  143. List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
  144. if (ExcelUtil.isPdf(zipFilePath)) {
  145. String excelFilePath = zipFilePath.replace(".pdf", ".xlsx").replace(".PDF", ".xlsx");
  146. fundNavDTOList = parsePdfFile(zipFilePath, excelFilePath, emailFieldMap);
  147. }
  148. if (ExcelUtil.isExcel(zipFilePath)) {
  149. fundNavDTOList = parseExcelFile(zipFilePath, emailFieldMap);
  150. }
  151. if (ExcelUtil.isZip(zipFilePath)) {
  152. String name = new File(zipFilePath).getName();
  153. fundNavDTOList = parsePackageFile(emailContentInfoDTO, name, zipFilePath, emailFieldMap);
  154. }
  155. return fundNavDTOList;
  156. }
  157. private List<EmailFundNavDTO> parsePdfFile(String filePath, String excelFilePath, Map<String, List<String>> emailFieldMap) {
  158. excelFilePath = ExcelUtil.pdfConvertToExcel(filePath, excelFilePath);
  159. if (StrUtil.isBlank(excelFilePath)) {
  160. return CollUtil.newArrayList();
  161. }
  162. return parseExcelFileFrom(excelFilePath, emailFieldMap);
  163. }
  164. private List<EmailFundNavDTO> parseExcelFileFrom(String filePath, Map<String, List<String>> emailFieldMap) {
  165. Sheet sheet = ExcelUtil.getFirstSheet(filePath);
  166. if (sheet == null) {
  167. log.info("获取不到有效的sheet页面,文件路径:{}", filePath);
  168. return CollUtil.newArrayList();
  169. }
  170. // 1.找到表头所在位置
  171. Map<String, Pair<Integer, Integer>> fieldPositionMap = getFieldPositionFromPdf(sheet, emailFieldMap);
  172. if (MapUtil.isEmpty(fieldPositionMap)) {
  173. log.warn("找不到文件表头字段 -> 文件:{}", filePath);
  174. return CollUtil.newArrayList();
  175. }
  176. // 2.解析sheet中的净值数据
  177. List<EmailFundNavDTO> emailFundNavDTOList = parseSheetData(filePath, sheet, fieldPositionMap, null);
  178. // 3.校验净值数据格式 并 设置数据校验不通过的原因
  179. if (CollUtil.isNotEmpty(emailFundNavDTOList)) {
  180. emailFundNavDTOList.forEach(e -> e.setFailReason(super.checkDataFailReason(e)));
  181. }
  182. return emailFundNavDTOList;
  183. }
  184. private Map<String, Pair<Integer, Integer>> getFieldPositionFromPdf(Sheet sheet, Map<String, List<String>> emailFieldMap) {
  185. Map<String, List<FieldPositionDTO>> tempFieldPositionMap = MapUtil.newHashMap();
  186. int lastRowNum = sheet.getLastRowNum();
  187. for (int rowNum = 0; rowNum <= lastRowNum; rowNum++) {
  188. Row sheetRow = sheet.getRow(rowNum);
  189. if (sheetRow == null) {
  190. continue;
  191. }
  192. int lastCellNum = sheetRow.getLastCellNum();
  193. for (int cellNum = 0; cellNum < lastCellNum; cellNum++) {
  194. Cell cell = sheetRow.getCell(cellNum);
  195. if (cell == null) {
  196. continue;
  197. }
  198. String cellValue = ExcelUtil.getCellValue(cell);
  199. // 移除掉非中文字符
  200. String newCellValue = StringUtil.retainChineseCharacters(cellValue, NOT_CONVERT_FIELD_LIST);
  201. String field = fieldMatch(newCellValue, emailFieldMap);
  202. if (StrUtil.isNotBlank(field)) {
  203. List<FieldPositionDTO> fieldPositionDTOList = tempFieldPositionMap.getOrDefault(field, new ArrayList<>());
  204. fieldPositionDTOList.add(new FieldPositionDTO(newCellValue, Pair.of(rowNum, cellNum)));
  205. tempFieldPositionMap.put(field, fieldPositionDTOList);
  206. }
  207. }
  208. }
  209. // 判断是不是份额基金净值文件格式(同时存在两个备案编码字段)
  210. return handlerFieldPositionFromPdf(tempFieldPositionMap);
  211. }
  212. private Map<String, Pair<Integer, Integer>> handlerFieldPositionFromPdf(Map<String, List<FieldPositionDTO>> tempFieldPositionMap) {
  213. Map<String, Pair<Integer, Integer>> fieldPositionMap = MapUtil.newHashMap();
  214. boolean hasParentField = tempFieldPositionMap.keySet().stream().anyMatch(e -> e.contains("parent"));
  215. for (Map.Entry<String, List<FieldPositionDTO>> entry : tempFieldPositionMap.entrySet()) {
  216. String field = entry.getKey();
  217. List<FieldPositionDTO> fieldPositionDTOList = entry.getValue();
  218. int size = fieldPositionDTOList.size();
  219. if (size == 1) {
  220. fieldPositionMap.put(field, fieldPositionDTOList.get(0).getPair());
  221. continue;
  222. }
  223. if ((!hasParentField && size > 1)) {
  224. if (EmailFieldConst.REGISTER_NUMBER.equals(field)) {
  225. Pair<Integer, Integer> pair = fieldPositionDTOList.stream()
  226. .filter(e -> !e.getFieldValue().contains("协会") && !e.getFieldValue().contains("备案")).map(FieldPositionDTO::getPair).findFirst().orElse(null);
  227. fieldPositionMap.put(field, pair);
  228. } else {
  229. fieldPositionMap.put(field, fieldPositionDTOList.get(0).getPair());
  230. }
  231. continue;
  232. }
  233. if ((hasParentField && size > 1)) {
  234. fieldPositionMap.put(field, fieldPositionDTOList.get(0).getPair());
  235. }
  236. }
  237. // 母基金缺少代码的情况
  238. if (hasParentField && fieldPositionMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER) == null) {
  239. List<FieldPositionDTO> fieldPositionDTOS = tempFieldPositionMap.get(EmailFieldConst.REGISTER_NUMBER);
  240. if (CollUtil.isNotEmpty(fieldPositionDTOS)) {
  241. Pair<Integer, Integer> parentRegisterNumberPair = fieldPositionDTOS.stream()
  242. .filter(e -> e.getFieldValue().contains("协会") || e.getFieldValue().contains("备案")).map(FieldPositionDTO::getPair).findFirst().orElse(null);
  243. fieldPositionMap.put(EmailFieldConst.PARENT_REGISTER_NUMBER, parentRegisterNumberPair);
  244. }
  245. }
  246. return fieldPositionMap;
  247. }
  248. /**
  249. * 解析邮件excel附件
  250. *
  251. * @param filePath 邮件excel附件地址
  252. * @param emailFieldMap 邮件字段识别规则映射表
  253. * @return 解析到的净值数据
  254. */
  255. private List<EmailFundNavDTO> parseExcelFile(String filePath, Map<String, List<String>> emailFieldMap) {
  256. Sheet sheet = ExcelUtil.getFirstSheet(filePath);
  257. if (sheet == null) {
  258. log.info("获取不到有效的sheet页面,文件路径:{}", filePath);
  259. return CollUtil.newArrayList();
  260. }
  261. // 1.找到表头所在位置
  262. Map<String, Pair<Integer, Integer>> fieldPositionMap = getFieldPosition(sheet, emailFieldMap);
  263. if (MapUtil.isEmpty(fieldPositionMap)) {
  264. log.warn("找不到文件表头字段 -> 文件:{}", filePath);
  265. return CollUtil.newArrayList();
  266. }
  267. // 2.解析sheet中的净值数据
  268. List<EmailFundNavDTO> emailFundNavDTOList = parseSheetData(filePath, sheet, fieldPositionMap, null);
  269. // 3.校验净值数据格式 并 设置数据校验不通过的原因
  270. if (CollUtil.isNotEmpty(emailFundNavDTOList)) {
  271. emailFundNavDTOList.forEach(e -> e.setFailReason(super.checkDataFailReason(e)));
  272. }
  273. return emailFundNavDTOList;
  274. }
  275. /**
  276. * 解析邮件正文
  277. *
  278. * @param emailContent 正文内容
  279. * @param emailFieldMap 邮件字段识别规则映射表
  280. * @return 解析到的净值数据
  281. */
  282. private List<EmailFundNavDTO> parseEmailContent(String emailContent, String excelFilePath, Map<String, List<String>> emailFieldMap) {
  283. excelFilePath = ExcelUtil.contentConvertToExcel(emailContent, excelFilePath);
  284. if (StrUtil.isBlank(excelFilePath)) {
  285. return CollUtil.newArrayList();
  286. }
  287. return parseExcelFile(excelFilePath, emailFieldMap);
  288. }
  289. /**
  290. * 根据字段所在表格的位置提取净值数据
  291. *
  292. * @param filePath 文件路径·
  293. * @param sheet 表格中的sheet页
  294. * @param fieldPositionMap 字段所在表格中的位置
  295. * @param direction 表格数据的形式:1-行,2-列
  296. * @return 净值数据
  297. */
  298. private List<EmailFundNavDTO> parseSheetData(String filePath, Sheet sheet, Map<String, Pair<Integer, Integer>> fieldPositionMap, Integer direction) {
  299. List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
  300. // 通过表头所在位置判断是行数据还是列数据
  301. Integer dataDirectionType = direction != null ? direction : ExcelUtil.detectDataDirection(fieldPositionMap);
  302. // 数据起始行,起始列
  303. int initRow = dataDirectionType.equals(EmailDataDirectionConst.ROW_DIRECTION_TYPE) ? fieldPositionMap.values().stream().map(Pair::getKey).max(Integer::compareTo).orElse(0)
  304. : fieldPositionMap.values().stream().map(Pair::getKey).min(Integer::compareTo).orElse(0);
  305. int initColumn = fieldPositionMap.values().stream().map(Pair::getValue).min(Integer::compareTo).orElse(0);
  306. if (dataDirectionType.equals(EmailDataDirectionConst.ROW_DIRECTION_TYPE)) {
  307. // 表头字段-列号映射关系
  308. Map<String, Integer> fieldColumnMap = getFieldRow(fieldPositionMap);
  309. int lastRowNum = sheet.getLastRowNum();
  310. // 遍历可能的数据行
  311. for (int rowNum = initRow + 1; rowNum <= lastRowNum; rowNum++) {
  312. Row sheetRow = sheet.getRow(rowNum);
  313. try {
  314. Optional.ofNullable(readSheetRowData(sheetRow, fieldColumnMap)).ifPresent(fundNavDTOList::addAll);
  315. } catch (Exception e) {
  316. log.error("读取行数据报错 -> 行号:{},文件路径:{},堆栈信息:{}", rowNum, filePath, ExceptionUtil.stacktraceToString(e));
  317. }
  318. }
  319. }
  320. if (dataDirectionType.equals(EmailDataDirectionConst.COLUMN_DIRECTION_TYPE)) {
  321. // 表头字段-行号映射关系
  322. Map<Integer, String> fieldRowMap = getRowField(fieldPositionMap);
  323. int lastRow = fieldPositionMap.values().stream().map(Pair::getKey).max(Integer::compareTo).orElse(0);
  324. // 遍历每一列
  325. for (int columnNum = initColumn + 1; columnNum < EmailDataDirectionConst.MAX_ROW_COLUMN; columnNum++) {
  326. Map<String, String> fieldValueMap = MapUtil.newHashMap();
  327. for (int rowNum = initRow; rowNum <= lastRow; rowNum++) {
  328. Row row = sheet.getRow(rowNum);
  329. Cell cell = row.getCell(columnNum);
  330. if (cell == null) {
  331. continue;
  332. }
  333. fieldValueMap.put(fieldRowMap.get(rowNum), ExcelUtil.getCellValue(cell));
  334. }
  335. Optional.ofNullable(buildEmailFundNavDTO(fieldValueMap)).ifPresent(fundNavDTOList::add);
  336. }
  337. }
  338. // 兼容净值日期为空的情况
  339. addPriceDateIfMiss(fundNavDTOList, getPriceDateFromSheet(sheet, initRow));
  340. return fundNavDTOList;
  341. }
  342. private void addPriceDateIfMiss(List<EmailFundNavDTO> fundNavDTOList, String priceDate) {
  343. if (fundNavDTOList.stream().map(EmailFundNavDTO::getPriceDate).allMatch(StrUtil::isBlank)) {
  344. fundNavDTOList.forEach(e -> e.setPriceDate(priceDate));
  345. }
  346. }
  347. private String getPriceDateFromSheet(Sheet sheet, Integer maxRowNum) {
  348. Map<Integer, String> priceDateMap = MapUtil.newHashMap();
  349. for (int rowNum = 0; rowNum < maxRowNum; rowNum++) {
  350. Row row = sheet.getRow(rowNum);
  351. if (row == null) {
  352. continue;
  353. }
  354. int lastCellNum = row.getLastCellNum();
  355. for (int columnNum = 0; columnNum < lastCellNum; columnNum++) {
  356. Cell cell = row.getCell(columnNum);
  357. if (cell == null) {
  358. continue;
  359. }
  360. String cellValue = ExcelUtil.getCellValue(cell);
  361. if (StrUtil.isNotBlank(cellValue) && cellValue.contains("截至")) {
  362. int index = cellValue.indexOf("截至");
  363. String date = cellValue.substring(index + 2, index + 2 + 10);
  364. if (StrUtil.isNotBlank(date)) {
  365. date = date.replaceAll("年", "-").replaceAll("月", "-");
  366. }
  367. priceDateMap.put(1, date);
  368. continue;
  369. }
  370. String priceDate = DateUtils.stringToDate(cellValue);
  371. if (StrUtil.isNotBlank(priceDate)) {
  372. priceDateMap.put(2, priceDate);
  373. }
  374. }
  375. }
  376. if (MapUtil.isNotEmpty(priceDateMap)) {
  377. Integer key = priceDateMap.keySet().stream().min(Integer::compareTo).orElse(null);
  378. return priceDateMap.get(key);
  379. }
  380. return null;
  381. }
  382. private EmailFundNavDTO buildEmailFundNavDTO(Map<String, String> fieldValueMap) {
  383. if (MapUtil.isEmpty(fieldValueMap) || fieldValueMap.values().stream().allMatch(StrUtil::isBlank)) {
  384. return null;
  385. }
  386. EmailFundNavDTO fundNavDTO = new EmailFundNavDTO();
  387. fundNavDTO.setFundName(fieldValueMap.get(EmailFieldConst.FUND_NAME));
  388. fundNavDTO.setRegisterNumber(fieldValueMap.get(EmailFieldConst.REGISTER_NUMBER));
  389. String priceDate = fieldValueMap.get(EmailFieldConst.PRICE_DATE);
  390. boolean isDateFormat = StrUtil.isNotBlank(priceDate) && StringUtil.isNumeric(priceDate) && StringUtil.compare2NumericValue(priceDate);
  391. if (isDateFormat) {
  392. priceDate = ExcelUtil.convertExcelDateToString(priceDate);
  393. }
  394. fundNavDTO.setPriceDate(priceDate);
  395. fundNavDTO.setNav(fieldValueMap.get(EmailFieldConst.NAV));
  396. fundNavDTO.setCumulativeNavWithdrawal(fieldValueMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL));
  397. // pdf解析到的值带有",",比如:"10,656,097.37"
  398. String assetNet = fieldValueMap.get(EmailFieldConst.ASSET_NET);
  399. fundNavDTO.setAssetNet(ExcelUtil.numberDataStripCommas(assetNet));
  400. String assetShares = fieldValueMap.get(EmailFieldConst.ASSET_SHARE);
  401. fundNavDTO.setAssetShare(ExcelUtil.numberDataStripCommas(assetShares));
  402. return fundNavDTO;
  403. }
  404. private List<EmailFundNavDTO> readSheetRowData(Row sheetRow, Map<String, Integer> columnFieldMap) {
  405. if (sheetRow == null) {
  406. return null;
  407. }
  408. String nav = columnFieldMap.get(EmailFieldConst.NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.NAV)) != null
  409. ? ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.NAV))) : null;
  410. String cumulativeNavWithdrawal = columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL)) != null ?
  411. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.CUMULATIVE_NAV_WITHDRAWAL))) : null;
  412. String assetNet = columnFieldMap.get(EmailFieldConst.ASSET_NET) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET)) != null ?
  413. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_NET))) : null;
  414. List<EmailFundNavDTO> fundNavDTOList = CollUtil.newArrayList();
  415. EmailFundNavDTO emailFundNavDTO = new EmailFundNavDTO();
  416. String priceDate = columnFieldMap.get(EmailFieldConst.PRICE_DATE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PRICE_DATE)) != null ?
  417. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PRICE_DATE))) : null;
  418. boolean isDateFormat = StrUtil.isNotBlank(priceDate) && StringUtil.isNumeric(priceDate) && StringUtil.compare2NumericValue(priceDate);
  419. if (isDateFormat) {
  420. priceDate = ExcelUtil.convertExcelDateToString(priceDate);
  421. }
  422. priceDate = DateUtils.stringToDate(priceDate);
  423. //如果返回null,则说明日期格式错误,此时需要保留原始值
  424. if(StringUtil.isNull(priceDate)){
  425. priceDate = columnFieldMap.get(EmailFieldConst.PRICE_DATE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PRICE_DATE)) != null ?
  426. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PRICE_DATE))) : null;
  427. }
  428. // 份额基金净值文件格式
  429. long parentFiledCount = columnFieldMap.keySet().stream().filter(e -> e.contains("parent")).count();
  430. if (parentFiledCount >= 1) {
  431. Optional.ofNullable(buildParentNav(sheetRow, columnFieldMap, priceDate)).ifPresent(fundNavDTOList::add);
  432. }
  433. emailFundNavDTO.setPriceDate(priceDate);
  434. String fundName = ExcelUtil.getPriorityFieldValue(sheetRow, columnFieldMap.get(EmailFieldConst.LEVEL_FUND_NAME), columnFieldMap.get(EmailFieldConst.FUND_NAME));
  435. emailFundNavDTO.setFundName(fundName);
  436. String registerNumber = ExcelUtil.getPriorityFieldValue(sheetRow, columnFieldMap.get(EmailFieldConst.LEVEL_REGISTER_NUMBER), columnFieldMap.get(EmailFieldConst.REGISTER_NUMBER));
  437. emailFundNavDTO.setRegisterNumber(registerNumber);
  438. emailFundNavDTO.setNav(nav);
  439. emailFundNavDTO.setCumulativeNavWithdrawal(cumulativeNavWithdrawal);
  440. String virtualNav = columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV)) != null ?
  441. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.VIRTUAL_NAV))) : null;
  442. emailFundNavDTO.setVirtualNav(virtualNav);
  443. emailFundNavDTO.setAssetNet(ExcelUtil.numberDataStripCommas(assetNet));
  444. String assetShares = columnFieldMap.get(EmailFieldConst.ASSET_SHARE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_SHARE)) != null ?
  445. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.ASSET_SHARE))) : null;
  446. emailFundNavDTO.setAssetShare(ExcelUtil.numberDataStripCommas(assetShares));
  447. fundNavDTOList.add(emailFundNavDTO);
  448. return fundNavDTOList;
  449. }
  450. private EmailFundNavDTO buildParentNav(Row sheetRow, Map<String, Integer> columnFieldMap, String priceDate) {
  451. EmailFundNavDTO emailFundNavDTO = new EmailFundNavDTO();
  452. String nav = columnFieldMap.get(EmailFieldConst.PARENT_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_NAV)) != null ?
  453. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_NAV))) : null;
  454. String cumulativeNavWithdrawal = columnFieldMap.get(EmailFieldConst.PARENT_CUMULATIVE_NAV_WITHDRAWAL) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_CUMULATIVE_NAV_WITHDRAWAL)) != null ?
  455. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_CUMULATIVE_NAV_WITHDRAWAL))) : null;
  456. if (StrUtil.isBlank(nav) && StrUtil.isBlank(cumulativeNavWithdrawal)) {
  457. return null;
  458. }
  459. emailFundNavDTO.setPriceDate(priceDate);
  460. String fundName = columnFieldMap.get(EmailFieldConst.PARENT_FUND_NAME) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_FUND_NAME)).getStringCellValue() != null ?
  461. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_FUND_NAME))) : null;
  462. emailFundNavDTO.setFundName(fundName);
  463. String registerNumber = columnFieldMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER)) != null ?
  464. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER))) : null;
  465. emailFundNavDTO.setRegisterNumber(registerNumber);
  466. emailFundNavDTO.setNav(nav);
  467. emailFundNavDTO.setCumulativeNavWithdrawal(cumulativeNavWithdrawal);
  468. String virtualNav = columnFieldMap.get(EmailFieldConst.PARENT_VIRTUAL_NAV) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_VIRTUAL_NAV)) != null ?
  469. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_VIRTUAL_NAV))) : null;
  470. emailFundNavDTO.setVirtualNav(virtualNav);
  471. String assetNet = columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET)) != null ?
  472. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_NET))) : null;
  473. emailFundNavDTO.setAssetNet(ExcelUtil.numberDataStripCommas(assetNet));
  474. String assetShares = columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE) != null && sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE)) != null ?
  475. ExcelUtil.getCellValue(sheetRow.getCell(columnFieldMap.get(EmailFieldConst.PARENT_ASSET_SHARE))) : null;
  476. emailFundNavDTO.setAssetShare(ExcelUtil.numberDataStripCommas(assetShares));
  477. return emailFundNavDTO;
  478. }
  479. private Map<String, Integer> getFieldRow(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  480. // 考虑日期字段识别逻辑的问题
  481. long rowNumCount = fieldPositionMap.values().stream().map(Pair::getKey).distinct().count();
  482. if (rowNumCount > 1) {
  483. // 存在合并单元格的方式 -> 日期字段所在位置可能会存在错误
  484. Pair<Integer, Integer> priceDatePair = fieldPositionMap.get(EmailFieldConst.PRICE_DATE);
  485. //补丁,如果存在合并单元格,判断日期所在的行是否有其他字段列,如果存在,则表名是行格式数据,而不是其他格式数据
  486. if(priceDatePair != null){
  487. List<Integer> pairKey = fieldPositionMap.values().stream().map(Pair::getKey).collect(Collectors.toList());
  488. List<Integer> priceDateKey = pairKey.stream().filter(e -> e == priceDatePair.getKey()).collect(Collectors.toList());
  489. if(priceDateKey.size() < 2){
  490. fieldPositionMap.remove(EmailFieldConst.PRICE_DATE);
  491. }
  492. }
  493. }
  494. Map<String, Integer> fieldRowMap = MapUtil.newHashMap();
  495. for (Map.Entry<String, Pair<Integer, Integer>> fieldPositionEntry : fieldPositionMap.entrySet()) {
  496. String field = fieldPositionEntry.getKey();
  497. Integer column = fieldPositionEntry.getValue().getValue();
  498. fieldRowMap.put(field, column);
  499. }
  500. return fieldRowMap;
  501. }
  502. private Map<Integer, String> getRowField(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  503. Map<Integer, String> fieldRowMap = MapUtil.newHashMap();
  504. for (Map.Entry<String, Pair<Integer, Integer>> fieldPositionEntry : fieldPositionMap.entrySet()) {
  505. String field = fieldPositionEntry.getKey();
  506. Integer column = fieldPositionEntry.getValue().getKey();
  507. fieldRowMap.put(column, field);
  508. }
  509. return fieldRowMap;
  510. }
  511. /**
  512. * 找出excel中表头所在的位置
  513. *
  514. * @param sheet 表格工作簿
  515. * @param emailFieldMap 邮件字段识别规则映射表
  516. * @return excel中表头所在的位置(行, 列)
  517. */
  518. private Map<String, Pair<Integer, Integer>> getFieldPosition(Sheet sheet, Map<String, List<String>> emailFieldMap) {
  519. Map<String, List<FieldPositionDTO>> tempFieldPositionMap = MapUtil.newHashMap();
  520. int lastRowNum = sheet.getLastRowNum();
  521. for (int rowNum = 0; rowNum <= lastRowNum; rowNum++) {
  522. Row sheetRow = sheet.getRow(rowNum);
  523. if (sheetRow == null) {
  524. continue;
  525. }
  526. int lastCellNum = sheetRow.getLastCellNum();
  527. for (int cellNum = 0; cellNum < lastCellNum; cellNum++) {
  528. Cell cell = sheetRow.getCell(cellNum);
  529. if (cell == null) {
  530. continue;
  531. }
  532. String cellValue = ExcelUtil.getCellValue(cell);
  533. // 移除掉非中文字符
  534. String newCellValue = StringUtil.retainChineseCharacters(cellValue, NOT_CONVERT_FIELD_LIST);
  535. String field = fieldMatch(newCellValue, emailFieldMap);
  536. if (StrUtil.isNotBlank(field)) {
  537. List<FieldPositionDTO> fieldPositionDTOList = tempFieldPositionMap.getOrDefault(field, new ArrayList<>());
  538. fieldPositionDTOList.add(new FieldPositionDTO(newCellValue, Pair.of(rowNum, cellNum)));
  539. tempFieldPositionMap.put(field, fieldPositionDTOList);
  540. }
  541. }
  542. }
  543. // 判断是不是份额基金净值文件格式(同时存在两个备案编码字段)
  544. return handlerFieldPosition(tempFieldPositionMap);
  545. }
  546. private Map<String, Pair<Integer, Integer>> handlerFieldPosition(Map<String, List<FieldPositionDTO>> tempFieldPositionMap) {
  547. Map<String, Pair<Integer, Integer>> fieldPositionMap = MapUtil.newHashMap();
  548. boolean hasParentField = tempFieldPositionMap.keySet().stream().anyMatch(e -> e.contains("parent"));
  549. for (Map.Entry<String, List<FieldPositionDTO>> entry : tempFieldPositionMap.entrySet()) {
  550. String field = entry.getKey();
  551. List<FieldPositionDTO> fieldPositionDTOList = entry.getValue();
  552. int size = fieldPositionDTOList.size();
  553. if (size == 1) {
  554. fieldPositionMap.put(field, fieldPositionDTOList.get(0).getPair());
  555. continue;
  556. }
  557. if ((!hasParentField && size > 1)) {
  558. if (EmailFieldConst.REGISTER_NUMBER.equals(field)) {
  559. Pair<Integer, Integer> pair = fieldPositionDTOList.stream()
  560. .filter(e -> !e.getFieldValue().contains("协会") && !e.getFieldValue().contains("备案")).map(FieldPositionDTO::getPair).findFirst().orElse(null);
  561. fieldPositionMap.put(field, pair);
  562. } else {
  563. fieldPositionMap.put(field, fieldPositionDTOList.get(size - 1).getPair());
  564. }
  565. continue;
  566. }
  567. if ((hasParentField && size > 1)) {
  568. fieldPositionMap.put(field, fieldPositionDTOList.get(0).getPair());
  569. }
  570. }
  571. // 母基金缺少代码的情况
  572. if (hasParentField && fieldPositionMap.get(EmailFieldConst.PARENT_REGISTER_NUMBER) == null) {
  573. List<FieldPositionDTO> fieldPositionDTOS = tempFieldPositionMap.get(EmailFieldConst.REGISTER_NUMBER);
  574. if (CollUtil.isNotEmpty(fieldPositionDTOS)) {
  575. Pair<Integer, Integer> parentRegisterNumberPair = fieldPositionDTOS.stream()
  576. .filter(e -> e.getFieldValue().contains("协会") || e.getFieldValue().contains("备案")).map(FieldPositionDTO::getPair).findFirst().orElse(null);
  577. fieldPositionMap.put(EmailFieldConst.PARENT_REGISTER_NUMBER, parentRegisterNumberPair);
  578. }
  579. }
  580. return fieldPositionMap;
  581. }
  582. /**
  583. * 判断单元格值是否为表头字段
  584. *
  585. * @param cellValue 单元格值
  586. * @param emailFieldMap 邮件字段识别规则映射表
  587. * @return 表头对应的标识
  588. */
  589. public String fieldMatch(String cellValue, Map<String, List<String>> emailFieldMap) {
  590. if (StrUtil.isBlank(cellValue)) {
  591. return null;
  592. }
  593. for (Map.Entry<String, List<String>> fieldEntry : emailFieldMap.entrySet()) {
  594. List<String> fieldList = fieldEntry.getValue();
  595. for (String field : fieldList) {
  596. if (cellValue.equals(field)) {
  597. return fieldEntry.getKey();
  598. }
  599. }
  600. }
  601. return null;
  602. }
  603. }