EmailParseService.java 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729
  1. package com.smppw.modaq.domain.service;
  2. import cn.hutool.core.collection.CollUtil;
  3. import cn.hutool.core.collection.ListUtil;
  4. import cn.hutool.core.date.DateUtil;
  5. import cn.hutool.core.exceptions.ExceptionUtil;
  6. import cn.hutool.core.map.MapUtil;
  7. import cn.hutool.core.util.StrUtil;
  8. import com.smppw.modaq.application.components.ReportParseUtils;
  9. import com.smppw.modaq.application.components.report.parser.ReportParser;
  10. import com.smppw.modaq.application.components.report.parser.ReportParserFactory;
  11. import com.smppw.modaq.application.components.report.writer.ReportWriter;
  12. import com.smppw.modaq.application.components.report.writer.ReportWriterFactory;
  13. import com.smppw.modaq.application.util.EmailUtil;
  14. import com.smppw.modaq.common.conts.DateConst;
  15. import com.smppw.modaq.common.conts.EmailParseStatusConst;
  16. import com.smppw.modaq.common.conts.EmailTypeConst;
  17. import com.smppw.modaq.common.enums.ReportParseStatus;
  18. import com.smppw.modaq.common.enums.ReportParserFileType;
  19. import com.smppw.modaq.common.enums.ReportType;
  20. import com.smppw.modaq.common.exception.NotSupportReportException;
  21. import com.smppw.modaq.common.exception.ReportParseException;
  22. import com.smppw.modaq.domain.dto.EmailContentInfoDTO;
  23. import com.smppw.modaq.domain.dto.EmailZipFileDTO;
  24. import com.smppw.modaq.domain.dto.MailboxInfoDTO;
  25. import com.smppw.modaq.domain.dto.report.ParseResult;
  26. import com.smppw.modaq.domain.dto.report.ReportData;
  27. import com.smppw.modaq.domain.dto.report.ReportParserParams;
  28. import com.smppw.modaq.domain.entity.EmailFileInfoDO;
  29. import com.smppw.modaq.domain.entity.EmailParseInfoDO;
  30. import com.smppw.modaq.domain.mapper.EmailFileInfoMapper;
  31. import com.smppw.modaq.domain.mapper.EmailParseInfoMapper;
  32. import com.smppw.modaq.infrastructure.util.ExcelUtil;
  33. import com.smppw.modaq.infrastructure.util.FileUtil;
  34. import jakarta.mail.*;
  35. import jakarta.mail.internet.MimeUtility;
  36. import jakarta.mail.search.ComparisonTerm;
  37. import jakarta.mail.search.ReceivedDateTerm;
  38. import jakarta.mail.search.SearchTerm;
  39. import org.apache.commons.compress.archivers.ArchiveException;
  40. import org.slf4j.Logger;
  41. import org.slf4j.LoggerFactory;
  42. import org.springframework.beans.factory.annotation.Value;
  43. import org.springframework.stereotype.Service;
  44. import org.springframework.util.StopWatch;
  45. import java.io.File;
  46. import java.io.IOException;
  47. import java.nio.file.Path;
  48. import java.nio.file.Paths;
  49. import java.util.*;
  50. import java.util.regex.Matcher;
  51. import java.util.regex.Pattern;
  52. import java.util.stream.Collectors;
  53. /**
  54. * @author mozuwen
  55. * @date 2024-09-04
  56. * @description 邮件解析服务
  57. */
  58. @Service
  59. public class EmailParseService {
  private static final Logger log = LoggerFactory.getLogger(EmailParseService.class);

  /** Mapper for the e-mail parse records (one row per processed attachment). */
  private final EmailParseInfoMapper emailParseInfoMapper;

  /** Mapper for the files that belong to an e-mail parse record. */
  private final EmailFileInfoMapper emailFileInfoMapper;

  /* Factories for report parsing and for persisting the parsed report. */
  private final ReportParserFactory reportParserFactory;
  private final ReportWriterFactory reportWriterFactory;

  /** Root directory under which downloaded attachments are stored. */
  @Value("${email.file.path}")
  private String path;

  /**
   * Creates the service with its collaborators.
   *
   * @param emailParseInfoMapper mapper for e-mail parse records
   * @param emailFileInfoMapper  mapper for e-mail file records
   * @param reportParserFactory  factory that supplies report parsers
   * @param reportWriterFactory  factory that supplies report writers
   */
  public EmailParseService(EmailParseInfoMapper emailParseInfoMapper,
                           EmailFileInfoMapper emailFileInfoMapper,
                           ReportParserFactory reportParserFactory,
                           ReportWriterFactory reportWriterFactory) {
    this.reportWriterFactory = reportWriterFactory;
    this.reportParserFactory = reportParserFactory;
    this.emailFileInfoMapper = emailFileInfoMapper;
    this.emailParseInfoMapper = emailParseInfoMapper;
  }
  79. /**
  80. * 解析指定邮箱指定时间范围内的邮件
  81. *
  82. * @param mailboxInfoDTO 邮箱配置信息
  83. * @param startDate 邮件起始日期(yyyy-MM-dd HH:mm:ss)
  84. * @param endDate 邮件截止日期(yyyy-MM-dd HH:mm:ss, 为null,将解析邮件日期小于等于startDate的当天邮件)
  85. * @param emailTypes 当前任务支持的邮件类型,默认支持确认单
  86. */
  87. public void parseEmail(MailboxInfoDTO mailboxInfoDTO,
  88. Date startDate, Date endDate,
  89. List<String> folderNames, List<Integer> emailTypes) {
  90. if (CollUtil.isEmpty(emailTypes)) {
  91. emailTypes = ListUtil.of(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE);
  92. }
  93. log.info("开始邮件解析 -> 邮箱信息:{},开始时间:{},结束时间:{}", mailboxInfoDTO, DateUtil.format(startDate,
  94. DateConst.YYYY_MM_DD_HH_MM_SS), DateUtil.format(endDate, DateConst.YYYY_MM_DD_HH_MM_SS));
  95. // 邮件类型配置
  96. Map<Integer, List<String>> emailTypeMap = getEmailType();
  97. Map<String, List<EmailContentInfoDTO>> emailContentMap;
  98. try {
  99. emailContentMap = realEmail(mailboxInfoDTO, emailTypeMap, startDate, endDate, folderNames);
  100. } catch (Exception e) {
  101. log.error("采集邮件失败 -> 邮箱配置信息:{},堆栈信息:{}", mailboxInfoDTO, ExceptionUtil.stacktraceToString(e));
  102. return;
  103. }
  104. if (MapUtil.isEmpty(emailContentMap)) {
  105. log.warn("未采集到邮件 -> 邮箱配置信息:{},开始时间:{},结束时间:{}", mailboxInfoDTO,
  106. DateUtil.format(startDate, DateConst.YYYY_MM_DD_HH_MM_SS), DateUtil.format(endDate, DateConst.YYYY_MM_DD_HH_MM_SS));
  107. return;
  108. }
  109. for (Map.Entry<String, List<EmailContentInfoDTO>> emailEntry : emailContentMap.entrySet()) {
  110. List<EmailContentInfoDTO> emailContentInfoDTOList = emailEntry.getValue();
  111. if (CollUtil.isEmpty(emailContentInfoDTOList)) {
  112. log.warn("未采集到正文或附件");
  113. continue;
  114. }
  115. log.info("开始解析邮件数据 -> 邮件主题:{},邮件日期:{}", emailContentInfoDTOList.get(0).getEmailTitle(), emailContentInfoDTOList.get(0).getEmailDate());
  116. Map<EmailContentInfoDTO, List<EmailZipFileDTO>> emailZipFileMap = MapUtil.newHashMap();
  117. for (EmailContentInfoDTO emailContentInfoDTO : emailContentInfoDTOList) {
  118. // 正文不用解压附件
  119. if (emailContentInfoDTO.getFileName() != null && emailContentInfoDTO.getFileName().endsWith(".html")) {
  120. emailZipFileMap.put(emailContentInfoDTO, ListUtil.empty());
  121. continue;
  122. }
  123. try {
  124. List<EmailZipFileDTO> fundNavDTOList = parseZipEmail(emailContentInfoDTO);
  125. emailZipFileMap.put(emailContentInfoDTO, fundNavDTOList);
  126. } catch (IOException | ArchiveException e) {
  127. log.error("压缩包解压失败:{}", ExceptionUtil.stacktraceToString(e));
  128. EmailParseInfoDO fail = buildEmailParseInfo(null, mailboxInfoDTO.getAccount(), emailContentInfoDTO);
  129. fail.setFailReason("压缩包解压失败");
  130. fail.setParseStatus(EmailParseStatusConst.FAIL);
  131. fail.setEmailKey(emailEntry.getKey());
  132. this.emailParseInfoMapper.insert(fail);
  133. } catch (Exception e) {
  134. log.error("堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
  135. }
  136. }
  137. Iterator<Map.Entry<EmailContentInfoDTO, List<EmailZipFileDTO>>> entryIterator = emailZipFileMap.entrySet().iterator();
  138. while (entryIterator.hasNext()) {
  139. Map.Entry<EmailContentInfoDTO, List<EmailZipFileDTO>> entry = entryIterator.next();
  140. EmailContentInfoDTO key = entry.getKey();
  141. List<EmailZipFileDTO> dtos = entry.getValue();
  142. List<Integer> types = ListUtil.list(false);
  143. types.add(key.getEmailType());
  144. if (CollUtil.isNotEmpty(dtos)) {
  145. List<Integer> list = dtos.stream().map(EmailZipFileDTO::getEmailType).distinct().toList();
  146. types.addAll(list);
  147. }
  148. boolean flag = false;
  149. for (Integer type : types) {
  150. if (emailTypes.contains(type)) {
  151. flag = true;
  152. break;
  153. }
  154. }
  155. if (!flag) {
  156. log.warn("当前邮件{} 的类型{} 不在支持的任务类型{} 中,不用执行解析逻辑。", key, types, emailTypes);
  157. entryIterator.remove();
  158. }
  159. }
  160. // 保存相关信息 -> 邮件信息表,邮件文件表,邮件净值表,邮件规模表,基金净值表
  161. saveRelatedTable(emailEntry.getKey(), mailboxInfoDTO.getAccount(), emailZipFileMap);
  162. log.info("结束邮件解析 -> 邮箱信息:{},开始时间:{},结束时间:{}", mailboxInfoDTO,
  163. DateUtil.format(startDate, DateConst.YYYY_MM_DD_HH_MM_SS), DateUtil.format(endDate, DateConst.YYYY_MM_DD_HH_MM_SS));
  164. }
  165. }
  166. public List<EmailZipFileDTO> parseZipEmail(EmailContentInfoDTO emailContentInfoDTO) throws Exception {
  167. List<EmailZipFileDTO> resultList = ListUtil.list(false);
  168. Integer emailType = emailContentInfoDTO.getEmailType();
  169. String filepath = emailContentInfoDTO.getFilePath();
  170. if (ExcelUtil.isZip(filepath)) {
  171. handleCompressedFiles(filepath, ".zip", emailType, resultList);
  172. } else if (ExcelUtil.isRAR(filepath)) {
  173. handleCompressedFiles(filepath, ".rar", emailType, resultList);
  174. }
  175. // 文件中的类型判断
  176. if (emailType == null || !EmailTypeConst.SUPPORT_EMAIL_TYPES.contains(emailType)) {
  177. emailType = EmailUtil.getEmailTypeBySubject(emailContentInfoDTO.getFileName(), this.getEmailType());
  178. emailContentInfoDTO.setEmailType(emailType);
  179. }
  180. return resultList;
  181. }
  182. private void handleCompressedFiles(String filepath, String extension, Integer emailType, List<EmailZipFileDTO> resultList) throws Exception {
  183. String destPath = getDestinationPath(filepath, extension);
  184. log.info("压缩包地址:{}, 解压后文件地址:{}", filepath, destPath);
  185. File destFile = new File(destPath);
  186. if (!destFile.exists()) {
  187. if (!destFile.mkdirs()) {
  188. throw new IOException("无法创建目标目录: " + destPath);
  189. }
  190. }
  191. List<String> extractedDirs;
  192. if (ExcelUtil.isZip(filepath)) {
  193. extractedDirs = ExcelUtil.extractCompressedFiles(filepath, destPath);
  194. } else if (ExcelUtil.isRAR(filepath)) {
  195. extractedDirs = ExcelUtil.extractRar5(filepath, destPath);
  196. } else {
  197. return;
  198. }
  199. for (String dir : extractedDirs) {
  200. // 如果邮件类型不满足解析条件则重新根据文件名判断
  201. if (emailType == null || !EmailTypeConst.SUPPORT_EMAIL_TYPES.contains(emailType)) {
  202. emailType = EmailUtil.getEmailTypeBySubject(dir, this.getEmailType());
  203. }
  204. File file = new File(dir);
  205. if (file.isDirectory()) {
  206. String[] subDirs = file.list();
  207. if (subDirs != null) {
  208. for (String subDir : subDirs) {
  209. resultList.add(new EmailZipFileDTO(subDir, emailType));
  210. }
  211. } else {
  212. log.warn("目录 {} 下无文件", dir);
  213. }
  214. } else {
  215. resultList.add(new EmailZipFileDTO(dir, emailType));
  216. }
  217. }
  218. }
  219. private String getDestinationPath(String filepath, String extension) {
  220. Path path = Paths.get(filepath);
  221. String fileName = path.getFileName().toString();
  222. String baseName = fileName.substring(0, fileName.length() - extension.length());
  223. return path.getParent().resolve(baseName).toString();
  224. }
  225. public void saveRelatedTable(String emailKey, String emailAddress,
  226. Map<EmailContentInfoDTO, List<EmailZipFileDTO>> emailZipFileMap) {
  227. // python 报告解析接口结果
  228. List<ParseResult<ReportData>> dataList = ListUtil.list(false);
  229. for (Map.Entry<EmailContentInfoDTO, List<EmailZipFileDTO>> entry : emailZipFileMap.entrySet()) {
  230. EmailContentInfoDTO emailContentInfoDTO = entry.getKey();
  231. if (emailContentInfoDTO.getFileName() != null && emailContentInfoDTO.getFileName().endsWith(".html")) {
  232. continue;
  233. }
  234. Integer emailId = emailContentInfoDTO.getEmailId();
  235. EmailParseInfoDO emailParseInfoDO = buildEmailParseInfo(emailId, emailAddress, emailContentInfoDTO);
  236. emailParseInfoDO.setEmailKey(emailKey);
  237. emailId = saveEmailParseInfo(emailParseInfoDO);
  238. if (emailId == null) {
  239. continue;
  240. }
  241. List<EmailZipFileDTO> zipFiles = entry.getValue();
  242. if (CollUtil.isNotEmpty(zipFiles)) {
  243. for (EmailZipFileDTO zipFile : zipFiles) {
  244. EmailFileInfoDO emailFile = saveEmailFileInfo(emailId, null, zipFile.getFilename(), zipFile.getFilepath(), null);
  245. // 解析并保存报告
  246. ParseResult<ReportData> parseResult = this.parseReportAndHandleResult(emailFile.getId(), zipFile.getFilename(),
  247. zipFile.getFilepath(), zipFile.getEmailType(), emailFile.getAiFileId());
  248. dataList.add(parseResult);
  249. }
  250. } else {
  251. String fileName = emailContentInfoDTO.getFileName();
  252. EmailFileInfoDO emailFile = saveEmailFileInfo(emailId, emailContentInfoDTO.getFileId(), fileName,
  253. emailContentInfoDTO.getFilePath(), emailContentInfoDTO.getAiFileId());
  254. // 解析并保存报告
  255. ParseResult<ReportData> parseResult = this.parseReportAndHandleResult(emailFile.getId(), fileName,
  256. emailContentInfoDTO.getFilePath(), emailContentInfoDTO.getEmailType(), emailFile.getAiFileId());
  257. dataList.add(parseResult);
  258. }
  259. String failReason = null;
  260. int emailParseStatus = EmailParseStatusConst.SUCCESS;
  261. // 报告邮件有一条失败就表示整个邮件解析失败
  262. if (CollUtil.isNotEmpty(dataList)) {
  263. // ai解析结果
  264. List<ReportData> aiParaseList = dataList.stream().map(ParseResult::getData)
  265. .filter(Objects::nonNull).filter(e -> Objects.equals(true, e.getAiParse())).toList();
  266. if (CollUtil.isNotEmpty(aiParaseList)) {
  267. for (ReportData data : aiParaseList) {
  268. this.emailFileInfoMapper.updateAiParseByFileId(data.getBaseInfo().getFileId(), data.getAiParse(), data.getAiFileId());
  269. }
  270. }
  271. long failNum = dataList.stream().filter(e -> !Objects.equals(EmailParseStatusConst.SUCCESS, e.getStatus())).count();
  272. if (failNum > 0) {
  273. emailParseStatus = EmailParseStatusConst.FAIL;
  274. failReason = dataList.stream().map(ParseResult::getMsg).collect(Collectors.joining(";"));
  275. }
  276. }
  277. emailParseInfoMapper.updateParseStatus(emailId, emailParseStatus, failReason);
  278. }
  279. }
  280. private ParseResult<ReportData> parseReportAndHandleResult(int fileId, String fileName,
  281. String filepath, Integer emailType, String aiFileId) {
  282. ParseResult<ReportData> result = new ParseResult<>();
  283. boolean reportFlag = emailType == null || !EmailTypeConst.SUPPORT_EMAIL_TYPES.contains(emailType);
  284. if (reportFlag || StrUtil.isBlank(fileName) || fileName.endsWith(".html")) {
  285. result.setStatus(ReportParseStatus.NOT_A_REPORT.getCode());
  286. result.setMsg(StrUtil.format(ReportParseStatus.NOT_A_REPORT.getMsg(), fileName));
  287. return result;
  288. }
  289. Pattern pattern = Pattern.compile("[A-Z0-9]{6}");
  290. Matcher matcher = pattern.matcher(fileName);
  291. String registerNumber = null;
  292. if (matcher.find()) {
  293. registerNumber = matcher.group();
  294. }
  295. // 类型识别---先识别季度报告,没有季度再识别年度报告,最后识别月报
  296. ReportType reportType = ReportParseUtils.matchReportType(fileName);
  297. if (Objects.equals(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE, emailType)) {
  298. reportType = ReportType.LETTER;
  299. }
  300. // 解析器--根据文件后缀获取对应解析器,解析不了就用AI来解析
  301. ReportParserFileType fileType;
  302. String fileSuffix = StrUtil.subAfter(fileName, ".", true);
  303. fileType = ReportParserFileType.getBySuffix(fileSuffix);
  304. // 不支持的格式
  305. if (fileType == null) {
  306. result.setStatus(ReportParseStatus.NO_SUPPORT_TEMPLATE.getCode());
  307. result.setMsg(StrUtil.format(ReportParseStatus.NO_SUPPORT_TEMPLATE.getMsg(), fileName));
  308. return result;
  309. }
  310. // 不是定期报告的判断逻辑放在不支持的格式下面
  311. if (reportType == null) {
  312. result.setStatus(ReportParseStatus.NOT_A_REPORT.getCode());
  313. result.setMsg(StrUtil.format(ReportParseStatus.NOT_A_REPORT.getMsg(), fileName));
  314. return result;
  315. }
  316. // 不支持解析的格式文件
  317. boolean notSupportFile = false;
  318. // 解析报告
  319. ReportData reportData = null;
  320. StopWatch parserWatch = new StopWatch();
  321. parserWatch.start();
  322. try {
  323. if (StrUtil.isBlank(aiFileId) && reportType != ReportType.OTHER && reportType != ReportType.WEEKLY) {
  324. ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName).filepath(filepath)
  325. .registerNumber(registerNumber).reportType(reportType).build();
  326. ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, fileType);
  327. reportData = instance.parse(params);
  328. result.setStatus(1);
  329. result.setMsg("报告解析成功");
  330. result.setData(reportData);
  331. } else {
  332. if (reportType == ReportType.OTHER || reportType == ReportType.WEEKLY) {
  333. if (log.isInfoEnabled()) {
  334. log.info("报告{} 是周报或其他类型,直接用AI解析器解析", fileName);
  335. }
  336. } else {
  337. if (log.isInfoEnabled()) {
  338. log.info("报告{} 是已经存在ai解析记录,上传过文件{},直接跳转到AI解析器进行解析", fileName, fileId);
  339. }
  340. }
  341. }
  342. } catch (ReportParseException e) {
  343. log.error("解析失败:{}", StrUtil.format(e.getMsg(), fileName));
  344. result.setStatus(e.getCode());
  345. result.setMsg(StrUtil.format(e.getMsg(), fileName));
  346. if (e instanceof NotSupportReportException) {
  347. notSupportFile = true;
  348. }
  349. } catch (Exception e) {
  350. log.error("解析错误:{}", ExceptionUtil.stacktraceToString(e));
  351. result.setStatus(ReportParseStatus.PARSE_FAIL.getCode());
  352. result.setMsg(StrUtil.format(ReportParseStatus.PARSE_FAIL.getMsg(), e.getMessage()));
  353. } finally {
  354. // 如果解析结果是空的就用AI工具解析一次
  355. if (reportData == null && !notSupportFile) {
  356. if (log.isInfoEnabled()) {
  357. log.info("报告{} 开始AI解析......", fileName);
  358. }
  359. ReportParserParams params = ReportParserParams.builder().fileId(fileId).filename(fileName).filepath(filepath)
  360. .registerNumber(registerNumber).reportType(reportType).aiFileId(aiFileId).build();
  361. ReportParser<ReportData> instance = this.reportParserFactory.getInstance(reportType, ReportParserFileType.AI);
  362. try {
  363. reportData = instance.parse(params);
  364. result.setStatus(1);
  365. result.setMsg("报告解析成功--AI");
  366. result.setData(reportData);
  367. } catch (ReportParseException e) {
  368. log.error("AI解析失败:{}", StrUtil.format(e.getMsg(), fileName));
  369. result.setStatus(e.getCode());
  370. result.setMsg(StrUtil.format(e.getMsg(), fileName));
  371. } catch (Exception e) {
  372. log.error("AI解析错误:{}", ExceptionUtil.stacktraceToString(e));
  373. result.setStatus(ReportParseStatus.PARSE_FAIL.getCode());
  374. result.setMsg(StrUtil.format(ReportParseStatus.PARSE_FAIL.getMsg(), e.getMessage()));
  375. }
  376. if (log.isInfoEnabled()) {
  377. log.info("报告{} AI解析结束!", fileName);
  378. }
  379. }
  380. parserWatch.stop();
  381. if (log.isInfoEnabled()) {
  382. log.info("报告{}解析结果为{},耗时{}ms", fileName, reportData, parserWatch.getTotalTimeMillis());
  383. }
  384. }
  385. // 保存报告解析结果
  386. if (reportData != null) {
  387. StopWatch writeWatch = new StopWatch();
  388. writeWatch.start();
  389. try {
  390. ReportWriter<ReportData> instance = this.reportWriterFactory.getInstance(reportType);
  391. instance.write(reportData);
  392. } catch (Exception e) {
  393. log.error("报告{}结果保存失败\n{}", fileName, ExceptionUtil.stacktraceToString(e));
  394. } finally {
  395. writeWatch.stop();
  396. if (log.isInfoEnabled()) {
  397. log.info("报告{}解析结果保存完成,耗时{}ms", fileName, writeWatch.getTotalTimeMillis());
  398. }
  399. }
  400. }
  401. return result;
  402. }
  403. private EmailFileInfoDO saveEmailFileInfo(Integer emailId, Integer fileId, String fileName, String filePath, String aiFileId) {
  404. EmailFileInfoDO emailFileInfoDO = buildEmailFileInfoDO(emailId, fileId, fileName, filePath);
  405. emailFileInfoDO.setAiFileId(aiFileId);
  406. if (emailFileInfoDO.getId() != null) {
  407. emailFileInfoMapper.updateTimeById(fileId, new Date());
  408. return emailFileInfoDO;
  409. }
  410. emailFileInfoMapper.insert(emailFileInfoDO);
  411. return emailFileInfoDO;
  412. }
  413. private EmailFileInfoDO buildEmailFileInfoDO(Integer emailId, Integer fileId, String fileName, String filePath) {
  414. EmailFileInfoDO emailFileInfoDO = new EmailFileInfoDO();
  415. emailFileInfoDO.setId(fileId);
  416. emailFileInfoDO.setEmailId(emailId);
  417. emailFileInfoDO.setFileName(fileName);
  418. emailFileInfoDO.setFilePath(filePath);
  419. emailFileInfoDO.setIsvalid(1);
  420. emailFileInfoDO.setCreatorId(0);
  421. emailFileInfoDO.setCreateTime(new Date());
  422. emailFileInfoDO.setUpdaterId(0);
  423. emailFileInfoDO.setUpdateTime(new Date());
  424. return emailFileInfoDO;
  425. }
  426. private Integer saveEmailParseInfo(EmailParseInfoDO emailParseInfoDO) {
  427. if (emailParseInfoDO == null) {
  428. return null;
  429. }
  430. // 重新邮件功能 -> 修改解析时间和更新时间
  431. if (emailParseInfoDO.getId() != null) {
  432. emailParseInfoMapper.updateParseTime(emailParseInfoDO.getId(), emailParseInfoDO.getParseDate());
  433. return emailParseInfoDO.getId();
  434. }
  435. // // 根据邮件发送人、邮件地址、邮箱日期、主题找到是否已经存在的记录(不管是否成功),已存在就不解析了
  436. // EmailParseInfoDO temp = this.emailParseInfoMapper.searchEmail(emailParseInfoDO);
  437. // if (temp != null) {
  438. // return null;
  439. // }
  440. emailParseInfoMapper.insert(emailParseInfoDO);
  441. return emailParseInfoDO.getId();
  442. }
  443. private EmailParseInfoDO buildEmailParseInfo(Integer emailId, String emailAddress, EmailContentInfoDTO emailContentInfoDTO) {
  444. EmailParseInfoDO emailParseInfoDO = new EmailParseInfoDO();
  445. emailParseInfoDO.setId(emailId);
  446. emailParseInfoDO.setSenderEmail(emailContentInfoDTO.getSenderEmail());
  447. emailParseInfoDO.setEmail(emailAddress);
  448. emailParseInfoDO.setEmailDate(DateUtil.parse(emailContentInfoDTO.getEmailDate(), DateConst.YYYY_MM_DD_HH_MM_SS));
  449. emailParseInfoDO.setParseDate(emailContentInfoDTO.getParseDate() == null ? null : DateUtil.parseDate(emailContentInfoDTO.getParseDate()));
  450. emailParseInfoDO.setEmailTitle(emailContentInfoDTO.getEmailTitle());
  451. emailParseInfoDO.setEmailType(emailContentInfoDTO.getEmailType());
  452. emailParseInfoDO.setParseStatus(EmailParseStatusConst.SUCCESS);
  453. emailParseInfoDO.setAttrSize(emailContentInfoDTO.getFileSize());
  454. emailParseInfoDO.setIsvalid(1);
  455. emailParseInfoDO.setCreatorId(0);
  456. emailParseInfoDO.setCreateTime(new Date());
  457. emailParseInfoDO.setUpdaterId(0);
  458. emailParseInfoDO.setUpdateTime(new Date());
  459. return emailParseInfoDO;
  460. }
  461. public Map<Integer, List<String>> getEmailType() {
  462. Map<Integer, List<String>> emailTypeMap = MapUtil.newHashMap(3, true);
  463. emailTypeMap.put(EmailTypeConst.REPORT_EMAIL_TYPE,
  464. ListUtil.toList("月报", "月度报告", "季报", "季度报告", "年报", "年度报告"));
  465. emailTypeMap.put(EmailTypeConst.REPORT_LETTER_EMAIL_TYPE,
  466. ListUtil.toList(ReportType.LETTER.getPatterns()));
  467. emailTypeMap.put(EmailTypeConst.REPORT_OTHER_TYPE,
  468. ListUtil.toList(ReportType.OTHER.getPatterns()));
  469. emailTypeMap.put(EmailTypeConst.REPORT_WEEKLY_TYPE,
  470. ListUtil.toList(ReportType.WEEKLY.getPatterns()));
  471. return emailTypeMap;
  472. }
  473. /**
  474. * 读取邮件
  475. *
  476. * @param mailboxInfoDTO 邮箱配置信息
  477. * @param emailTypeMap 邮件类型识别规则映射表
  478. * @param startDate 邮件起始日期
  479. * @param endDate 邮件截止日期(为null,将解析邮件日期小于等于startDate的当天邮件)
  480. * @return 读取到的邮件信息
  481. * @throws Exception 异常信息
  482. */
  483. private Map<String, List<EmailContentInfoDTO>> realEmail(MailboxInfoDTO mailboxInfoDTO,
  484. Map<Integer, List<String>> emailTypeMap,
  485. Date startDate, Date endDate,
  486. List<String> folderNames) throws Exception {
  487. if (CollUtil.isEmpty(folderNames)) {
  488. folderNames.add("INBOX");
  489. }
  490. Store store = EmailUtil.getStoreNew(mailboxInfoDTO);
  491. if (store == null) {
  492. return MapUtil.newHashMap(4);
  493. }
  494. Map<String, List<EmailContentInfoDTO>> result = MapUtil.newHashMap(128);
  495. try {
  496. if (log.isInfoEnabled()) {
  497. Folder[] list = store.getDefaultFolder().list("*");
  498. List<String> names = Arrays.stream(list).map(Folder::getFullName).toList();
  499. log.info("获取所有邮箱文件夹:{}", names);
  500. }
  501. for (String folderName : folderNames) {
  502. try {
  503. Map<String, List<EmailContentInfoDTO>> temp = this.getFolderEmail(mailboxInfoDTO, emailTypeMap,
  504. startDate, endDate, store, folderName);
  505. if (MapUtil.isNotEmpty(temp)) {
  506. result.putAll(temp);
  507. }
  508. } catch (Exception e) {
  509. log.warn("文件夹{} 邮件获取失败:{}", folderName, ExceptionUtil.stacktraceToString(e));
  510. }
  511. }
  512. } catch (Exception e) {
  513. log.error("邮件获取失败:{}", ExceptionUtil.stacktraceToString(e));
  514. } finally {
  515. store.close();
  516. }
  517. return result;
  518. }
  519. private Map<String, List<EmailContentInfoDTO>> getFolderEmail(MailboxInfoDTO mailboxInfoDTO,
  520. Map<Integer, List<String>> emailTypeMap,
  521. Date startDate, Date endDate,
  522. Store store, String folderName) throws MessagingException {
  523. // 默认读取收件箱的邮件
  524. Folder folder = store.getFolder(folderName);
  525. folder.open(Folder.READ_ONLY);
  526. Message[] messages = getEmailMessage(folder, mailboxInfoDTO.getProtocol(), startDate);
  527. if (messages == null || messages.length == 0) {
  528. log.warn("{} 获取不到邮件 -> 邮箱信息:{},开始时间:{},结束时间:{}", folderName, mailboxInfoDTO, startDate, endDate);
  529. return MapUtil.newHashMap();
  530. }
  531. Map<String, List<EmailContentInfoDTO>> emailMessageMap = MapUtil.newHashMap();
  532. for (Message message : messages) {
  533. long start = System.currentTimeMillis();
  534. List<EmailContentInfoDTO> emailContentInfoDTOList = CollUtil.newArrayList();
  535. String uuidKey = UUID.randomUUID().toString().replaceAll("-", "");
  536. Integer emailType;
  537. String senderEmail;
  538. String emailTitle = null;
  539. try {
  540. emailTitle = message.getSubject();
  541. Date emailDate = message.getSentDate();
  542. String emailDateStr = DateUtil.format(emailDate, DateConst.YYYY_MM_DD_HH_MM_SS);
  543. if (log.isInfoEnabled()) {
  544. log.info("{} 邮件{} 数据获取中,邮件时间:{}", folderName, emailTitle, emailDateStr);
  545. }
  546. boolean isNotParseConditionSatisfied = emailDate == null
  547. || (endDate != null && emailDate.compareTo(endDate) > 0)
  548. || (startDate != null && emailDate.compareTo(startDate) < 0);
  549. if (isNotParseConditionSatisfied) {
  550. log.warn("{} 邮件{} 没有日期{} 或者 邮件日期不在区间内【{} ~ {}】", folderName, emailTitle, emailDate, startDate, endDate);
  551. continue;
  552. }
  553. senderEmail = getSenderEmail(message);
  554. emailType = EmailUtil.getEmailTypeBySubject(emailTitle, emailTypeMap);
  555. if (emailType == null) {
  556. log.warn("{} 邮件不满足解析条件 -> 邮件主题:{},邮件日期:{}", folderName, emailTitle, emailDateStr);
  557. continue;
  558. }
  559. log.info("{} 邮件{} 基本信息获取完成,开始下载附件!邮件日期:{}", folderName, emailTitle, emailDateStr);
  560. Object content = message.getContent();
  561. if (content instanceof Multipart multipart) {
  562. this.reMultipart(mailboxInfoDTO.getAccount(), emailTitle, emailDate, multipart, emailContentInfoDTOList);
  563. } else if (content instanceof Part part) {
  564. this.rePart(mailboxInfoDTO.getAccount(), emailTitle, emailDate, part, emailContentInfoDTOList);
  565. } else {
  566. log.warn("{} 不支持的邮件数据 {}", folderName, emailTitle);
  567. }
  568. if (CollUtil.isNotEmpty(emailContentInfoDTOList)) {
  569. emailContentInfoDTOList.forEach(e -> {
  570. e.setEmailType(emailType);
  571. e.setSenderEmail(senderEmail);
  572. });
  573. emailMessageMap.put(uuidKey, emailContentInfoDTOList);
  574. }
  575. if (log.isInfoEnabled() && emailTitle != null) {
  576. log.info("{} 邮件{} 下载完成,总计耗时{} ms,文件内容如下\n {}", folderName,
  577. emailTitle, System.currentTimeMillis() - start, emailContentInfoDTOList);
  578. }
  579. } catch (Exception e) {
  580. log.error("{} 获取邮箱的邮件{} 报错,堆栈信息:{}", folderName, emailTitle, ExceptionUtil.stacktraceToString(e));
  581. }
  582. }
  583. folder.close(false);
  584. return emailMessageMap;
  585. }
  586. private void rePart(String account, String subject, Date sendDate, Part part, List<EmailContentInfoDTO> emailContentInfoDTOList) throws Exception {
  587. String disposition = part.getDisposition();
  588. if (disposition != null && (disposition.equals(Part.ATTACHMENT) || disposition.equals(Part.INLINE))) {
  589. String emailDate = DateUtil.format(sendDate, DateConst.YYYYMMDDHHMMSS24);
  590. String emailDateStr = DateUtil.format(sendDate, DateConst.YYYYMMDD);
  591. String filePath = path + File.separator + account + File.separator + emailDateStr + File.separator;
  592. EmailContentInfoDTO emailContentInfoDTO = new EmailContentInfoDTO();
  593. String fileName = MimeUtility.decodeText(part.getFileName());
  594. emailContentInfoDTO.setFileName(fileName);
  595. emailContentInfoDTO.setFileSize(part.getSize());
  596. if (log.isInfoEnabled()) {
  597. log.info("邮件{} 大小:{}byte 开始下载。。。。。", subject, part.getSize());
  598. }
  599. String realPath = filePath + emailDate + fileName;
  600. File saveFile = cn.hutool.core.io.FileUtil.file(realPath);
  601. if (!saveFile.exists()) {
  602. if (!saveFile.getParentFile().exists()) {
  603. boolean mkdirs = saveFile.getParentFile().mkdirs();
  604. if (!mkdirs) {
  605. log.warn("file path mkdir failed.");
  606. }
  607. }
  608. FileUtil.saveFile(saveFile, part);
  609. } else {
  610. cn.hutool.core.io.FileUtil.del(saveFile);
  611. FileUtil.saveFile(saveFile, part);
  612. }
  613. emailContentInfoDTO.setFilePath(saveFile.getAbsolutePath());
  614. emailContentInfoDTO.setEmailAddress(account);
  615. emailContentInfoDTO.setEmailTitle(subject);
  616. emailContentInfoDTO.setEmailDate(DateUtil.format(sendDate, DateConst.YYYY_MM_DD_HH_MM_SS));
  617. emailContentInfoDTOList.add(emailContentInfoDTO);
  618. }
  619. }
  620. private void reMultipart(String account, String subject, Date emailDate, Multipart multipart, List<EmailContentInfoDTO> emailContentInfoDTOList) throws Exception {
  621. for (int i = 0; i < multipart.getCount(); i++) {
  622. Part bodyPart = multipart.getBodyPart(i);
  623. if (bodyPart.getContent() instanceof Multipart mp) {
  624. this.reMultipart(account, subject, emailDate, mp, emailContentInfoDTOList);
  625. } else {
  626. this.rePart(account, subject, emailDate, bodyPart, emailContentInfoDTOList);
  627. }
  628. }
  629. }
  630. private String getSenderEmail(Message message) {
  631. Address[] senderAddress;
  632. try {
  633. senderAddress = message.getFrom();
  634. if (senderAddress == null || senderAddress.length == 0) {
  635. return null;
  636. }
  637. // 此时的address是含有编码(MIME编码方式)后的文本和实际的邮件地址
  638. String address = "";
  639. for (Address from : senderAddress) {
  640. if (StrUtil.isNotBlank(from.toString())) {
  641. address = from.toString();
  642. break;
  643. }
  644. }
  645. // 正则表达式匹配邮件地址
  646. Pattern pattern = Pattern.compile("<(\\S+)>");
  647. Matcher matcher = pattern.matcher(address);
  648. if (matcher.find()) {
  649. return matcher.group(1);
  650. }
  651. // //说明匹配不到,直接获取sender
  652. // Address sender = message.getSender();
  653. // if (sender == null) {
  654. // return address;
  655. // }
  656. // String senderEmail = sender.toString();
  657. // log.info("senderEmail:" + senderEmail + "====================");
  658. // if (senderEmail.contains("<") && senderEmail.contains(">") && senderEmail.indexOf("<") < senderEmail.indexOf(">")) {
  659. // senderEmail = senderEmail.substring(senderEmail.indexOf("<") + 1, senderEmail.length() - 1);
  660. // }
  661. // return senderEmail;
  662. } catch (MessagingException e) {
  663. log.error(e.getMessage(), e);
  664. }
  665. return null;
  666. }
  667. private Message[] getEmailMessage(Folder folder, String protocol, Date startDate) {
  668. try {
  669. if (protocol.contains("imap")) {
  670. // 获取邮件日期大于等于startDate的邮件(搜索条件只支持按天)
  671. SearchTerm startDateTerm = new ReceivedDateTerm(ComparisonTerm.GE, startDate);
  672. return folder.search(startDateTerm);
  673. } else {
  674. return folder.getMessages();
  675. }
  676. } catch (MessagingException e) {
  677. throw new RuntimeException(e);
  678. }
  679. }
  680. }