PdfToExcelService.java 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659
  1. package com.simuwang.daq.service;
  2. import cn.hutool.core.collection.CollUtil;
  3. import cn.hutool.core.collection.ListUtil;
  4. import cn.hutool.core.exceptions.ExceptionUtil;
  5. import cn.hutool.core.text.StrBuilder;
  6. import cn.hutool.core.util.StrUtil;
  7. import com.simuwang.base.common.enums.WaterMarkEnum;
  8. import com.simuwang.base.common.util.ExcelUtil;
  9. import com.simuwang.base.pojo.dto.ValuationPdfTransformToExcelDTO;
  10. import org.apache.commons.io.FileUtils;
  11. import org.apache.pdfbox.Loader;
  12. import org.apache.pdfbox.pdmodel.PDDocument;
  13. import org.apache.pdfbox.text.PDFTextStripper;
  14. import org.apache.poi.ss.usermodel.Cell;
  15. import org.apache.poi.ss.usermodel.Row;
  16. import org.apache.poi.ss.usermodel.Sheet;
  17. import org.apache.poi.ss.usermodel.Workbook;
  18. import org.apache.poi.ss.util.CellRangeAddress;
  19. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  20. import org.slf4j.Logger;
  21. import org.slf4j.LoggerFactory;
  22. import org.springframework.beans.factory.annotation.Value;
  23. import org.springframework.stereotype.Component;
  24. import org.springframework.web.multipart.MultipartFile;
  25. import technology.tabula.*;
  26. import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
  27. import java.io.File;
  28. import java.io.IOException;
  29. import java.io.OutputStream;
  30. import java.nio.file.Files;
  31. import java.nio.file.Paths;
  32. import java.util.Arrays;
  33. import java.util.List;
  34. import java.util.stream.Collectors;
  35. @Component
  36. public class PdfToExcelService {
  37. private static final Logger log = LoggerFactory.getLogger(PdfToExcelService.class);
  38. @Value("${email.file.path}")
  39. private String path;
  40. private static final List<String> EXTRA_FIELD_LIST = ListUtil.toList("成本", "市值", "估值增值");
  41. private static final List<String> NUMBER_FIELD_LIST = ListUtil.toList("数量", "单位成本", "成本", "成本占净值",
  42. "成本占净值%", "成本占净值", "市价", "市值", "市值占净值比", "市值占净值比%");
  43. private static final List<String> TITLE_FIELD_LIST = ListUtil.toList("单位净值:", "位净值:", "估值日期:", "值日期:", "单位:",
  44. "科目代码", "科目名", "数量", "单位成本", "位成本", "成本", "成本占净值%", "本币", "十亿千百十",
  45. "成本占净值", "市价", "市值", "值", "市值占净值%", "市值占净", "估值增值", "停牌信息", "权益信息");
  46. public static final List<String> TOTAL_MARKET_FIELD = ListUtil.toList("资合计", "资类合计:", "资类合计:", "资净值", "基金资净值:",
  47. "基金资净值:", "资产资净值", "资资净值:", "资资净值:", "信托资净值:", "信托资净值:");
  48. /**
  49. * PDF类型的估值表文件转为excel类型
  50. *
  51. * @param fileList 待转换的文件
  52. * @return 转换为excel类型的文件
  53. */
  54. public List<ValuationPdfTransformToExcelDTO> pdfToExcel(List<File> fileList) {
  55. if (fileList == null) {
  56. return CollUtil.newArrayList();
  57. }
  58. List<File> pdfFileList = fileList.stream()
  59. .filter(e -> ExcelUtil.isPdf(e.getName())).collect(Collectors.toList());
  60. if (CollUtil.isEmpty(pdfFileList)) {
  61. return CollUtil.newArrayList();
  62. }
  63. List<ValuationPdfTransformToExcelDTO> pdfToExcelDTOList = CollUtil.newArrayList();
  64. String excelUploadDir = path + File.separator + "valuation_table_excel" + File.separator;
  65. for (File multipartFile : pdfFileList) {
  66. String excelFilePath = excelUploadDir + multipartFile.getName().replace(".pdf", "") + System.currentTimeMillis()+".xlsx";
  67. File savefile = new File(excelFilePath);
  68. if (!savefile.exists()) {
  69. if (!savefile.getParentFile().exists()) {
  70. savefile.getParentFile().mkdirs();
  71. savefile.getParentFile().setExecutable(true);
  72. }
  73. }
  74. try (OutputStream outputStream = Files.newOutputStream(Paths.get(excelFilePath))) {
  75. PDDocument document = Loader.loadPDF(multipartFile);
  76. //采用pdfBox读取PDF内容
  77. List<String> lines = readTextFromPDFUsingPdfBox(document);
  78. //判断PDF是否存在水印 以及 水印类型(waterMarkType为null,代表没有水印)
  79. WaterMarkEnum waterMark = getPdfWaterMarkType(lines);
  80. //创建一个新的Excel工作簿
  81. Workbook workbook = new XSSFWorkbook();
  82. Sheet sheet = workbook.createSheet("Sheet1");
  83. //1.处理header行(估值表,专用表,日期和单位净值)
  84. Integer headerRowNumber = handlerExcelHeader(lines, sheet, waterMark);
  85. //采用tabula技术提取表格内容
  86. List<String> lineList = extractTableFormPDFUsingTabula(document, waterMark);
  87. //2.额外处理标题行
  88. Integer rowNumber = handlerTitleField(lineList, sheet, headerRowNumber);
  89. //招商证券股份有限公司的估值表有特殊情况,存在两个标题字段列
  90. boolean isSpecial = rowNumber == 7;
  91. //3.合并标题行单元格
  92. mergeExcelCell(sheet, isSpecial);
  93. //4.遍历每一行并将其添加到Excel工作表中
  94. readDateToExcel(sheet, lineList, rowNumber, waterMark);
  95. //将Excel工作簿写入输出流
  96. workbook.write(outputStream);
  97. document.close();
  98. ValuationPdfTransformToExcelDTO toExcelDTO = new ValuationPdfTransformToExcelDTO();
  99. toExcelDTO.setOriginalFileName(multipartFile.getName());
  100. toExcelDTO.setExcelFile(new File(excelFilePath));
  101. toExcelDTO.setFileUrl(multipartFile.getAbsolutePath());
  102. pdfToExcelDTOList.add(toExcelDTO);
  103. } catch (Exception e) {
  104. log.error("pdf转为excel报错,堆栈:{}", ExceptionUtil.stacktraceToString(e));
  105. }
  106. }
  107. return pdfToExcelDTOList;
  108. }
  109. /**
  110. * 估值表_纽达投资利道永晟五号私募证券投资基金_SGL183_202312292024-02-26.pdf
  111. *
  112. * @param lines pdfBox读取PDF内容中的水印文字
  113. * @param waterMark 水印
  114. */
  115. private List<String> removeWaterMark(List<String> lines, WaterMarkEnum waterMark) {
  116. if (waterMark == null) {
  117. return lines;
  118. }
  119. String regex = waterMark.getRegex();
  120. return lines.stream().map(e -> e.replaceAll(regex, "")).filter(StrUtil::isNotBlank).collect(Collectors.toList());
  121. }
  122. private WaterMarkEnum getPdfWaterMarkType(List<String> lines) {
  123. if (CollUtil.isEmpty(lines)) {
  124. return null;
  125. }
  126. //pdf文本内容
  127. String content = String.join("", lines);
  128. for (WaterMarkEnum waterMarkEnum : WaterMarkEnum.values()) {
  129. boolean containsAllKeywords = true;
  130. String waterMarkText = waterMarkEnum.getText();
  131. String[] waterMarkTextArray = StrUtil.cut(waterMarkText, 1);
  132. for (String text : waterMarkTextArray) {
  133. if (!content.contains(text)) {
  134. containsAllKeywords = false;
  135. break;
  136. }
  137. }
  138. if (containsAllKeywords) {
  139. return waterMarkEnum;
  140. }
  141. }
  142. return null;
  143. }
  144. private List<String> extractTableFormPDFUsingTabula(PDDocument document, WaterMarkEnum waterMark) {
  145. StrBuilder tableDataText = new StrBuilder();
  146. PageIterator extract = new ObjectExtractor(document).extract();
  147. String regex = waterMark != null ? waterMark.getRegex() : null;
  148. if (StrUtil.isBlank(regex)) {
  149. while (extract.hasNext()) {
  150. Page next = extract.next();
  151. List<Table> tableList = new SpreadsheetExtractionAlgorithm().extract(next);
  152. for (Table table : tableList) {
  153. List<List<RectangularTextContainer>> rows = table.getRows();
  154. for (List<RectangularTextContainer> row : rows) {
  155. String collect = row.stream().map(RectangularTextContainer::getText)
  156. .map(e -> e.replace(" ", ""))
  157. .collect(Collectors.joining(" "));
  158. tableDataText.append("\n").append(collect);
  159. }
  160. }
  161. }
  162. } else {
  163. while (extract.hasNext()) {
  164. Page next = extract.next();
  165. List<Table> tableList = new SpreadsheetExtractionAlgorithm().extract(next);
  166. for (Table table : tableList) {
  167. List<List<RectangularTextContainer>> rows = table.getRows();
  168. for (List<RectangularTextContainer> row : rows) {
  169. String collect = row.stream().map(RectangularTextContainer::getText)
  170. .map(e -> e.replace(" ", ""))
  171. .map(e -> e.replaceAll(regex, ""))
  172. .collect(Collectors.joining(" "));
  173. tableDataText.append("\n").append(collect);
  174. }
  175. }
  176. }
  177. }
  178. //将文本内容按行分割
  179. return Arrays.stream(tableDataText.toString().trim().split("\\n")).collect(Collectors.toList());
  180. }
  181. private List<String> readTextFromPDFUsingPdfBox(PDDocument document) {
  182. PDFTextStripper pdfTextStripper = new PDFTextStripper();
  183. pdfTextStripper.setSortByPosition(true);
  184. String text = "";
  185. try {
  186. text = pdfTextStripper.getText(document);
  187. } catch (IOException e) {
  188. log.error("error: read text from pdf...", e);
  189. }
  190. //将文本内容按行分割
  191. return Arrays.stream(text.split("\\n")).collect(Collectors.toList());
  192. }
  193. /**
  194. * 处理excel的表头行(估值表行,专用表行,日期和单位净值行)
  195. *
  196. * @param lines PDF每行内容
  197. * @param sheet 表格
  198. * @param waterMark 水印
  199. * @return 返回header所占的行数
  200. */
  201. private Integer handlerExcelHeader(List<String> lines, Sheet sheet, WaterMarkEnum waterMark) {
  202. //去掉水印文字(去掉pdfBox读取PDF内容中的水印文字)
  203. List<String> noWaterMarkText = removeWaterMark(lines, waterMark);
  204. //去掉以空格和换行字符开头的数据行:" \r"(国泰君安证券股份有限公司_嘉禾三号私募证券投资基金_专用表)
  205. noWaterMarkText = noWaterMarkText.stream().filter(this::filterHeadLine).collect(Collectors.toList());
  206. //遇到"科目代码"行之前的前三行数据为header行
  207. List<String> headLineList = CollUtil.newArrayList();
  208. for (String line : noWaterMarkText) {
  209. if ((StrUtil.isNotBlank(line) && line.trim().startsWith("科目代码"))) {
  210. break;
  211. }
  212. headLineList.add(line);
  213. }
  214. //head行过滤掉非以"科目"开头的数据行
  215. headLineList = headLineList.stream().filter(e -> e.startsWith("科目") || filterTitleField(e)).collect(Collectors.toList());
  216. //将head行写入表格
  217. for (int i = 0; i < headLineList.size(); i++) {
  218. Row row = sheet.createRow(i);
  219. List<String> split = splitHeaderRow(headLineList.get(i));
  220. for (int i1 = 0; i1 < split.size(); i1++) {
  221. Cell cell = row.createCell(i1);
  222. cell.setCellValue(split.get(i1));
  223. }
  224. }
  225. return sheet.getLastRowNum() + 1;
  226. }
  227. /**
  228. * 分隔和合并head行的内容
  229. *
  230. * @param headRowValue head行的内容
  231. * @return 处理后的head行的内容
  232. */
  233. private List<String> splitHeaderRow(String headRowValue) {
  234. if (StrUtil.isBlank(headRowValue)) {
  235. return CollUtil.newArrayList();
  236. }
  237. //国泰君安证券股份有限公司_嘉禾三号私募证券投资基金_专用表(pdfBox识别不到"_")
  238. if (headRowValue.contains("国泰君安证券股份有限公司") && !headRowValue.contains("_")) {
  239. List<String> collect = Arrays.stream(headRowValue.split(" ")).filter(StrUtil::isNotBlank).collect(Collectors.toList());
  240. return ListUtil.toList(String.join("_", collect));
  241. }
  242. return Arrays.stream(headRowValue.split(" ")).filter(StrUtil::isNotBlank).collect(Collectors.toList());
  243. }
  244. private boolean filterHeadLine(String text) {
  245. if (StrUtil.isBlank(text)) {
  246. return false;
  247. }
  248. if (text.trim().startsWith("成本")) {
  249. return false;
  250. }
  251. String replace = text.replace("\r", "").replace(" ", "");
  252. return !StrUtil.isBlank(replace);
  253. }
  254. private void readDateToExcel(Sheet sheet, List<String> lines, Integer rowNumber, WaterMarkEnum waterMark) {
  255. int baseRowNumber = 0;
  256. for (int i = 0; i <= sheet.getLastRowNum(); i++) {
  257. if ("科目代码".equals(sheet.getRow(i).getCell(0).getStringCellValue())) {
  258. baseRowNumber = i;
  259. break;
  260. }
  261. }
  262. for (String field : lines) {
  263. //判断field数据是否要写入表格
  264. boolean isNeedToWrite = canNeedToWrite(field);
  265. if (!isNeedToWrite) {
  266. continue;
  267. }
  268. Row row = sheet.createRow(rowNumber);
  269. List<String> split = Arrays.stream(field.split(" ")).collect(Collectors.toList());
  270. for (int i1 = 0; i1 < split.size(); i1++) {
  271. Cell cell = row.createCell(i1);
  272. //处理空格的问题
  273. String cellValue = StrUtil.isBlank(split.get(i1)) ? null : split.get(i1).replace("*", "");
  274. //要求为数值的字段,去掉非字符字段
  275. if (StrUtil.isNotBlank(cellValue) && NUMBER_FIELD_LIST.contains(sheet.getRow(baseRowNumber).getCell(i1).getStringCellValue())) {
  276. cellValue = filterNotNumber(cellValue);
  277. }
  278. //特殊处理:科目代码后面带有字母的,如1102.01.01.605056 SH、SQ、CFX等
  279. if (i1 == 0 && i1 + 1 < split.size()) {
  280. String nextCellValue = StrUtil.isBlank(split.get(i1 + 1)) ? null : split.get(i1 + 1).replace("*", "");
  281. if (StrUtil.isNotBlank(nextCellValue) && nextCellValue.matches("^[A-Z]+$")) {
  282. cellValue = (cellValue + " " + nextCellValue).trim();
  283. split.remove(1);
  284. }
  285. }
  286. cellValue = removeInfluenceOfWaterMark(cellValue, waterMark);
  287. cell.setCellValue(cellValue);
  288. }
  289. rowNumber++;
  290. }
  291. }
  292. /**
  293. * 处理水印文字造成的影响(总资产、资产净值两个字段的影响)
  294. *
  295. * @param cellValue 单元格内容
  296. * @param waterMark 水印
  297. * @return 无水印影响的单元格内容
  298. */
  299. private String removeInfluenceOfWaterMark(String cellValue, WaterMarkEnum waterMark) {
  300. if (StrUtil.isBlank(cellValue) || waterMark == null) {
  301. return cellValue;
  302. }
  303. String influenceText = StrUtil.isBlank(waterMark.getInfluenceText()) ? null : waterMark.getInfluenceText();
  304. if (StrUtil.isBlank(influenceText)) {
  305. return cellValue;
  306. }
  307. String rectifyText = waterMark.getRectifyText();
  308. return RestoreCellValue(cellValue, influenceText, rectifyText);
  309. }
  310. private String RestoreCellValue(String cellValue, String influenceText, String rectifyText) {
  311. if (StrUtil.isBlank(cellValue) || StrUtil.isBlank(influenceText) || StrUtil.isBlank(rectifyText)) {
  312. return cellValue;
  313. }
  314. StringBuilder sb = new StringBuilder();
  315. if ("产".equals(influenceText)) {
  316. for (int i = 0; i < cellValue.length(); i++) {
  317. if (cellValue.charAt(i) == '资') {
  318. sb.append(cellValue.charAt(i));
  319. sb.append("产");
  320. } else {
  321. sb.append(cellValue.charAt(i));
  322. }
  323. }
  324. }
  325. if ("资".equals(influenceText)) {
  326. for (int i = 0; i < cellValue.length(); i++) {
  327. if (cellValue.charAt(i) == '产') {
  328. sb.append("资");
  329. sb.append(cellValue.charAt(i));
  330. } else {
  331. sb.append(cellValue.charAt(i));
  332. }
  333. }
  334. }
  335. return sb.toString();
  336. }
  337. private String filterNotNumber(String cellValue) {
  338. StringBuilder numberStr = new StringBuilder();
  339. for (int i = 0; i < cellValue.length(); i++) {
  340. char c = cellValue.charAt(i);
  341. if (Character.isDigit(c) || c == '.' || c=='-') {
  342. numberStr.append(c);
  343. }
  344. }
  345. return numberStr.toString();
  346. }
  347. private boolean canNeedToWrite(String field) {
  348. //过滤以字段列开头的行
  349. boolean isCanContinue = filterTitleField(field);
  350. if (!isCanContinue) {
  351. return false;
  352. }
  353. //过滤包含"参考","第","页"的行
  354. if (field.contains("参考") || (field.contains("第") && field.contains("页"))) {
  355. return false;
  356. }
  357. //过滤掉空行
  358. List<String> titleFieldList = Arrays.stream(field.split(" ")).filter(StrUtil::isNotBlank).collect(Collectors.toList());
  359. if (CollUtil.isEmpty(titleFieldList)) {
  360. return false;
  361. }
  362. //过滤只有"%"的数据行
  363. boolean allPercent = titleFieldList.stream().allMatch("%"::equals);
  364. return !allPercent;
  365. }
  366. private boolean filterTitleField(String field) {
  367. if (StrUtil.isBlank(field)) {
  368. return false;
  369. }
  370. for (String titleField : TITLE_FIELD_LIST) {
  371. if (field.trim().startsWith(titleField)) {
  372. return false;
  373. }
  374. }
  375. return true;
  376. }
  377. /**
  378. * 处理excel的字段标题列
  379. *
  380. * @param lines PDF内容
  381. * @param sheet excel工作簿
  382. * @param headerRowNumber header行所占的行数
  383. */
  384. private Integer handlerTitleField(List<String> lines, Sheet sheet, Integer headerRowNumber) {
  385. Integer rowNumber = headerRowNumber;
  386. //1.先筛选出属于标题行的内容
  387. List<String> titleFieldList = getTitleFieldRow(lines);
  388. lines.removeAll(titleFieldList);
  389. //找到以"科目代码"开头的数据行
  390. int skipNumber = 0;
  391. for (String field : titleFieldList) {
  392. if (field.trim().startsWith("科目代码")) {
  393. break;
  394. }
  395. skipNumber++;
  396. }
  397. titleFieldList = titleFieldList.stream().skip(skipNumber).collect(Collectors.toList());
  398. //2.处理字段标题列
  399. //1-字段行占1行:正常的字段列11列
  400. if (titleFieldList.size() == 1) {
  401. handleOneRowTitleField(titleFieldList, sheet, rowNumber);
  402. return rowNumber + 1;
  403. } else if (titleFieldList.size() == 2) {
  404. //2-字段行占2行:第2行字段行一般是无用的字段:例如 %等
  405. //处理: 识别不到"原币"、"本币"所在的数据行,导致表格的数据对不上字段列
  406. String secondRowValue = titleFieldList.get(titleFieldList.size() - 1);
  407. if (secondRowValue.trim().contains("十亿千百十万千")) {
  408. int length = secondRowValue.trim().split(" ").length;
  409. if (length >= 6) {
  410. String extraRowValue = "原币 本币 原币 本币 原币 本币";
  411. titleFieldList.add(1, extraRowValue);
  412. }
  413. }
  414. if (titleFieldList.size() == 3) {
  415. handleThreeRowTitleField(titleFieldList, sheet, rowNumber);
  416. rowNumber += 3;
  417. } else {
  418. handleOneRowTitleField(titleFieldList, sheet, rowNumber);
  419. rowNumber++;
  420. }
  421. return rowNumber;
  422. } else if (titleFieldList.size() == 3) {
  423. //3-字段行占3行:有原币或者原币、本币字段的17列
  424. handleThreeRowTitleField(titleFieldList, sheet, rowNumber);
  425. return rowNumber + 3;
  426. } else if (titleFieldList.size() == 4) {
  427. //4-字段行占4行:存在两行"科目代码"的特殊字段列,14列(招商证券pdf估值表)
  428. handleFourRowTitleField(titleFieldList, sheet, rowNumber);
  429. return rowNumber + 4;
  430. }
  431. return rowNumber;
  432. }
  433. private void handleFourRowTitleField(List<String> titleFieldList, Sheet sheet, Integer rowNumber) {
  434. List<String> fisrtTitleList = Arrays.stream(titleFieldList.get(0).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
  435. List<String> secondTitleList = Arrays.stream(titleFieldList.get(1).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
  436. List<String> thirdTitleList = Arrays.stream(titleFieldList.get(2).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
  437. List<String> fourTitleList = Arrays.stream(titleFieldList.get(3).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
  438. //第一列"科目代码"
  439. Row firstRow = sheet.createRow(rowNumber);
  440. for (int i = 0; i < fisrtTitleList.size(); i++) {
  441. Cell firstCell = firstRow.createCell(i);
  442. firstCell.setCellValue(fisrtTitleList.get(i));
  443. }
  444. //第二个"科目代码"
  445. Row secondRow = sheet.createRow(rowNumber + 1);
  446. Row thirdRow = sheet.createRow(rowNumber + 2);
  447. Row fourRow = sheet.createRow(rowNumber + 3);
  448. int count = 0;
  449. for (int index = 0; index < secondTitleList.size(); index++) {
  450. Cell secondRowCell = secondRow.createCell(index);
  451. Cell thirdCell = thirdRow.createCell(index);
  452. Cell fourCell = fourRow.createCell(index);
  453. String field = secondTitleList.get(index);
  454. if (EXTRA_FIELD_LIST.contains(field)) {
  455. secondRowCell.setCellValue(field);
  456. thirdCell.setCellValue(thirdTitleList.get(count));
  457. fourCell.setCellValue(fourTitleList.get(count));
  458. count++;
  459. } else {
  460. secondRowCell.setCellValue(field);
  461. thirdCell.setCellValue(field);
  462. fourCell.setCellValue(field);
  463. }
  464. }
  465. }
  466. private void handleThreeRowTitleField(List<String> titleFieldList, Sheet sheet, Integer rowNumber) {
  467. //方案一:先填充每个字段,再合并单元格
  468. List<String> baseTitleList = CollUtil.newArrayList();
  469. List<String> nextTitleList = Arrays.stream(titleFieldList.get(1).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
  470. List<String> next2TitleList = Arrays.stream(titleFieldList.get(2).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
  471. //特殊处理下:如果包含了本币和原币,那么baseTitleList需要重复"成本", "市值", "估值增值"字段
  472. List<String> titleList = Arrays.stream(titleFieldList.get(0).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
  473. if (nextTitleList.contains("原币") && nextTitleList.contains("本币")) {
  474. for (String titleField : titleList) {
  475. baseTitleList.add(titleField);
  476. if (EXTRA_FIELD_LIST.contains(titleField)) {
  477. baseTitleList.add(titleField);
  478. }
  479. }
  480. } else if (nextTitleList.contains("本币") || nextTitleList.contains("原币")) {
  481. baseTitleList.addAll(titleList);
  482. }
  483. Row firstRow = sheet.createRow(rowNumber);
  484. Row secondRow = sheet.createRow(rowNumber + 1);
  485. Row thirdRow = sheet.createRow(rowNumber + 2);
  486. int count = 0;
  487. for (int index = 0; index < baseTitleList.size(); index++) {
  488. Cell firstCell = firstRow.createCell(index);
  489. Cell secondRowCell = secondRow.createCell(index);
  490. Cell thirdCell = thirdRow.createCell(index);
  491. //特殊处理"成本", "市值", "估值增值"字段
  492. String field = baseTitleList.get(index);
  493. if (EXTRA_FIELD_LIST.contains(field)) {
  494. if (nextTitleList.contains("原币") || nextTitleList.contains("本币")) {
  495. firstCell.setCellValue(field);
  496. secondRowCell.setCellValue(nextTitleList.get(count));
  497. thirdCell.setCellValue(next2TitleList.get(count));
  498. count++;
  499. }
  500. } else {
  501. firstCell.setCellValue(field);
  502. secondRowCell.setCellValue(field);
  503. thirdCell.setCellValue(field);
  504. }
  505. }
  506. }
  507. private void handleOneRowTitleField(List<String> titleFieldList, Sheet sheet, Integer rowNumber) {
  508. Row row = sheet.createRow(rowNumber);
  509. List<String> baseTitleList = Arrays.stream(titleFieldList.get(0).split(" "))
  510. .filter(e -> !" ".equals(e) && StrUtil.isNotBlank(e)).collect(Collectors.toList());
  511. for (int i = 0; i < baseTitleList.size(); i++) {
  512. Cell cell = row.createCell(i);
  513. cell.setCellValue(baseTitleList.get(i));
  514. }
  515. }
  516. /**
  517. * 找到字段标题行
  518. *
  519. * @param lines PDF表格的所有行
  520. * @return 字段标题行
  521. */
  522. private List<String> getTitleFieldRow(List<String> lines) {
  523. List<String> titleFieldList = CollUtil.newArrayList();
  524. for (String line : lines) {
  525. //如果当前行的第一个值是整数(意味着当前的前一行为科目代码行了),那么说明已经找完所有标题行内容
  526. List<String> rowValueList = Arrays.stream(line.split(" ")).collect(Collectors.toList());
  527. String firstValueOfRow = CollUtil.isNotEmpty(rowValueList) ? rowValueList.get(0) : null;
  528. if (isInteger(firstValueOfRow)) {
  529. break;
  530. }
  531. if (StrUtil.isNotBlank(line)) {
  532. titleFieldList.add(line);
  533. }
  534. }
  535. return titleFieldList;
  536. }
  537. private void mergeExcelCell(Sheet sheet, boolean isSpecial) {
  538. if (sheet == null || sheet.getLastRowNum() < 4) {
  539. return;
  540. }
  541. //先找到"科目代码"字段列在excel中的行号
  542. Integer mixRowNumber = null;
  543. Integer maxRowNumber = null;
  544. for (int i = 0; i < sheet.getLastRowNum(); i++) {
  545. Row row = sheet.getRow(i);
  546. //找第一个"科目代码"出现的行数
  547. if ("科目代码".equals(row.getCell(0).toString())) {
  548. mixRowNumber = mixRowNumber != null ? mixRowNumber : i;
  549. mixRowNumber = isSpecial ? mixRowNumber + 1 : mixRowNumber;
  550. isSpecial = false;
  551. }
  552. //找最后一个"科目代码"出现的行数
  553. if (mixRowNumber != null && "科目代码".equals(row.getCell(0).toString()) && i == sheet.getLastRowNum() - 1) {
  554. maxRowNumber = i + 1;
  555. break;
  556. }
  557. }
  558. //参数分别为:起始行号、结束行号、起始列号、结束列号
  559. if (mixRowNumber != null && maxRowNumber != null && !mixRowNumber.equals(maxRowNumber)) {
  560. Row firstRow = sheet.getRow(mixRowNumber);
  561. Row lastRow = sheet.getRow(maxRowNumber);
  562. int rowNum = firstRow.getLastCellNum();
  563. for (int i = 0; i < rowNum; i++) {
  564. //不同行,同一列的上下单元格合并
  565. if (firstRow.getCell(i).toString().equals(lastRow.getCell(i).toString())) {
  566. sheet.addMergedRegion(new CellRangeAddress(mixRowNumber, maxRowNumber, i, i));
  567. }
  568. //同一行的左右两个单元格合并
  569. if (i + 1 < rowNum && firstRow.getCell(i).toString().equals(firstRow.getCell(i + 1).toString())) {
  570. sheet.addMergedRegion(new CellRangeAddress(mixRowNumber, mixRowNumber, i, i + 1));
  571. }
  572. }
  573. }
  574. }
  575. public boolean isInteger(String str) {
  576. if (StrUtil.isBlank(str)) {
  577. return false;
  578. }
  579. return str.matches("^-?\\d+$");
  580. }
  581. public File saveFile(MultipartFile file, String excelUploadDir, String newExcelName) {
  582. File targetFile = null;
  583. try {
  584. targetFile = new File(excelUploadDir, newExcelName);
  585. FileUtils.writeByteArrayToFile(targetFile, file.getBytes());
  586. } catch (IOException e) {
  587. e.printStackTrace();
  588. log.error("save file error,error -> {}", e.getMessage());
  589. return targetFile;
  590. }
  591. return targetFile;
  592. }
  593. public File saveFile(File file, String excelUploadDir, String newExcelName) {
  594. File targetFile = null;
  595. try {
  596. targetFile = new File(excelUploadDir, newExcelName);
  597. FileUtils.writeByteArrayToFile(targetFile, Files.readAllBytes(file.toPath()));
  598. } catch (IOException e) {
  599. e.printStackTrace();
  600. log.error("save file error,error -> {}", e.getMessage());
  601. return targetFile;
  602. }
  603. return targetFile;
  604. }
  605. }