ExcelUtil.java 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. package com.simuwang.base.common.util;
  2. import cn.hutool.core.collection.CollUtil;
  3. import cn.hutool.core.date.DateUtil;
  4. import cn.hutool.core.exceptions.ExceptionUtil;
  5. import cn.hutool.core.lang.Pair;
  6. import cn.hutool.core.util.StrUtil;
  7. import com.simuwang.base.common.conts.DateConst;
  8. import com.simuwang.base.common.conts.EmailDataDirectionConst;
  9. import com.simuwang.base.pojo.dto.EmailContentInfoDTO;
  10. import org.apache.pdfbox.Loader;
  11. import org.apache.pdfbox.pdmodel.PDDocument;
  12. import org.apache.poi.hssf.usermodel.*;
  13. import org.apache.poi.ss.usermodel.*;
  14. import org.apache.poi.xssf.usermodel.XSSFWorkbook;
  15. import org.jsoup.Jsoup;
  16. import org.jsoup.nodes.Document;
  17. import org.jsoup.nodes.Element;
  18. import org.jsoup.select.Elements;
  19. import org.slf4j.Logger;
  20. import org.slf4j.LoggerFactory;
  21. import java.io.*;
  22. import java.nio.file.Files;
  23. import java.nio.file.Paths;
  24. import java.text.NumberFormat;
  25. import java.time.LocalDate;
  26. import java.time.format.DateTimeFormatter;
  27. import java.util.Date;
  28. import java.util.List;
  29. import java.util.Map;
  30. import org.apache.commons.compress.archivers.ArchiveEntry;
  31. import org.apache.commons.compress.archivers.ArchiveInputStream;
  32. import org.apache.commons.compress.archivers.ArchiveStreamFactory;
  33. import org.apache.commons.compress.utils.IOUtils;
  34. import technology.tabula.ObjectExtractor;
  35. import technology.tabula.Page;
  36. import technology.tabula.PageIterator;
  37. import technology.tabula.RectangularTextContainer;
  38. import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
  39. public class ExcelUtil {
  40. private static final Logger logger = LoggerFactory.getLogger(ExcelUtil.class);
  41. public static boolean isExcel(String fileName) {
  42. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("xls") || fileName.endsWith("xlsx") || fileName.endsWith("XLS") || fileName.endsWith("XLSX"));
  43. }
  44. public static boolean isPdf(String fileName) {
  45. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("pdf") || fileName.endsWith("PDF"));
  46. }
  47. public static boolean isZip(String fileName) {
  48. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("zip") || fileName.endsWith("ZIP"));
  49. }
  50. public static boolean isHTML(String fileName) {
  51. return StrUtil.isNotBlank(fileName) && fileName.endsWith("html");
  52. }
  53. public static Sheet getSheet(File file, int sheetIndex) {
  54. if (file == null || !file.exists()) {
  55. return null;
  56. }
  57. InputStream is = file2InStream(file);
  58. String[] arr = file.getName().split("\\.");
  59. String ext = arr[arr.length - 1];
  60. Sheet sheet = null;
  61. try {
  62. sheet = getSheet(is, ext, sheetIndex);
  63. } catch (Exception e) {
  64. logger.error(e.getMessage());
  65. if (e.getMessage().contains("XSSF instead of HSSF")) {
  66. is = file2InStream(file);
  67. try {
  68. sheet = getSheet(is, "xlsx", sheetIndex);
  69. } catch (IOException e1) {
  70. e1.printStackTrace();
  71. }
  72. } else if (e.getMessage().contains("HSSF instead of XSSF")) {
  73. is = file2InStream(file);
  74. try {
  75. sheet = getSheet(is, "xls", sheetIndex);
  76. } catch (IOException e1) {
  77. e1.printStackTrace();
  78. }
  79. } else if ("xls".equals(ext) && e.getMessage().contains("left 4 bytes remaining still to be read")) {
  80. file = changeXls(file);
  81. is = file2InStream(file);
  82. try {
  83. sheet = getSheet(is, "xls", sheetIndex);
  84. logger.info("文件转换成功!");
  85. } catch (IOException e1) {
  86. e1.printStackTrace();
  87. }
  88. } else {
  89. sheet = null;
  90. }
  91. }
  92. return sheet;
  93. }
  94. public static Sheet getSheet(InputStream is, String ext, int sheetIndex) throws IOException {
  95. if (ext == null) {
  96. ext = "xls";
  97. }
  98. ext = ext.toLowerCase();
  99. Sheet sheet = null;
  100. if ("xls".equals(ext)) {
  101. Workbook wb = new HSSFWorkbook(is);
  102. sheet = getSheet(wb, sheetIndex, null);
  103. } else if ("xlsx".equals(ext)) {
  104. Workbook wb = new XSSFWorkbook(is);
  105. sheet = getSheet(wb, sheetIndex, null);
  106. }
  107. return sheet;
  108. }
  109. private static Sheet getSheet(Workbook wb, int sheetIndex, String sheetName) throws IOException {
  110. if (wb != null) {
  111. Sheet sheet = null;
  112. if (sheetIndex > -1) {
  113. sheet = wb.getSheetAt(sheetIndex);
  114. } else if (StrUtil.isNotBlank(sheetName) && !"null".equals(sheetName.toLowerCase())) {
  115. sheet = wb.getSheet(sheetName);
  116. }
  117. wb.close();
  118. // 同时会把文件输入流关闭
  119. return sheet;
  120. }
  121. return null;
  122. }
  123. public static InputStream file2InStream(File file) {
  124. if (!file.exists()) {
  125. return null;
  126. } else {
  127. FileInputStream is = null;
  128. try {
  129. is = new FileInputStream(file);
  130. } catch (FileNotFoundException var3) {
  131. var3.printStackTrace();
  132. }
  133. return is;
  134. }
  135. }
  136. public static File changeXls(File file) {
  137. try {
  138. jxl.Workbook workbook = jxl.Workbook.getWorkbook(file);
  139. String fileName = "copy-" + file.getName();
  140. File file2 = new File(file.getParent() + "/" + fileName);
  141. if (file2.exists()) {
  142. return file2;
  143. }
  144. jxl.write.WritableWorkbook wwb = jxl.Workbook.createWorkbook(file2, workbook);
  145. wwb.write();
  146. wwb.close();
  147. workbook.close();
  148. return file2;
  149. } catch (Exception e) {
  150. e.printStackTrace();
  151. return null;
  152. }
  153. }
  154. public static String getCellValue(Cell cell) {
  155. if (cell == null) {
  156. return null;
  157. }
  158. String cellValue = "";
  159. switch (cell.getCellTypeEnum()) {
  160. case STRING:
  161. cellValue = cell.getStringCellValue();
  162. break;
  163. case NUMERIC:
  164. if (org.apache.poi.ss.usermodel.DateUtil.isCellDateFormatted(cell)) {
  165. // 如果是日期格式的数字
  166. Date dateCellValue = cell.getDateCellValue();
  167. cellValue = DateUtil.format(dateCellValue, DateConst.YYYY_MM_DD);
  168. } else {
  169. // 否则是纯数字
  170. NumberFormat numberFormat = NumberFormat.getNumberInstance();
  171. numberFormat.setMaximumFractionDigits(15);
  172. double formulaResult = cell.getNumericCellValue();
  173. cellValue = numberFormat.format(formulaResult).replaceAll(",", "");
  174. }
  175. break;
  176. case BOOLEAN:
  177. cellValue = String.valueOf(cell.getBooleanCellValue());
  178. break;
  179. case FORMULA:
  180. // 处理公式结果
  181. try {
  182. cellValue = String.valueOf(cell.getNumericCellValue());
  183. } catch (IllegalStateException e) {
  184. cellValue = cell.getStringCellValue();
  185. }
  186. break;
  187. case BLANK:
  188. break;
  189. case ERROR:
  190. cellValue = "ERROR: " + cell.getErrorCellValue();
  191. break;
  192. default:
  193. cellValue = "";
  194. break;
  195. }
  196. // 去掉换行符号
  197. cellValue = StrUtil.isNotBlank(cellValue) ? cellValue.replaceAll("[\\r\\n]+", "").replaceAll(",","").trim() : "";
  198. return cellValue;
  199. }
  200. public static List<String> extractCompressedFiles(String zipFilePath, String destFilePath) {
  201. List<String> filePathList = CollUtil.newArrayList();
  202. File destFile = new File(destFilePath);
  203. if (!destFile.exists()) {
  204. destFile.mkdirs();
  205. }
  206. try (BufferedInputStream fis = new BufferedInputStream(new FileInputStream(zipFilePath));
  207. ArchiveInputStream ais = new ArchiveStreamFactory().createArchiveInputStream(fis)) {
  208. ArchiveEntry entry;
  209. while ((entry = ais.getNextEntry()) != null) {
  210. File entryFile = new File(destFilePath, entry.getName());
  211. if (entry.isDirectory()) {
  212. entryFile.mkdirs();
  213. } else {
  214. try (FileOutputStream fos = new FileOutputStream(entryFile)) {
  215. IOUtils.copy(ais, fos);
  216. filePathList.add(entryFile.getPath());
  217. }
  218. }
  219. }
  220. } catch (Exception e) {
  221. e.printStackTrace();
  222. }
  223. return filePathList;
  224. }
  225. public static void writeDataToSheet(Sheet sheet, Elements rows) {
  226. int rowSize = rows.size();
  227. for (int rowNum = 0; rowNum < rowSize; rowNum++) {
  228. Row sheetRow = sheet.createRow(rowNum);
  229. Element elementRow = rows.get(rowNum);
  230. Elements cells = elementRow.select("td");
  231. if (cells.size() == 0) {
  232. cells = elementRow.select("th");
  233. }
  234. int cellSize = cells.size();
  235. for (int cellNum = 0; cellNum < cellSize; cellNum++) {
  236. Cell sheetRowCell = sheetRow.createCell(cellNum);
  237. sheetRowCell.setCellValue(cells.get(cellNum).text());
  238. }
  239. }
  240. }
  241. /**
  242. * 获取优先级高的字段值
  243. *
  244. * @param sheetRow 行
  245. * @param priorityPosition 字段位置
  246. * @param basePosition 优先级搞的字段位置
  247. * @return 优先级高的字段值
  248. */
  249. public static String getPriorityFieldValue(Row sheetRow, Integer priorityPosition, Integer basePosition) {
  250. boolean hasPriorityValue = priorityPosition != null && sheetRow.getCell(priorityPosition) != null && StrUtil.isNotBlank(ExcelUtil.getCellValue(sheetRow.getCell(priorityPosition)));
  251. if (hasPriorityValue) {
  252. return ExcelUtil.getCellValue(sheetRow.getCell(priorityPosition));
  253. }
  254. return basePosition != null && sheetRow.getCell(basePosition) != null ? ExcelUtil.getCellValue(sheetRow.getCell(basePosition)) : null;
  255. }
  256. /**
  257. * 去掉逗号
  258. *
  259. * @param numberData 数字型字符串
  260. * @return 无逗号的数字型字符串
  261. */
  262. public static String numberDataStripCommas(String numberData) {
  263. if (StrUtil.isBlank(numberData)) {
  264. return null;
  265. }
  266. // pdf解析到的值带有",",比如:"10,656,097.37"
  267. String data = numberData.replaceAll(",", "");
  268. if (!StringUtil.isNumeric(data)) {
  269. return null;
  270. }
  271. return data;
  272. }
  273. public static HSSFWorkbook getHSSFWorkbook(String sheetName, List<String> title, Map<String, List<List<String>>> valueMap, HSSFWorkbook wb) {
  274. // 第一步,创建一个HSSFWorkbook,对应一个Excel文件
  275. if (wb == null) {
  276. wb = new HSSFWorkbook();
  277. }
  278. try {
  279. // 第二步,在workbook中添加一个sheet,对应Excel文件中的sheet
  280. HSSFSheet sheet = wb.createSheet(sheetName);
  281. // 第三步,在sheet中添加表头第0行,注意老版本poi对Excel的行数列数有限制
  282. HSSFRow row = sheet.createRow(0);
  283. // 第四步,创建单元格,并设置值表头 设置表头居中
  284. HSSFCellStyle style = wb.createCellStyle();
  285. style.setAlignment(HorizontalAlignment.LEFT);
  286. style.setWrapText(true);
  287. sheet.setColumnWidth(0, 10000);
  288. sheet.setColumnWidth(1, 10000);
  289. sheet.setColumnWidth(2, 5000);
  290. sheet.setColumnWidth(3, 8000);
  291. sheet.setColumnWidth(4, 8000);
  292. sheet.setColumnWidth(5, 5000);
  293. //声明列对象
  294. HSSFCell cell = null;
  295. //创建标题
  296. for (int i = 0; i < title.size(); i++) {
  297. cell = row.createCell(i);
  298. cell.setCellValue(title.get(i));
  299. cell.setCellStyle(style);
  300. }
  301. List<List<String>> values = valueMap.get(sheetName);
  302. //创建内容
  303. for (int i = 0; i < values.size(); i++) {
  304. row = sheet.createRow(i + 1);
  305. for (int j = 0; j < values.get(i).size(); j++) {
  306. //将内容按顺序赋给对应的列对象
  307. row.createCell(j).setCellValue(values.get(i).get(j));
  308. }
  309. }
  310. } catch (Exception e) {
  311. logger.error(e.getMessage(), e);
  312. }
  313. return wb;
  314. }
  315. public static Sheet getFirstSheet(String filePath) {
  316. if (StrUtil.isBlank(filePath)) {
  317. return null;
  318. }
  319. try {
  320. File file = new File(filePath);
  321. return ExcelUtil.getSheet(file, 0);
  322. } catch (Exception e) {
  323. logger.error("获取文件的sheet错误 -> 文件路径:{},堆栈信息为:{}", filePath, ExceptionUtil.stacktraceToString(e));
  324. }
  325. return null;
  326. }
  327. public static String pdfConvertToExcel(String filePath, String excelFilePath) {
  328. File savefile = new File(excelFilePath);
  329. if (!savefile.exists()) {
  330. if (!savefile.getParentFile().exists()) {
  331. savefile.getParentFile().mkdirs();
  332. savefile.getParentFile().setExecutable(true);
  333. }
  334. }
  335. try (OutputStream outputStream = Files.newOutputStream(Paths.get(excelFilePath))) {
  336. PDDocument document = Loader.loadPDF(new File(filePath));
  337. PageIterator extract = new ObjectExtractor(document).extract();
  338. Workbook workbook = new XSSFWorkbook();
  339. Sheet sheet = workbook.createSheet("Sheet1");
  340. int preEndRowIndex =0;
  341. while (extract.hasNext()) {
  342. Page next = extract.next();
  343. List<technology.tabula.Table> tableList = new SpreadsheetExtractionAlgorithm().extract(next);
  344. for (technology.tabula.Table table : tableList) {
  345. List<List<RectangularTextContainer>> rows = table.getRows();
  346. int rowLength = rows.size();
  347. for (int rowNum = 0; rowNum < rowLength; rowNum++) {
  348. Row sheetRow = sheet.createRow(rowNum + preEndRowIndex);
  349. List<RectangularTextContainer> textContainerList = rows.get(rowNum);
  350. for (int cellNum = 0; cellNum < textContainerList.size(); cellNum++) {
  351. Cell cell = sheetRow.createCell(cellNum);
  352. RectangularTextContainer textContainer = textContainerList.get(cellNum);
  353. if (textContainer != null) {
  354. cell.setCellValue(textContainer.getText());
  355. }
  356. }
  357. }
  358. preEndRowIndex = preEndRowIndex + rowLength -1;
  359. }
  360. }
  361. // 将Excel工作簿写入输出流
  362. workbook.write(outputStream);
  363. } catch (Exception e) {
  364. logger.error("解析邮件pdf附件报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
  365. return null;
  366. }
  367. return excelFilePath;
  368. }
  369. public static String contentConvertToExcel(String emailContent, String excelFilePath) {
  370. Elements rows;
  371. try {
  372. Document doc = Jsoup.parse(emailContent);
  373. Element table = doc.select("table").first();
  374. rows = table.select("tr");
  375. } catch (Exception e) {
  376. return null;
  377. }
  378. File saveFile = new File(excelFilePath);
  379. if (!saveFile.exists()) {
  380. if (!saveFile.getParentFile().exists()) {
  381. saveFile.getParentFile().mkdirs();
  382. saveFile.getParentFile().setExecutable(true);
  383. }
  384. }
  385. try (OutputStream outputStream = new FileOutputStream(saveFile)) {
  386. // 创建一个新的Excel工作簿
  387. Workbook workbook = new XSSFWorkbook();
  388. Sheet sheet = workbook.createSheet("Sheet1");
  389. ExcelUtil.writeDataToSheet(sheet, rows);
  390. // 将Excel工作簿写入输出流
  391. workbook.write(outputStream);
  392. } catch (Exception e) {
  393. logger.error("邮件正文转换成excel报错 -> 堆栈信息:{}", ExceptionUtil.stacktraceToString(e));
  394. return null;
  395. }
  396. return excelFilePath;
  397. }
  398. /**
  399. * 通过表头所在位置判断是行数据还是列数据
  400. *
  401. * @param fieldPositionMap excel中表头所在的位置
  402. * @return 行方向-1,,列方向-2
  403. */
  404. public static Integer detectDataDirection(Map<String, Pair<Integer, Integer>> fieldPositionMap) {
  405. long count = fieldPositionMap.values().stream().map(Pair::getValue).distinct().count();
  406. return count == 1 ? EmailDataDirectionConst.COLUMN_DIRECTION_TYPE : EmailDataDirectionConst.ROW_DIRECTION_TYPE;
  407. }
  408. // Excel 的起始日期是 1900 年 1 月 1 日
  409. private static final LocalDate EXCEL_EPOCH_DATE = LocalDate.of(1900, 1, 1);
  410. public static String convertExcelDateToString(String dateNum) {
  411. LocalDate startDate = LocalDate.of(1900, 1, 1);
  412. LocalDate localDate = startDate.plusDays(Math.round(Double.valueOf(dateNum) - 2));
  413. DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
  414. return localDate.format(formatter);
  415. }
  416. }