ExcelUtil.java 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. package com.smppw.modaq.infrastructure.util;
  2. import cn.hutool.core.collection.CollUtil;
  3. import cn.hutool.core.collection.ListUtil;
  4. import cn.hutool.core.io.FileUtil;
  5. import cn.hutool.core.util.StrUtil;
  6. import net.sf.sevenzipjbinding.ExtractOperationResult;
  7. import net.sf.sevenzipjbinding.IInArchive;
  8. import net.sf.sevenzipjbinding.SevenZip;
  9. import net.sf.sevenzipjbinding.SevenZipException;
  10. import net.sf.sevenzipjbinding.impl.RandomAccessFileInStream;
  11. import net.sf.sevenzipjbinding.simple.ISimpleInArchive;
  12. import net.sf.sevenzipjbinding.simple.ISimpleInArchiveItem;
  13. import org.apache.commons.compress.archivers.ArchiveEntry;
  14. import org.apache.commons.compress.archivers.ArchiveException;
  15. import org.apache.commons.compress.archivers.ArchiveInputStream;
  16. import org.apache.commons.compress.archivers.ArchiveStreamFactory;
  17. import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
  18. import org.apache.commons.compress.archivers.zip.ZipFile;
  19. import org.apache.commons.io.IOUtils;
  20. import java.io.*;
  21. import java.nio.file.Files;
  22. import java.nio.file.Path;
  23. import java.nio.file.Paths;
  24. import java.util.Arrays;
  25. import java.util.Enumeration;
  26. import java.util.List;
  27. public class ExcelUtil {
  28. // 候选编码列表(按常见顺序排列)
  29. private static final List<String> CANDIDATE_ENCODINGS = Arrays.asList(
  30. "GBK", // 中文环境常用
  31. "UTF-8", // 标准编码
  32. "GB2312", // 旧版中文
  33. "ISO-8859-1" // 默认回退
  34. );
  35. public static boolean isExcel(String fileName) {
  36. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("xls") || fileName.endsWith("xlsx") || fileName.endsWith("XLS") || fileName.endsWith("XLSX"));
  37. }
  38. public static boolean isPdf(String fileName) {
  39. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("pdf") || fileName.endsWith("PDF"));
  40. }
  41. public static boolean isZip(String fileName) {
  42. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("zip") || fileName.endsWith("ZIP"));
  43. }
  44. public static boolean isHTML(String fileName) {
  45. return StrUtil.isNotBlank(fileName) && fileName.endsWith("html");
  46. }
  47. public static boolean isRAR(String fileName) {
  48. return StrUtil.isNotBlank(fileName) && (fileName.endsWith("rar") || fileName.endsWith("RAR"));
  49. }
  50. public static List<String> extractCompressedFiles(String zipFilePath, String destFilePath) throws IOException, ArchiveException {
  51. List<String> filePathList = CollUtil.newArrayList();
  52. File destFile = FileUtil.file(destFilePath);
  53. if (!destFile.exists()) {
  54. Files.createDirectories(destFile.toPath());
  55. }
  56. String encoding = detectEncoding(zipFilePath);
  57. if (encoding == null) {
  58. encoding = "GBK";
  59. }
  60. try (BufferedInputStream fis = new BufferedInputStream(new FileInputStream(zipFilePath));
  61. ArchiveInputStream<? extends ArchiveEntry> ais = new ArchiveStreamFactory()
  62. .createArchiveInputStream(ArchiveStreamFactory.detect(fis), fis, encoding)) {
  63. ArchiveEntry entry;
  64. while ((entry = ais.getNextEntry()) != null) {
  65. String name = entry.getName();
  66. if (entry.isDirectory()) {
  67. File entryFile = FileUtil.file(destFilePath, name);
  68. Files.createDirectories(entryFile.toPath());
  69. } else {
  70. if (name.startsWith("__MACOSX/")) {
  71. continue;
  72. }
  73. String zipFilename = FileUtil.getName(destFilePath);
  74. if (zipFilename.contains("确认") && !name.contains("确认")) {
  75. String ext = FileUtil.extName(name);
  76. name = StrUtil.subBefore(name, ".", true);
  77. name = name + "_确认单." + ext;
  78. }
  79. File entryFile = FileUtil.file(destFilePath, name);
  80. try (FileOutputStream fos = new FileOutputStream(entryFile)) {
  81. IOUtils.copy(ais, fos);
  82. filePathList.add(entryFile.getPath());
  83. }
  84. }
  85. }
  86. } catch (Exception e) {
  87. if (e.getMessage() != null
  88. && (e.getMessage().contains("split")
  89. || e.getMessage().contains("volume"))) {
  90. filePathList.addAll(extractSplitZip(zipFilePath, destFilePath, encoding));
  91. } else {
  92. throw e;
  93. }
  94. }
  95. return filePathList;
  96. }
  97. public static List<String> extractSplitZip(String zipFilePath, String destFilePath, String encoding) throws IOException {
  98. List<String> resultList = ListUtil.list(false);
  99. File file = new File(zipFilePath);
  100. try (ZipFile zipFile = ZipFile.builder().setFile(file).setCharset(encoding).get()) {
  101. Enumeration<ZipArchiveEntry> entries = zipFile.getEntries();
  102. while (entries.hasMoreElements()) {
  103. ZipArchiveEntry entry = entries.nextElement();
  104. // 解压到目标目录
  105. try (InputStream is = zipFile.getInputStream(entry)) {
  106. Path path = Paths.get(destFilePath, entry.getName());
  107. FileUtil.del(path);
  108. Files.copy(is, path);
  109. resultList.add(path.toAbsolutePath().toString());
  110. }
  111. }
  112. }
  113. return resultList;
  114. }
  115. public static List<String> extractRar5(String rarFilePath, String outputDir) throws Exception {
  116. // 初始化 SevenZipJBinding 本地库
  117. SevenZip.initSevenZipFromPlatformJAR();
  118. RandomAccessFile randomAccessFile = null;
  119. IInArchive inArchive = null;
  120. List<String> resultList = ListUtil.list(false);
  121. try {
  122. // 打开 RAR 文件
  123. randomAccessFile = new RandomAccessFile(rarFilePath, "r");
  124. inArchive = SevenZip.openInArchive(null, new RandomAccessFileInStream(randomAccessFile));
  125. // 获取压缩包中的文件列表
  126. ISimpleInArchive simpleInArchive = inArchive.getSimpleInterface();
  127. for (ISimpleInArchiveItem item : simpleInArchive.getArchiveItems()) {
  128. if (!item.isFolder()) {
  129. resultList.add(extractItem(item, outputDir));
  130. }
  131. }
  132. } finally {
  133. // 释放资源
  134. if (inArchive != null) {
  135. inArchive.close();
  136. }
  137. if (randomAccessFile != null) {
  138. randomAccessFile.close();
  139. }
  140. }
  141. return resultList;
  142. }
  143. private static String extractItem(ISimpleInArchiveItem item, String outputDir) throws SevenZipException {
  144. String filePath = outputDir + File.separator + item.getPath();
  145. File outputFile = FileUtil.file(filePath);
  146. // 创建父目录
  147. File parentDir = outputFile.getParentFile();
  148. if (!parentDir.exists() && !parentDir.mkdirs()) {
  149. throw new SevenZipException("无法创建目录: " + parentDir.getAbsolutePath());
  150. }
  151. // 提取文件内容
  152. try (FileOutputStream fos = new FileOutputStream(outputFile)) {
  153. ExtractOperationResult result = item.extractSlow(data -> {
  154. try {
  155. fos.write(data);
  156. return data.length; // 返回写入的字节数
  157. } catch (IOException e) {
  158. throw new SevenZipException("写入文件失败", e);
  159. }
  160. });
  161. if (result != ExtractOperationResult.OK) {
  162. throw new SevenZipException("解压失败: " + result);
  163. }
  164. } catch (IOException e) {
  165. throw new SevenZipException("文件操作失败", e);
  166. }
  167. return outputFile.getAbsolutePath();
  168. }
  169. // 检测压缩包编码
  170. private static String detectEncoding(String zipPath) {
  171. for (String encoding : CANDIDATE_ENCODINGS) {
  172. try (BufferedInputStream fis = new BufferedInputStream(new FileInputStream(zipPath));
  173. ArchiveInputStream<? extends ArchiveEntry> ais = new ArchiveStreamFactory()
  174. .createArchiveInputStream(ArchiveStreamFactory.detect(fis), fis, encoding)) {
  175. ArchiveEntry entry = ais.getNextEntry();
  176. if (entry == null) continue; // 空压缩包
  177. String fileName = entry.getName();
  178. if (!hasInvalidCharacters(fileName)) {
  179. return encoding; // 找到有效编码
  180. }
  181. } catch (Exception e) {
  182. // 编码不支持或文件错误,继续尝试下一个
  183. }
  184. }
  185. return null;
  186. }
  187. // 检查文件名是否包含无效字符(如替换符)
  188. private static boolean hasInvalidCharacters(String fileName) {
  189. // 检查常见乱码符号:�或连续问号
  190. return fileName.contains("�") || fileName.matches(".*\\?{2,}.*");
  191. }
  192. public static void main(String[] args) throws Exception {
  193. String zipFilePath = "D:\\home\\wwwroot\\mo_report_file\\wangzaijun@simuwang.com\\20250321\\20250321143709排排网确认单.rar";
  194. String destFilePath = "D:\\home\\wwwroot\\mo_report_file\\wangzaijun@simuwang.com\\20250321";
  195. List<String> strings = extractRar5(zipFilePath, destFilePath);
  196. for (String string : strings) {
  197. System.out.println(string);
  198. }
  199. // List<String> fileList = extractCompressedFiles(zipFilePath, destFilePath);
  200. // for (String s : fileList) {
  201. // System.out.println(s);
  202. // }
  203. }
  204. }