123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659 |
- package com.simuwang.daq.service;
- import cn.hutool.core.collection.CollUtil;
- import cn.hutool.core.collection.ListUtil;
- import cn.hutool.core.exceptions.ExceptionUtil;
- import cn.hutool.core.text.StrBuilder;
- import cn.hutool.core.util.StrUtil;
- import com.simuwang.base.common.enums.WaterMarkEnum;
- import com.simuwang.base.common.util.ExcelUtil;
- import com.simuwang.base.pojo.dto.ValuationPdfTransformToExcelDTO;
- import org.apache.commons.io.FileUtils;
- import org.apache.pdfbox.Loader;
- import org.apache.pdfbox.pdmodel.PDDocument;
- import org.apache.pdfbox.text.PDFTextStripper;
- import org.apache.poi.ss.usermodel.Cell;
- import org.apache.poi.ss.usermodel.Row;
- import org.apache.poi.ss.usermodel.Sheet;
- import org.apache.poi.ss.usermodel.Workbook;
- import org.apache.poi.ss.util.CellRangeAddress;
- import org.apache.poi.xssf.usermodel.XSSFWorkbook;
- import org.slf4j.Logger;
- import org.slf4j.LoggerFactory;
- import org.springframework.beans.factory.annotation.Value;
- import org.springframework.stereotype.Component;
- import org.springframework.web.multipart.MultipartFile;
- import technology.tabula.*;
- import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
- import java.io.File;
- import java.io.IOException;
- import java.io.OutputStream;
- import java.nio.file.Files;
- import java.nio.file.Paths;
- import java.util.Arrays;
- import java.util.List;
- import java.util.stream.Collectors;
- @Component
- public class PdfToExcelService {
- private static final Logger log = LoggerFactory.getLogger(PdfToExcelService.class);
- @Value("${email.file.path}")
- private String path;
- private static final List<String> EXTRA_FIELD_LIST = ListUtil.toList("成本", "市值", "估值增值");
- private static final List<String> NUMBER_FIELD_LIST = ListUtil.toList("数量", "单位成本", "成本", "成本占净值",
- "成本占净值%", "成本占净值", "市价", "市值", "市值占净值比", "市值占净值比%");
- private static final List<String> TITLE_FIELD_LIST = ListUtil.toList("单位净值:", "位净值:", "估值日期:", "值日期:", "单位:",
- "科目代码", "科目名", "数量", "单位成本", "位成本", "成本", "成本占净值%", "本币", "十亿千百十",
- "成本占净值", "市价", "市值", "值", "市值占净值%", "市值占净", "估值增值", "停牌信息", "权益信息");
- public static final List<String> TOTAL_MARKET_FIELD = ListUtil.toList("资合计", "资类合计:", "资类合计:", "资净值", "基金资净值:",
- "基金资净值:", "资产资净值", "资资净值:", "资资净值:", "信托资净值:", "信托资净值:");
- /**
- * PDF类型的估值表文件转为excel类型
- *
- * @param fileList 待转换的文件
- * @return 转换为excel类型的文件
- */
- public List<ValuationPdfTransformToExcelDTO> pdfToExcel(List<File> fileList) {
- if (fileList == null) {
- return CollUtil.newArrayList();
- }
- List<File> pdfFileList = fileList.stream()
- .filter(e -> ExcelUtil.isPdf(e.getName())).collect(Collectors.toList());
- if (CollUtil.isEmpty(pdfFileList)) {
- return CollUtil.newArrayList();
- }
- List<ValuationPdfTransformToExcelDTO> pdfToExcelDTOList = CollUtil.newArrayList();
- String excelUploadDir = path + File.separator + "valuation_table_excel" + File.separator;
- for (File multipartFile : pdfFileList) {
- String excelFilePath = excelUploadDir + multipartFile.getName().replace(".pdf", "") + System.currentTimeMillis()+".xlsx";
- File savefile = new File(excelFilePath);
- if (!savefile.exists()) {
- if (!savefile.getParentFile().exists()) {
- savefile.getParentFile().mkdirs();
- savefile.getParentFile().setExecutable(true);
- }
- }
- try (OutputStream outputStream = Files.newOutputStream(Paths.get(excelFilePath))) {
- PDDocument document = Loader.loadPDF(multipartFile);
- //采用pdfBox读取PDF内容
- List<String> lines = readTextFromPDFUsingPdfBox(document);
- //判断PDF是否存在水印 以及 水印类型(waterMarkType为null,代表没有水印)
- WaterMarkEnum waterMark = getPdfWaterMarkType(lines);
- //创建一个新的Excel工作簿
- Workbook workbook = new XSSFWorkbook();
- Sheet sheet = workbook.createSheet("Sheet1");
- //1.处理header行(估值表,专用表,日期和单位净值)
- Integer headerRowNumber = handlerExcelHeader(lines, sheet, waterMark);
- //采用tabula技术提取表格内容
- List<String> lineList = extractTableFormPDFUsingTabula(document, waterMark);
- //2.额外处理标题行
- Integer rowNumber = handlerTitleField(lineList, sheet, headerRowNumber);
- //招商证券股份有限公司的估值表有特殊情况,存在两个标题字段列
- boolean isSpecial = rowNumber == 7;
- //3.合并标题行单元格
- mergeExcelCell(sheet, isSpecial);
- //4.遍历每一行并将其添加到Excel工作表中
- readDateToExcel(sheet, lineList, rowNumber, waterMark);
- //将Excel工作簿写入输出流
- workbook.write(outputStream);
- document.close();
- ValuationPdfTransformToExcelDTO toExcelDTO = new ValuationPdfTransformToExcelDTO();
- toExcelDTO.setOriginalFileName(multipartFile.getName());
- toExcelDTO.setExcelFile(new File(excelFilePath));
- toExcelDTO.setFileUrl(multipartFile.getAbsolutePath());
- pdfToExcelDTOList.add(toExcelDTO);
- } catch (Exception e) {
- log.error("pdf转为excel报错,堆栈:{}", ExceptionUtil.stacktraceToString(e));
- }
- }
- return pdfToExcelDTOList;
- }
- /**
- * 估值表_纽达投资利道永晟五号私募证券投资基金_SGL183_202312292024-02-26.pdf
- *
- * @param lines pdfBox读取PDF内容中的水印文字
- * @param waterMark 水印
- */
- private List<String> removeWaterMark(List<String> lines, WaterMarkEnum waterMark) {
- if (waterMark == null) {
- return lines;
- }
- String regex = waterMark.getRegex();
- return lines.stream().map(e -> e.replaceAll(regex, "")).filter(StrUtil::isNotBlank).collect(Collectors.toList());
- }
- private WaterMarkEnum getPdfWaterMarkType(List<String> lines) {
- if (CollUtil.isEmpty(lines)) {
- return null;
- }
- //pdf文本内容
- String content = String.join("", lines);
- for (WaterMarkEnum waterMarkEnum : WaterMarkEnum.values()) {
- boolean containsAllKeywords = true;
- String waterMarkText = waterMarkEnum.getText();
- String[] waterMarkTextArray = StrUtil.cut(waterMarkText, 1);
- for (String text : waterMarkTextArray) {
- if (!content.contains(text)) {
- containsAllKeywords = false;
- break;
- }
- }
- if (containsAllKeywords) {
- return waterMarkEnum;
- }
- }
- return null;
- }
- private List<String> extractTableFormPDFUsingTabula(PDDocument document, WaterMarkEnum waterMark) {
- StrBuilder tableDataText = new StrBuilder();
- PageIterator extract = new ObjectExtractor(document).extract();
- String regex = waterMark != null ? waterMark.getRegex() : null;
- if (StrUtil.isBlank(regex)) {
- while (extract.hasNext()) {
- Page next = extract.next();
- List<Table> tableList = new SpreadsheetExtractionAlgorithm().extract(next);
- for (Table table : tableList) {
- List<List<RectangularTextContainer>> rows = table.getRows();
- for (List<RectangularTextContainer> row : rows) {
- String collect = row.stream().map(RectangularTextContainer::getText)
- .map(e -> e.replace(" ", ""))
- .collect(Collectors.joining(" "));
- tableDataText.append("\n").append(collect);
- }
- }
- }
- } else {
- while (extract.hasNext()) {
- Page next = extract.next();
- List<Table> tableList = new SpreadsheetExtractionAlgorithm().extract(next);
- for (Table table : tableList) {
- List<List<RectangularTextContainer>> rows = table.getRows();
- for (List<RectangularTextContainer> row : rows) {
- String collect = row.stream().map(RectangularTextContainer::getText)
- .map(e -> e.replace(" ", ""))
- .map(e -> e.replaceAll(regex, ""))
- .collect(Collectors.joining(" "));
- tableDataText.append("\n").append(collect);
- }
- }
- }
- }
- //将文本内容按行分割
- return Arrays.stream(tableDataText.toString().trim().split("\\n")).collect(Collectors.toList());
- }
- private List<String> readTextFromPDFUsingPdfBox(PDDocument document) {
- PDFTextStripper pdfTextStripper = new PDFTextStripper();
- pdfTextStripper.setSortByPosition(true);
- String text = "";
- try {
- text = pdfTextStripper.getText(document);
- } catch (IOException e) {
- log.error("error: read text from pdf...", e);
- }
- //将文本内容按行分割
- return Arrays.stream(text.split("\\n")).collect(Collectors.toList());
- }
- /**
- * 处理excel的表头行(估值表行,专用表行,日期和单位净值行)
- *
- * @param lines PDF每行内容
- * @param sheet 表格
- * @param waterMark 水印
- * @return 返回header所占的行数
- */
- private Integer handlerExcelHeader(List<String> lines, Sheet sheet, WaterMarkEnum waterMark) {
- //去掉水印文字(去掉pdfBox读取PDF内容中的水印文字)
- List<String> noWaterMarkText = removeWaterMark(lines, waterMark);
- //去掉以空格和换行字符开头的数据行:" \r"(国泰君安证券股份有限公司_嘉禾三号私募证券投资基金_专用表)
- noWaterMarkText = noWaterMarkText.stream().filter(this::filterHeadLine).collect(Collectors.toList());
- //遇到"科目代码"行之前的前三行数据为header行
- List<String> headLineList = CollUtil.newArrayList();
- for (String line : noWaterMarkText) {
- if ((StrUtil.isNotBlank(line) && line.trim().startsWith("科目代码"))) {
- break;
- }
- headLineList.add(line);
- }
- //head行过滤掉非以"科目"开头的数据行
- headLineList = headLineList.stream().filter(e -> e.startsWith("科目") || filterTitleField(e)).collect(Collectors.toList());
- //将head行写入表格
- for (int i = 0; i < headLineList.size(); i++) {
- Row row = sheet.createRow(i);
- List<String> split = splitHeaderRow(headLineList.get(i));
- for (int i1 = 0; i1 < split.size(); i1++) {
- Cell cell = row.createCell(i1);
- cell.setCellValue(split.get(i1));
- }
- }
- return sheet.getLastRowNum() + 1;
- }
- /**
- * 分隔和合并head行的内容
- *
- * @param headRowValue head行的内容
- * @return 处理后的head行的内容
- */
- private List<String> splitHeaderRow(String headRowValue) {
- if (StrUtil.isBlank(headRowValue)) {
- return CollUtil.newArrayList();
- }
- //国泰君安证券股份有限公司_嘉禾三号私募证券投资基金_专用表(pdfBox识别不到"_")
- if (headRowValue.contains("国泰君安证券股份有限公司") && !headRowValue.contains("_")) {
- List<String> collect = Arrays.stream(headRowValue.split(" ")).filter(StrUtil::isNotBlank).collect(Collectors.toList());
- return ListUtil.toList(String.join("_", collect));
- }
- return Arrays.stream(headRowValue.split(" ")).filter(StrUtil::isNotBlank).collect(Collectors.toList());
- }
- private boolean filterHeadLine(String text) {
- if (StrUtil.isBlank(text)) {
- return false;
- }
- if (text.trim().startsWith("成本")) {
- return false;
- }
- String replace = text.replace("\r", "").replace(" ", "");
- return !StrUtil.isBlank(replace);
- }
- private void readDateToExcel(Sheet sheet, List<String> lines, Integer rowNumber, WaterMarkEnum waterMark) {
- int baseRowNumber = 0;
- for (int i = 0; i <= sheet.getLastRowNum(); i++) {
- if ("科目代码".equals(sheet.getRow(i).getCell(0).getStringCellValue())) {
- baseRowNumber = i;
- break;
- }
- }
- for (String field : lines) {
- //判断field数据是否要写入表格
- boolean isNeedToWrite = canNeedToWrite(field);
- if (!isNeedToWrite) {
- continue;
- }
- Row row = sheet.createRow(rowNumber);
- List<String> split = Arrays.stream(field.split(" ")).collect(Collectors.toList());
- for (int i1 = 0; i1 < split.size(); i1++) {
- Cell cell = row.createCell(i1);
- //处理空格的问题
- String cellValue = StrUtil.isBlank(split.get(i1)) ? null : split.get(i1).replace("*", "");
- //要求为数值的字段,去掉非字符字段
- if (StrUtil.isNotBlank(cellValue) && NUMBER_FIELD_LIST.contains(sheet.getRow(baseRowNumber).getCell(i1).getStringCellValue())) {
- cellValue = filterNotNumber(cellValue);
- }
- //特殊处理:科目代码后面带有字母的,如1102.01.01.605056 SH、SQ、CFX等
- if (i1 == 0 && i1 + 1 < split.size()) {
- String nextCellValue = StrUtil.isBlank(split.get(i1 + 1)) ? null : split.get(i1 + 1).replace("*", "");
- if (StrUtil.isNotBlank(nextCellValue) && nextCellValue.matches("^[A-Z]+$")) {
- cellValue = (cellValue + " " + nextCellValue).trim();
- split.remove(1);
- }
- }
- cellValue = removeInfluenceOfWaterMark(cellValue, waterMark);
- cell.setCellValue(cellValue);
- }
- rowNumber++;
- }
- }
- /**
- * 处理水印文字造成的影响(总资产、资产净值两个字段的影响)
- *
- * @param cellValue 单元格内容
- * @param waterMark 水印
- * @return 无水印影响的单元格内容
- */
- private String removeInfluenceOfWaterMark(String cellValue, WaterMarkEnum waterMark) {
- if (StrUtil.isBlank(cellValue) || waterMark == null) {
- return cellValue;
- }
- String influenceText = StrUtil.isBlank(waterMark.getInfluenceText()) ? null : waterMark.getInfluenceText();
- if (StrUtil.isBlank(influenceText)) {
- return cellValue;
- }
- String rectifyText = waterMark.getRectifyText();
- return RestoreCellValue(cellValue, influenceText, rectifyText);
- }
- private String RestoreCellValue(String cellValue, String influenceText, String rectifyText) {
- if (StrUtil.isBlank(cellValue) || StrUtil.isBlank(influenceText) || StrUtil.isBlank(rectifyText)) {
- return cellValue;
- }
- StringBuilder sb = new StringBuilder();
- if ("产".equals(influenceText)) {
- for (int i = 0; i < cellValue.length(); i++) {
- if (cellValue.charAt(i) == '资') {
- sb.append(cellValue.charAt(i));
- sb.append("产");
- } else {
- sb.append(cellValue.charAt(i));
- }
- }
- }
- if ("资".equals(influenceText)) {
- for (int i = 0; i < cellValue.length(); i++) {
- if (cellValue.charAt(i) == '产') {
- sb.append("资");
- sb.append(cellValue.charAt(i));
- } else {
- sb.append(cellValue.charAt(i));
- }
- }
- }
- return sb.toString();
- }
- private String filterNotNumber(String cellValue) {
- StringBuilder numberStr = new StringBuilder();
- for (int i = 0; i < cellValue.length(); i++) {
- char c = cellValue.charAt(i);
- if (Character.isDigit(c) || c == '.' || c=='-') {
- numberStr.append(c);
- }
- }
- return numberStr.toString();
- }
- private boolean canNeedToWrite(String field) {
- //过滤以字段列开头的行
- boolean isCanContinue = filterTitleField(field);
- if (!isCanContinue) {
- return false;
- }
- //过滤包含"参考","第","页"的行
- if (field.contains("参考") || (field.contains("第") && field.contains("页"))) {
- return false;
- }
- //过滤掉空行
- List<String> titleFieldList = Arrays.stream(field.split(" ")).filter(StrUtil::isNotBlank).collect(Collectors.toList());
- if (CollUtil.isEmpty(titleFieldList)) {
- return false;
- }
- //过滤只有"%"的数据行
- boolean allPercent = titleFieldList.stream().allMatch("%"::equals);
- return !allPercent;
- }
- private boolean filterTitleField(String field) {
- if (StrUtil.isBlank(field)) {
- return false;
- }
- for (String titleField : TITLE_FIELD_LIST) {
- if (field.trim().startsWith(titleField)) {
- return false;
- }
- }
- return true;
- }
- /**
- * 处理excel的字段标题列
- *
- * @param lines PDF内容
- * @param sheet excel工作簿
- * @param headerRowNumber header行所占的行数
- */
- private Integer handlerTitleField(List<String> lines, Sheet sheet, Integer headerRowNumber) {
- Integer rowNumber = headerRowNumber;
- //1.先筛选出属于标题行的内容
- List<String> titleFieldList = getTitleFieldRow(lines);
- lines.removeAll(titleFieldList);
- //找到以"科目代码"开头的数据行
- int skipNumber = 0;
- for (String field : titleFieldList) {
- if (field.trim().startsWith("科目代码")) {
- break;
- }
- skipNumber++;
- }
- titleFieldList = titleFieldList.stream().skip(skipNumber).collect(Collectors.toList());
- //2.处理字段标题列
- //1-字段行占1行:正常的字段列11列
- if (titleFieldList.size() == 1) {
- handleOneRowTitleField(titleFieldList, sheet, rowNumber);
- return rowNumber + 1;
- } else if (titleFieldList.size() == 2) {
- //2-字段行占2行:第2行字段行一般是无用的字段:例如 %等
- //处理: 识别不到"原币"、"本币"所在的数据行,导致表格的数据对不上字段列
- String secondRowValue = titleFieldList.get(titleFieldList.size() - 1);
- if (secondRowValue.trim().contains("十亿千百十万千")) {
- int length = secondRowValue.trim().split(" ").length;
- if (length >= 6) {
- String extraRowValue = "原币 本币 原币 本币 原币 本币";
- titleFieldList.add(1, extraRowValue);
- }
- }
- if (titleFieldList.size() == 3) {
- handleThreeRowTitleField(titleFieldList, sheet, rowNumber);
- rowNumber += 3;
- } else {
- handleOneRowTitleField(titleFieldList, sheet, rowNumber);
- rowNumber++;
- }
- return rowNumber;
- } else if (titleFieldList.size() == 3) {
- //3-字段行占3行:有原币或者原币、本币字段的17列
- handleThreeRowTitleField(titleFieldList, sheet, rowNumber);
- return rowNumber + 3;
- } else if (titleFieldList.size() == 4) {
- //4-字段行占4行:存在两行"科目代码"的特殊字段列,14列(招商证券pdf估值表)
- handleFourRowTitleField(titleFieldList, sheet, rowNumber);
- return rowNumber + 4;
- }
- return rowNumber;
- }
- private void handleFourRowTitleField(List<String> titleFieldList, Sheet sheet, Integer rowNumber) {
- List<String> fisrtTitleList = Arrays.stream(titleFieldList.get(0).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
- List<String> secondTitleList = Arrays.stream(titleFieldList.get(1).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
- List<String> thirdTitleList = Arrays.stream(titleFieldList.get(2).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
- List<String> fourTitleList = Arrays.stream(titleFieldList.get(3).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
- //第一列"科目代码"
- Row firstRow = sheet.createRow(rowNumber);
- for (int i = 0; i < fisrtTitleList.size(); i++) {
- Cell firstCell = firstRow.createCell(i);
- firstCell.setCellValue(fisrtTitleList.get(i));
- }
- //第二个"科目代码"
- Row secondRow = sheet.createRow(rowNumber + 1);
- Row thirdRow = sheet.createRow(rowNumber + 2);
- Row fourRow = sheet.createRow(rowNumber + 3);
- int count = 0;
- for (int index = 0; index < secondTitleList.size(); index++) {
- Cell secondRowCell = secondRow.createCell(index);
- Cell thirdCell = thirdRow.createCell(index);
- Cell fourCell = fourRow.createCell(index);
- String field = secondTitleList.get(index);
- if (EXTRA_FIELD_LIST.contains(field)) {
- secondRowCell.setCellValue(field);
- thirdCell.setCellValue(thirdTitleList.get(count));
- fourCell.setCellValue(fourTitleList.get(count));
- count++;
- } else {
- secondRowCell.setCellValue(field);
- thirdCell.setCellValue(field);
- fourCell.setCellValue(field);
- }
- }
- }
- private void handleThreeRowTitleField(List<String> titleFieldList, Sheet sheet, Integer rowNumber) {
- //方案一:先填充每个字段,再合并单元格
- List<String> baseTitleList = CollUtil.newArrayList();
- List<String> nextTitleList = Arrays.stream(titleFieldList.get(1).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
- List<String> next2TitleList = Arrays.stream(titleFieldList.get(2).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
- //特殊处理下:如果包含了本币和原币,那么baseTitleList需要重复"成本", "市值", "估值增值"字段
- List<String> titleList = Arrays.stream(titleFieldList.get(0).trim().replace("\r", "").split(" ")).collect(Collectors.toList());
- if (nextTitleList.contains("原币") && nextTitleList.contains("本币")) {
- for (String titleField : titleList) {
- baseTitleList.add(titleField);
- if (EXTRA_FIELD_LIST.contains(titleField)) {
- baseTitleList.add(titleField);
- }
- }
- } else if (nextTitleList.contains("本币") || nextTitleList.contains("原币")) {
- baseTitleList.addAll(titleList);
- }
- Row firstRow = sheet.createRow(rowNumber);
- Row secondRow = sheet.createRow(rowNumber + 1);
- Row thirdRow = sheet.createRow(rowNumber + 2);
- int count = 0;
- for (int index = 0; index < baseTitleList.size(); index++) {
- Cell firstCell = firstRow.createCell(index);
- Cell secondRowCell = secondRow.createCell(index);
- Cell thirdCell = thirdRow.createCell(index);
- //特殊处理"成本", "市值", "估值增值"字段
- String field = baseTitleList.get(index);
- if (EXTRA_FIELD_LIST.contains(field)) {
- if (nextTitleList.contains("原币") || nextTitleList.contains("本币")) {
- firstCell.setCellValue(field);
- secondRowCell.setCellValue(nextTitleList.get(count));
- thirdCell.setCellValue(next2TitleList.get(count));
- count++;
- }
- } else {
- firstCell.setCellValue(field);
- secondRowCell.setCellValue(field);
- thirdCell.setCellValue(field);
- }
- }
- }
- private void handleOneRowTitleField(List<String> titleFieldList, Sheet sheet, Integer rowNumber) {
- Row row = sheet.createRow(rowNumber);
- List<String> baseTitleList = Arrays.stream(titleFieldList.get(0).split(" "))
- .filter(e -> !" ".equals(e) && StrUtil.isNotBlank(e)).collect(Collectors.toList());
- for (int i = 0; i < baseTitleList.size(); i++) {
- Cell cell = row.createCell(i);
- cell.setCellValue(baseTitleList.get(i));
- }
- }
- /**
- * 找到字段标题行
- *
- * @param lines PDF表格的所有行
- * @return 字段标题行
- */
- private List<String> getTitleFieldRow(List<String> lines) {
- List<String> titleFieldList = CollUtil.newArrayList();
- for (String line : lines) {
- //如果当前行的第一个值是整数(意味着当前的前一行为科目代码行了),那么说明已经找完所有标题行内容
- List<String> rowValueList = Arrays.stream(line.split(" ")).collect(Collectors.toList());
- String firstValueOfRow = CollUtil.isNotEmpty(rowValueList) ? rowValueList.get(0) : null;
- if (isInteger(firstValueOfRow)) {
- break;
- }
- if (StrUtil.isNotBlank(line)) {
- titleFieldList.add(line);
- }
- }
- return titleFieldList;
- }
- private void mergeExcelCell(Sheet sheet, boolean isSpecial) {
- if (sheet == null || sheet.getLastRowNum() < 4) {
- return;
- }
- //先找到"科目代码"字段列在excel中的行号
- Integer mixRowNumber = null;
- Integer maxRowNumber = null;
- for (int i = 0; i < sheet.getLastRowNum(); i++) {
- Row row = sheet.getRow(i);
- //找第一个"科目代码"出现的行数
- if ("科目代码".equals(row.getCell(0).toString())) {
- mixRowNumber = mixRowNumber != null ? mixRowNumber : i;
- mixRowNumber = isSpecial ? mixRowNumber + 1 : mixRowNumber;
- isSpecial = false;
- }
- //找最后一个"科目代码"出现的行数
- if (mixRowNumber != null && "科目代码".equals(row.getCell(0).toString()) && i == sheet.getLastRowNum() - 1) {
- maxRowNumber = i + 1;
- break;
- }
- }
- //参数分别为:起始行号、结束行号、起始列号、结束列号
- if (mixRowNumber != null && maxRowNumber != null && !mixRowNumber.equals(maxRowNumber)) {
- Row firstRow = sheet.getRow(mixRowNumber);
- Row lastRow = sheet.getRow(maxRowNumber);
- int rowNum = firstRow.getLastCellNum();
- for (int i = 0; i < rowNum; i++) {
- //不同行,同一列的上下单元格合并
- if (firstRow.getCell(i).toString().equals(lastRow.getCell(i).toString())) {
- sheet.addMergedRegion(new CellRangeAddress(mixRowNumber, maxRowNumber, i, i));
- }
- //同一行的左右两个单元格合并
- if (i + 1 < rowNum && firstRow.getCell(i).toString().equals(firstRow.getCell(i + 1).toString())) {
- sheet.addMergedRegion(new CellRangeAddress(mixRowNumber, mixRowNumber, i, i + 1));
- }
- }
- }
- }
- public boolean isInteger(String str) {
- if (StrUtil.isBlank(str)) {
- return false;
- }
- return str.matches("^-?\\d+$");
- }
- public File saveFile(MultipartFile file, String excelUploadDir, String newExcelName) {
- File targetFile = null;
- try {
- targetFile = new File(excelUploadDir, newExcelName);
- FileUtils.writeByteArrayToFile(targetFile, file.getBytes());
- } catch (IOException e) {
- e.printStackTrace();
- log.error("save file error,error -> {}", e.getMessage());
- return targetFile;
- }
- return targetFile;
- }
- public File saveFile(File file, String excelUploadDir, String newExcelName) {
- File targetFile = null;
- try {
- targetFile = new File(excelUploadDir, newExcelName);
- FileUtils.writeByteArrayToFile(targetFile, Files.readAllBytes(file.toPath()));
- } catch (IOException e) {
- e.printStackTrace();
- log.error("save file error,error -> {}", e.getMessage());
- return targetFile;
- }
- return targetFile;
- }
- }
|