|
@@ -1,9 +1,7 @@
|
|
package com.smppw.modaq.application.components;
|
|
package com.smppw.modaq.application.components;
|
|
|
|
|
|
import cn.hutool.core.collection.ListUtil;
|
|
import cn.hutool.core.collection.ListUtil;
|
|
-import cn.hutool.core.map.MapUtil;
|
|
|
|
import cn.hutool.core.util.StrUtil;
|
|
import cn.hutool.core.util.StrUtil;
|
|
-import cn.hutool.http.HttpUtil;
|
|
|
|
import com.smppw.modaq.common.conts.Constants;
|
|
import com.smppw.modaq.common.conts.Constants;
|
|
import com.smppw.modaq.common.enums.ReportParseStatus;
|
|
import com.smppw.modaq.common.enums.ReportParseStatus;
|
|
import com.smppw.modaq.common.enums.ReportType;
|
|
import com.smppw.modaq.common.enums.ReportType;
|
|
@@ -11,7 +9,6 @@ import com.smppw.modaq.common.exception.ReportParseException;
|
|
import org.apache.pdfbox.Loader;
|
|
import org.apache.pdfbox.Loader;
|
|
import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
|
|
import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
|
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
|
import org.apache.pdfbox.pdmodel.PDDocument;
|
|
-import org.apache.pdfbox.text.PDFTextStripper;
|
|
|
|
import technology.tabula.CustomObjectExtractor;
|
|
import technology.tabula.CustomObjectExtractor;
|
|
import technology.tabula.Page;
|
|
import technology.tabula.Page;
|
|
import technology.tabula.PageIterator;
|
|
import technology.tabula.PageIterator;
|
|
@@ -21,7 +18,6 @@ import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;
|
|
import java.io.IOException;
|
|
import java.io.IOException;
|
|
import java.util.Calendar;
|
|
import java.util.Calendar;
|
|
import java.util.List;
|
|
import java.util.List;
|
|
-import java.util.Map;
|
|
|
|
import java.util.Objects;
|
|
import java.util.Objects;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
import java.util.regex.Pattern;
|
|
@@ -161,14 +157,18 @@ public final class ReportParseUtils {
|
|
// ASSET_ALLOCATION_TYPE_MAPPER.put("其他融资总额", "基金负债情况");
|
|
// ASSET_ALLOCATION_TYPE_MAPPER.put("其他融资总额", "基金负债情况");
|
|
// }
|
|
// }
|
|
|
|
|
|
|
|
+ public static String cleaningValue(Object value) {
|
|
|
|
+ return cleaningValue(value, true);
|
|
|
|
+ }
|
|
|
|
+
|
|
/**
|
|
/**
|
|
* 数据清洗,替换圆括号,包含中文或英文的圆括号
|
|
* 数据清洗,替换圆括号,包含中文或英文的圆括号
|
|
*
|
|
*
|
|
* @param value /
|
|
* @param value /
|
|
* @return /
|
|
* @return /
|
|
*/
|
|
*/
|
|
- public static String cleaningValue(Object value) {
|
|
|
|
- return cleaningValue(value, true);
|
|
|
|
|
|
+ public static String cleaningValue(Object value, boolean replaceEn) {
|
|
|
|
+ return cleaningValue(value, true, replaceEn);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -178,7 +178,7 @@ public final class ReportParseUtils {
|
|
* @param replaceParentheses 是否替换圆括号
|
|
* @param replaceParentheses 是否替换圆括号
|
|
* @return /
|
|
* @return /
|
|
*/
|
|
*/
|
|
- public static String cleaningValue(Object value, boolean replaceParentheses) {
|
|
|
|
|
|
+ public static String cleaningValue(Object value, boolean replaceParentheses, boolean replaceEn) {
|
|
String fieldValue = StrUtil.toStringOrNull(value);
|
|
String fieldValue = StrUtil.toStringOrNull(value);
|
|
if (!StrUtil.isNullOrUndefined(fieldValue)) {
|
|
if (!StrUtil.isNullOrUndefined(fieldValue)) {
|
|
// 特殊字符替换,空格替换为空字符
|
|
// 特殊字符替换,空格替换为空字符
|
|
@@ -188,6 +188,9 @@ public final class ReportParseUtils {
|
|
.replaceAll(":", ":")
|
|
.replaceAll(":", ":")
|
|
.replaceAll(" ", StrUtil.EMPTY)
|
|
.replaceAll(" ", StrUtil.EMPTY)
|
|
.replaceAll(":", StrUtil.EMPTY);
|
|
.replaceAll(":", StrUtil.EMPTY);
|
|
|
|
+ if (replaceEn) {
|
|
|
|
+ fieldValue = fieldValue.replaceAll("[a-zA-Z]", StrUtil.EMPTY);
|
|
|
|
+ }
|
|
if (replaceParentheses) {
|
|
if (replaceParentheses) {
|
|
// 正则表达式匹配中文括号及其内容,并替换为空字符串
|
|
// 正则表达式匹配中文括号及其内容,并替换为空字符串
|
|
fieldValue = Pattern.compile("[(|(][^)]*[)|)]").matcher(fieldValue).replaceAll(StrUtil.EMPTY);
|
|
fieldValue = Pattern.compile("[(|(][^)]*[)|)]").matcher(fieldValue).replaceAll(StrUtil.EMPTY);
|