|
@@ -1,490 +0,0 @@
|
|
|
-package com.smppw.utils;
|
|
|
-
|
|
|
-import cn.hutool.core.io.FileUtil;
|
|
|
-import net.sourceforge.pinyin4j.PinyinHelper;
|
|
|
-import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
|
|
|
-import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
|
|
|
-import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
|
|
|
-import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
|
|
|
-import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
|
|
|
-import org.apache.commons.lang3.StringUtils;
|
|
|
-
|
|
|
-import java.io.*;
|
|
|
-import java.util.*;
|
|
|
-import java.util.regex.Matcher;
|
|
|
-import java.util.regex.Pattern;
|
|
|
-
|
|
|
-/**
|
|
|
- * 中文转拼音
|
|
|
- * @author William
|
|
|
- */
|
|
|
-public class PinyinUtils {
|
|
|
- enum Number {
|
|
|
- zero(0, '零', "ling", "LING", 'l', 'L'), /**/
|
|
|
- one(1, '一', "yi", "YI", 'y', 'Y'), /**/
|
|
|
- two(2, '二', "er", "ER", 'e', 'E'), /**/
|
|
|
- three(3, '三', "san", "SAN", 's', 'S'), /**/
|
|
|
- four(4, '四', "si", "SI", 's', 'S'), /**/
|
|
|
- five(5, '五', "wu", "WU", 'w', 'W'), /**/
|
|
|
- six(6, '六', "liu", "LIU", 'l', 'L'), /**/
|
|
|
- seven(7, '七', "qi", "QI", 'q', 'Q'), /**/
|
|
|
- eight(8, '八', "ba", "BA", 'b', 'B'), /**/
|
|
|
- nine(9, '九', "jiu", "JIU", 'j', 'J');
|
|
|
-
|
|
|
- private int num;
|
|
|
- private char cn;
|
|
|
- private String pingYin;
|
|
|
- private String pingYinUpper;
|
|
|
- private char firstLetter;
|
|
|
- private char firstLetterUpper;
|
|
|
-
|
|
|
- Number(int num, char cn, String pingYin, String pingYinUpper, char firstLetter, char firstLetterUpper) {
|
|
|
- this.num = num;
|
|
|
- this.cn = cn;
|
|
|
- this.pingYin = pingYin;
|
|
|
- this.pingYinUpper = pingYinUpper;
|
|
|
- this.firstLetter = firstLetter;
|
|
|
- this.firstLetterUpper = firstLetterUpper;
|
|
|
- }
|
|
|
-
|
|
|
- public int getNum() {
|
|
|
- return num;
|
|
|
- }
|
|
|
-
|
|
|
- public char getCn() {
|
|
|
- return cn;
|
|
|
- }
|
|
|
-
|
|
|
- public String getPingYin() {
|
|
|
- return pingYin;
|
|
|
- }
|
|
|
-
|
|
|
- public String getPingYinUpper() {
|
|
|
- return pingYinUpper;
|
|
|
- }
|
|
|
-
|
|
|
- public char getFirstLetter() {
|
|
|
- return firstLetter;
|
|
|
- }
|
|
|
-
|
|
|
- public char getFirstLetterUpper() {
|
|
|
- return firstLetterUpper;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- private static final Map<String, Number> numberMap = new HashMap<>();
|
|
|
- static {
|
|
|
- for (Number n : Number.values()) {
|
|
|
- numberMap.put(n.num + "", n);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- private static Map<String, String> dictionary = new HashMap<String, String>();
|
|
|
- // 加载多音字词典
|
|
|
- static {
|
|
|
- try {
|
|
|
- // todo 验证
|
|
|
- File file = FileUtil.file("duoyinzi_pinyin.txt");
|
|
|
- if (file.exists()) {
|
|
|
- BufferedReader br = null;
|
|
|
- try {
|
|
|
- br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
|
|
|
- String line = null;
|
|
|
- while ((line = br.readLine()) != null) {
|
|
|
- String[] arr = line.split("#");
|
|
|
- if (arr[1] != null) {
|
|
|
- String[] sems = arr[1].trim().split("/");
|
|
|
- for (String sem : sems) {
|
|
|
- if (sem != null) {
|
|
|
- dictionary.put(sem, arr[0]);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- } catch (UnsupportedEncodingException e) {
|
|
|
- e.printStackTrace();
|
|
|
- } catch (IOException e) {
|
|
|
- e.printStackTrace();
|
|
|
- } finally {
|
|
|
- if (br != null) {
|
|
|
- try {
|
|
|
- br.close();
|
|
|
- } catch (IOException e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- } catch (Exception e) {
|
|
|
- e.printStackTrace();
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 判断入参是否含中文字符
|
|
|
- */
|
|
|
- public static boolean containChinese(String str) {
|
|
|
- if(StringUtils.isEmpty(str))
|
|
|
- return false;
|
|
|
- Matcher m = Pattern.compile("[\\u4E00-\\u9FA5]").matcher(str);
|
|
|
- return m.find();
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
- /** 返回拼音首字母, 字母全部转小写*/
|
|
|
- public static String getFirstLetterPinyin(String chinese) {
|
|
|
- if(StringUtils.isEmpty(chinese))
|
|
|
- return "";
|
|
|
-
|
|
|
- HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
|
|
|
- format.setCaseType(HanyuPinyinCaseType.LOWERCASE);// 小写格式
|
|
|
- format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);// 有无音标
|
|
|
- format.setVCharType(HanyuPinyinVCharType.WITH_V);
|
|
|
-
|
|
|
- char[] input = chinese.trim().toCharArray();
|
|
|
-
|
|
|
- StringBuilder output = new StringBuilder();
|
|
|
- try {
|
|
|
- for (int i = 0; i < input.length; i++) {
|
|
|
- char word = input[i];
|
|
|
-
|
|
|
- // ascii 48~57 means 0~9
|
|
|
- if (word >= 48 && word <= 57) {
|
|
|
- Number number = numberMap.get(word + "");
|
|
|
- output.append(number.getFirstLetter());
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- // ascii 65~90 means A~Z
|
|
|
- if (word >= 65 && word <= 90) {
|
|
|
- output.append(Character.toString(word).toLowerCase());
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- // ascii 97~122 means a~z
|
|
|
- if (word >= 97 && word <= 122) {
|
|
|
- output.append(word);
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- if (Character.toString(word).matches("[\\u4E00-\\u9FA5]+")) {
|
|
|
- String[] pingYinArray = PinyinHelper.toHanyuPinyinStringArray(word, format);
|
|
|
-
|
|
|
- //特殊情况,字库里没有该字
|
|
|
- if (0 == pingYinArray.length) {
|
|
|
- String py = dictionary.get(Character.toString(word));
|
|
|
- if (null != py) {
|
|
|
- output.append(py.charAt(0));
|
|
|
- }
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- if (input.length == 1) {
|
|
|
- output.append(pingYinArray[0].charAt(0));
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- String pinyin = pingYinArray[0];
|
|
|
- boolean matchDic = false;
|
|
|
-
|
|
|
- if (i < input.length - 1) {
|
|
|
- String dic = Character.toString(word) + input[i + 1];
|
|
|
-
|
|
|
- for (String py : pingYinArray) {
|
|
|
- if (py.equals(dictionary.get(dic))) {
|
|
|
- pinyin = py;
|
|
|
- matchDic = true;
|
|
|
- break;
|
|
|
- }
|
|
|
-
|
|
|
- if (py.equals(dictionary.get(Character.toString(word)))) {
|
|
|
- pinyin = py;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if (!matchDic && i > 0) {
|
|
|
- String dic = input[i - 1] + Character.toString(word);
|
|
|
-
|
|
|
- for (String py : pingYinArray) {
|
|
|
- if (py.equals(dictionary.get(dic))
|
|
|
- || py.equals(dictionary.get(Character.toString(input[i])))) {
|
|
|
- pinyin = py;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- output.append(pinyin.charAt(0));
|
|
|
- }
|
|
|
- }
|
|
|
- } catch (BadHanyuPinyinOutputFormatCombination e1) {
|
|
|
- e1.printStackTrace();
|
|
|
- return "-";
|
|
|
- }
|
|
|
-
|
|
|
- return output.toString();
|
|
|
- }
|
|
|
-
|
|
|
- /** 返回拼音全拼, 字母全部转小写*/
|
|
|
- public static String getFullPinyin(String chinese) {
|
|
|
- if(StringUtils.isEmpty(chinese))
|
|
|
- return "";
|
|
|
-
|
|
|
- HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
|
|
|
- format.setCaseType(HanyuPinyinCaseType.LOWERCASE);// 小写格式
|
|
|
- format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);// 有无音标
|
|
|
- format.setVCharType(HanyuPinyinVCharType.WITH_V);
|
|
|
-
|
|
|
- char[] input = chinese.trim().toCharArray();
|
|
|
-
|
|
|
- StringBuilder output = new StringBuilder();
|
|
|
- try {
|
|
|
- for (int i = 0; i < input.length; i++) {
|
|
|
- char word = input[i];
|
|
|
-
|
|
|
- // ascii 48~57 means 0~9
|
|
|
- if (word >= 48 && word <= 57) {
|
|
|
- Number number = numberMap.get(word + "");
|
|
|
- output.append(number.getPingYin());
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- // ascii 65~90 means A~Z
|
|
|
- if (word >= 65 && word <= 90) {
|
|
|
- output.append(Character.toString(word).toLowerCase());
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- // ascii 97~122 means a~z
|
|
|
- if (word >= 97 && word <= 122) {
|
|
|
- output.append(word);
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- if (Character.toString(word).matches("[\\u4E00-\\u9FA5]+")) {
|
|
|
- String[] pingYinArray = PinyinHelper.toHanyuPinyinStringArray(word, format);
|
|
|
-
|
|
|
- //特殊情况,字库里没有该字
|
|
|
- if (0 == pingYinArray.length) {
|
|
|
- String py = dictionary.get(Character.toString(word));
|
|
|
- if (null != py) {
|
|
|
- output.append(py);
|
|
|
- }
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- if (input.length == 1) {
|
|
|
- output.append(pingYinArray[0]);
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- String pinyin = pingYinArray[0];
|
|
|
- boolean matchDic = false;
|
|
|
- if (i < input.length - 1) {
|
|
|
- String dic = Character.toString(word) + input[i + 1];
|
|
|
-
|
|
|
- for (String py : pingYinArray) {
|
|
|
-
|
|
|
- if (py.equals(dictionary.get(dic))) {
|
|
|
- matchDic = true;
|
|
|
- pinyin = py;
|
|
|
- break;
|
|
|
- }
|
|
|
-
|
|
|
- if (py.equals(dictionary.get(Character.toString(word)))) {
|
|
|
- pinyin = py;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if (!matchDic && i > 0) {
|
|
|
- String dic = input[i - 1] + Character.toString(word);
|
|
|
-
|
|
|
- for (String py : pingYinArray) {
|
|
|
- if (py.equals(dictionary.get(dic))
|
|
|
- || py.equals(dictionary.get(Character.toString(input[i])))) {
|
|
|
- pinyin = py;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- output.append(pinyin);
|
|
|
- }
|
|
|
- }
|
|
|
- } catch (BadHanyuPinyinOutputFormatCombination e1) {
|
|
|
- e1.printStackTrace();
|
|
|
- return "-";
|
|
|
- }
|
|
|
-
|
|
|
- return output.toString();
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 返回拼音首字母
|
|
|
- * @param chinese 中文
|
|
|
- * @param keepAllPolyphone 保留所有多音字,不保留则返回所有多音字中默认第一个
|
|
|
- * @return
|
|
|
- */
|
|
|
- @SuppressWarnings("unused")
|
|
|
- private static String pinyinFirstLetter(String chinese, boolean keepAllPolyphone) {
|
|
|
- boolean filteSpecialChar = true;
|
|
|
- boolean replaceNumber = true;
|
|
|
- boolean isUpper = false;
|
|
|
- boolean isFullPingYin = false;
|
|
|
- return pinyin(chinese, filteSpecialChar, replaceNumber, isUpper, isFullPingYin, keepAllPolyphone);
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * @param chinese 中文
|
|
|
- * @param filteSpecialChar 是否过滤特殊字符
|
|
|
- * @param replaceNumber 是否替换阿拉伯数字为中文
|
|
|
- * @param isUpper 是否大写
|
|
|
- * @param isFullPingYin 是否全拼
|
|
|
- * @param keepAllPolyphone 保留所有多音字,不保留则返回所有多音字中默认第一个
|
|
|
- * @return
|
|
|
- */
|
|
|
- public static String pinyin(String chinese, boolean filteSpecialChar, boolean replaceNumber, boolean isUpper, boolean isFullPingYin, boolean keepAllPolyphone) {
|
|
|
- if (null == chinese || chinese.trim().length() == 0)
|
|
|
- return chinese;
|
|
|
-
|
|
|
- HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
|
|
|
- format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
|
|
|
- format.setVCharType(HanyuPinyinVCharType.WITH_V);
|
|
|
-
|
|
|
- if (isUpper)
|
|
|
- format.setCaseType(HanyuPinyinCaseType.UPPERCASE);
|
|
|
- else
|
|
|
- format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
|
|
|
-
|
|
|
- char[] input = chinese.trim().toCharArray();
|
|
|
-
|
|
|
- List<List<String>> list = new ArrayList<>(input.length);
|
|
|
-
|
|
|
- try {
|
|
|
- for (int i = 0; i < input.length; i++) {
|
|
|
- char word = input[i];
|
|
|
-
|
|
|
- List<String> polyphoneList = new ArrayList<>();
|
|
|
-
|
|
|
- // ascii 48~57 means 0~9
|
|
|
- if (word >= 48 && word <= 57) {
|
|
|
- if (replaceNumber) {
|
|
|
- Number number = numberMap.get(word + "");
|
|
|
-
|
|
|
- if (!isUpper && !isFullPingYin)
|
|
|
- polyphoneList.add(number.getFirstLetter() + "");
|
|
|
- else if (!isUpper && isFullPingYin)
|
|
|
- polyphoneList.add(number.getPingYin());
|
|
|
- else if (isUpper && isFullPingYin)
|
|
|
- polyphoneList.add(number.getPingYinUpper());
|
|
|
- else
|
|
|
- polyphoneList.add(number.getFirstLetterUpper() + "");
|
|
|
-
|
|
|
- } else
|
|
|
- polyphoneList.add(word + "");
|
|
|
-
|
|
|
- list.add(polyphoneList);
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- // ascii 65~90 means A~Z
|
|
|
- if (word >= 65 && word <= 90) {
|
|
|
- if (isUpper)
|
|
|
- polyphoneList.add(word + "");
|
|
|
- else
|
|
|
- polyphoneList.add(Character.toString(word).toLowerCase());
|
|
|
-
|
|
|
- list.add(polyphoneList);
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- // ascii 97~122 means a~z
|
|
|
- if (word >= 97 && word <= 122) {
|
|
|
- if (isUpper)
|
|
|
- polyphoneList.add(Character.toString(word).toUpperCase());
|
|
|
- else
|
|
|
- polyphoneList.add(word + "");
|
|
|
-
|
|
|
- list.add(polyphoneList);
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- // if it is chinese
|
|
|
- if (Character.toString(word).matches("[\\u4E00-\\u9FA5]+")) {
|
|
|
- String[] pingYinArray = PinyinHelper.toHanyuPinyinStringArray(word, format);
|
|
|
-
|
|
|
- if (keepAllPolyphone) {
|
|
|
- //去重
|
|
|
- Set<String> set = new HashSet<>();
|
|
|
- for (String s : pingYinArray)
|
|
|
- set.add(s);
|
|
|
-
|
|
|
- Iterator<String> iterator = set.iterator();
|
|
|
- while (iterator.hasNext()) {
|
|
|
- String py = iterator.next();
|
|
|
- if (isFullPingYin)
|
|
|
- polyphoneList.add(py);
|
|
|
- else if (!polyphoneList.contains(py.charAt(0) + ""))
|
|
|
- polyphoneList.add(py.charAt(0) + "");
|
|
|
- }
|
|
|
- } else {
|
|
|
- if (isFullPingYin)
|
|
|
- polyphoneList.add(pingYinArray[0]);
|
|
|
- else
|
|
|
- polyphoneList.add(pingYinArray[0].charAt(0) + "");
|
|
|
- }
|
|
|
-
|
|
|
- list.add(polyphoneList);
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- // it is a special character
|
|
|
- if (!filteSpecialChar) {
|
|
|
- polyphoneList.add(Character.toString(word));
|
|
|
- list.add(polyphoneList);
|
|
|
- }
|
|
|
- }
|
|
|
- } catch (BadHanyuPinyinOutputFormatCombination e) {
|
|
|
- e.printStackTrace();
|
|
|
-
|
|
|
- return "-";
|
|
|
- }
|
|
|
-
|
|
|
- if (0 == list.size())
|
|
|
- return "";
|
|
|
-
|
|
|
- //全排列输出
|
|
|
- List<String> theList = list.get(0);
|
|
|
- for (int i = 1; i < list.size(); i++) {
|
|
|
- theList = combile(theList, list.get(i));
|
|
|
- }
|
|
|
- return String.join(",", theList);
|
|
|
-
|
|
|
- //正则输出
|
|
|
- // StringBuilder output = new StringBuilder();
|
|
|
- // for (int i = 0; i < list.size(); i++) {
|
|
|
- // List<String> polyphone = list.get(i);
|
|
|
- //
|
|
|
- // if (polyphone.size() > 1)
|
|
|
- // output.append("[").append(String.join("", polyphone)).append("]");
|
|
|
- // else
|
|
|
- // output.append(polyphone.get(0));
|
|
|
- // }
|
|
|
- // return output.toString();
|
|
|
- }
|
|
|
-
|
|
|
- private static List<String> combile(List<String> aList, List<String> bList) {
|
|
|
- List<String> cList = new ArrayList<>(aList.size() * bList.size());
|
|
|
- for (int i = 0; i < aList.size(); i++) {
|
|
|
- for (int j = 0; j < bList.size(); j++) {
|
|
|
- cList.add(aList.get(i) + bList.get(j));
|
|
|
- }
|
|
|
- }
|
|
|
- return cList;
|
|
|
- }
|
|
|
-
|
|
|
-}
|