补充记录一下。
指定编码
// Read aaa.txt as UTF-8 text and print it.
// Decoding goes through a Reader instead of calling new String(...) on each
// raw byte chunk: a multi-byte UTF-8 character can straddle two reads, and a
// partially filled buffer would otherwise contribute stale bytes to the text.
StringBuilder sb = new StringBuilder();
// try-with-resources closes the underlying file stream even if a read fails.
try (java.io.Reader reader = new java.io.InputStreamReader(
        new FileInputStream("aaa.txt"), java.nio.charset.StandardCharsets.UTF_8)) { // explicit charset to avoid garbled text
    char[] buffer = new char[1024 * 8];
    int count;
    // read() returns -1 at end of stream; append only the chars actually read.
    while ((count = reader.read(buffer)) != -1) {
        sb.append(buffer, 0, count);
    }
}
System.out.println(sb.toString());
未知编码时,检测文件编码
1、引入依赖
<!-- https://mvnrepository.com/artifact/net.sourceforge.cpdetector/cpdetector -->
<!-- Maven coordinates for cpdetector, the charset-detection library whose
     CodepageDetectorProxy and detector classes are used by the utility
     method in this note. -->
<!-- NOTE(review): confirm this artifact resolves from your configured Maven
     repositories; it may not be published on Maven Central, and the
     detectors may need additional runtime jars (presumably jchardet and
     antlr) - verify before relying on it. -->
<dependency>
<groupId>net.sourceforge.cpdetector</groupId>
<artifactId>cpdetector</artifactId>
<version>1.0.7</version>
</dependency>
2、工具类
/**
 * Detects the character encoding of a file using cpdetector.
 *
 * <p>The file is handed to a {@code CodepageDetectorProxy} configured with
 * the parsing, jchardet, ASCII and Unicode detectors, in that order.
 *
 * @param filePath path of the file to inspect
 * @return the name of the detected charset; {@code "UTF-8"} when no charset
 *         usable by this JVM could be detected; {@code null} when detection
 *         failed with an exception (for example the file cannot be read)
 */
public static String getFileEncode(String filePath) {
    try {
        File file = new File(filePath);
        CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();
        // NOTE(review): getInstance() looks like a shared singleton, so these
        // add() calls repeat on every invocation - confirm the proxy
        // de-duplicates detectors (in particular the freshly constructed
        // ParsingDetector).
        detector.add(new ParsingDetector(false));
        detector.add(JChardetFacade.getInstance());
        detector.add(ASCIIDetector.getInstance());
        detector.add(UnicodeDetector.getInstance());
        java.nio.charset.Charset charset = detector.detectCodepage(file.toURI().toURL());
        // Per the cpdetector documentation, a failed detection is reported
        // with a placeholder Charset (not necessarily null) whose name is not
        // a real encoding. Checking isSupported() covers both that case and
        // encodings this JVM cannot decode, so callers always get a name that
        // is safe to pass to new String(bytes, name).
        if (charset == null || !java.nio.charset.Charset.isSupported(charset.name())) {
            return "UTF-8";
        }
        return charset.name();
    } catch (Exception ex) {
        // Best-effort API: any failure (bad path, I/O error, detector error)
        // is reported on stderr and signalled to the caller as null.
        ex.printStackTrace();
        return null;
    }
}