`
mr.lili
  • 浏览: 149625 次
  • 性别: Icon_minigender_1
  • 来自: 成都
文章分类
社区版块
存档分类

Java 读取文件夹下 Unicode 编码的文件内容，并转换为 UTF-8 保存

 
阅读更多
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.FilterReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Re-encodes the text files of one directory to UTF-8, in place.
 *
 * <p>For every regular file directly inside the directory whose extension is
 * {@code txt}, {@code text} or {@code lrc} (case-insensitive), the encoding is
 * guessed from the byte-order mark (falling back to gb2312), the content is
 * decoded, and the file is overwritten with the same text encoded as UTF-8.
 * Sub-directories are not visited.
 */
public class ConUniToUTF {

    /** Directory processed when no command-line argument is supplied. */
    private static final String DEFAULT_PATH = "J:\\English\\BBC\\140317BBC\\02";

    /** Terminator appended after every line that is read. */
    private static final String LINE_END = "\r\n";

    /** Byte-order mark as it appears after decoding; the UTF-8 decoder does not strip it. */
    private static final char BOM = '\uFEFF';

    /**
     * Converts every matching file in the target directory.
     *
     * @param args optional; {@code args[0]} is the directory to process. When
     *             absent, {@link #DEFAULT_PATH} is used.
     */
    public static void main(String[] args) {
        System.out.println("aaaaa");

        try {
            String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
            for (String filepath : getFile(path)) {
                if (hasTextExtension(filepath)) {
                    System.out.println(">>>>>>>>>>" + filepath);
                    printUTFTxt(filepath);
                }
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }

        System.out.println("bbbbbbbbb");
    }

    /**
     * Rewrites the given file in place as UTF-8 (without a byte-order mark).
     *
     * <p>The file is only overwritten after it has been read completely; if
     * reading fails the file is left untouched, so a decoding problem can no
     * longer truncate or empty the original. Failures are reported on
     * {@code System.err} and do not propagate.
     *
     * @param filePath path of the file to convert
     */
    public static void printUTFTxt(String filePath) {
        StringBuilder content = new StringBuilder();
        try {
            readInto(filePath, content);
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println("读取文件:" + filePath + "失败!");
            return; // never overwrite a file we could not read in full
        }

        // An explicit charset is required: FileWriter would use the platform default.
        try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(filePath), StandardCharsets.UTF_8))) {
            out.write(content.toString());
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("写入文件:" + filePath + "失败!");
        }
    }

    /**
     * Tells whether the text after the last {@code '.'} of the path is one of
     * the handled extensions.
     *
     * @param filepath path to inspect
     * @return {@code true} for {@code txt}, {@code text} or {@code lrc}, ignoring case
     */
    private static boolean hasTextExtension(String filepath) {
        String endName = filepath.substring(filepath.lastIndexOf(".") + 1);
        return "txt".equalsIgnoreCase(endName)
                || "text".equalsIgnoreCase(endName)
                || "lrc".equalsIgnoreCase(endName);
    }

    /**
     * Lists the paths of the regular files directly inside a directory.
     *
     * @param path directory to list
     * @return the file paths; empty when {@code path} cannot be listed
     *         (for example it does not exist or is not a directory)
     */
    private static List<String> getFile(String path) {
        List<String> paths = new ArrayList<String>();
        File[] entries = new File(path).listFiles();
        if (entries == null) {
            // listFiles() returns null instead of throwing when the directory cannot be read.
            System.err.println("Cannot list directory: " + path);
            return paths;
        }
        for (File entry : entries) {
            if (entry.isFile()) {
                System.out.println("*****" + entry.getPath());
                paths.add(entry.getPath());
            }
        }
        return paths;
    }

    /**
     * Reads a text file using the encoding guessed by {@link #resolveCode}.
     *
     * <p>Every line is terminated with {@code "\r\n"} in the result and a
     * leading byte-order mark is dropped. On failure the error is printed and
     * whatever was read up to that point is returned.
     *
     * @param path path of the file to read
     * @return the decoded content, possibly partial or empty after a failure
     */
    public static String readTxt(String path) {
        StringBuilder content = new StringBuilder();
        try {
            readInto(path, content);
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println("读取文件:" + path + "失败!");
        }
        return content.toString();
    }

    /**
     * Decodes the file line by line and appends the lines to {@code content}.
     *
     * @param path    path of the file to read
     * @param content receives each line followed by {@code "\r\n"}
     * @throws Exception if the encoding cannot be resolved or the file cannot be read
     */
    private static void readInto(String path, StringBuilder content) throws Exception {
        String code = resolveCode(path);
        try (InputStream is = new FileInputStream(path);
                BufferedReader br = new BufferedReader(new InputStreamReader(is, code))) {
            boolean firstLine = true;
            String line;
            while ((line = br.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // The UTF-8 decoder hands the BOM through as a character; drop it
                    // so the result is the same whatever the source encoding was.
                    if (!line.isEmpty() && line.charAt(0) == BOM) {
                        line = line.substring(1);
                    }
                }
                content.append(line).append(LINE_END);
            }
        }
    }

    /**
     * Guesses a file's encoding from its first bytes.
     *
     * <ul>
     *   <li>{@code FF FE} gives {@code "UTF-16"}</li>
     *   <li>{@code FE FF} gives {@code "Unicode"}</li>
     *   <li>{@code EF BB BF} gives {@code "UTF-8"}</li>
     *   <li>anything else, including files shorter than the mark, gives {@code "gb2312"}</li>
     * </ul>
     *
     * The chosen name is also printed to standard output.
     *
     * @param path path of the file to inspect
     * @return the charset name to decode the file with
     * @throws Exception if the file cannot be opened or read
     */
    public static String resolveCode(String path) throws Exception {
        byte[] head = new byte[3];
        int filled = 0;
        try (InputStream inputStream = new FileInputStream(path)) {
            // read() may return fewer bytes than requested; keep going until full or EOF.
            while (filled < head.length) {
                int n = inputStream.read(head, filled, head.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
        }

        String code = "gb2312"; // or GBK
        if (filled >= 2 && head[0] == (byte) 0xFF && head[1] == (byte) 0xFE) {
            code = "UTF-16";
        } else if (filled >= 2 && head[0] == (byte) 0xFE && head[1] == (byte) 0xFF) {
            code = "Unicode";
        } else if (filled >= 3 && head[0] == (byte) 0xEF && head[1] == (byte) 0xBB
                && head[2] == (byte) 0xBF) {
            code = "UTF-8";
        }

        System.out.println(code);
        return code;
    }

}
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics