需求描述
- 读取某个目录下的所有**文件名**
具体实现
递归方式
- 递归方式读取普通目录
public static class LocalFileUtil {
    /** Only files whose path ends with this suffix are collected. */
    private static final String FILE_SUFFIX = "Message.log";

    /**
     * Recursively collects the paths of all files under {@code path} whose
     * name ends with {@code "Message.log"}.
     *
     * <p>Each call builds and returns its own list. The previous version
     * accumulated into a shared static list, returned a reference to that
     * same list and then cleared it, so callers always received an empty
     * list (and nested calls wiped the results gathered so far).
     *
     * @param path directory to scan
     * @return paths (as produced by {@link File#toString()}) of the matching
     *         files; empty if {@code path} cannot be listed
     */
    public static ArrayList<String> getLocalFiles(String path) {
        ArrayList<String> result = new ArrayList<String>();
        collectLocalFiles(new File(path), result);
        return result;
    }

    /** Walks {@code dir} depth-first and appends every matching file to {@code result}. */
    private static void collectLocalFiles(File dir, ArrayList<String> result) {
        File[] children = dir.listFiles();
        // listFiles() returns null when dir is not a directory or an I/O error occurs.
        if (children == null) {
            return;
        }
        for (File child : children) {
            if (child.isFile()) {
                log.info("文件:" + child);
                if (child.toString().endsWith(FILE_SUFFIX)) {
                    result.add(child.toString());
                }
            } else if (child.isDirectory()) {
                log.info("文件夹:" + child);
                collectLocalFiles(child, result);
            }
        }
    }
}
- 递归方式读取 HDFS 目录
public static class HDFSUtil {
    /**
     * Recursively collects the paths of all files under an HDFS directory.
     *
     * <p>Each call builds and returns its own list. The previous version
     * accumulated into a shared static list, returned a reference to that
     * same list and then cleared it, so callers always received an empty
     * list. It also opened a new {@code FileSystem} instance for every
     * directory visited and never closed any of them; a single instance is
     * now used for the whole walk and closed before returning.
     *
     * @param filePath URI of the directory to scan
     * @return string form of the path of every file found below {@code filePath}
     * @throws IOException if the file system cannot be opened or a directory cannot be listed
     */
    public static ArrayList<String> getHdfsFilePaths(String filePath) throws IOException {
        ArrayList<String> result = new ArrayList<String>();
        FileSystem fs = FileSystem.newInstance(URI.create(filePath), new Configuration());
        try {
            collectHdfsFiles(fs, new Path(filePath), result);
        } finally {
            // newInstance() hands out an instance owned by this call, so it must be closed here.
            fs.close();
        }
        return result;
    }

    /** Walks {@code dir} depth-first on {@code fs} and appends every file path to {@code result}. */
    private static void collectHdfsFiles(FileSystem fs, Path dir, ArrayList<String> result)
            throws IOException {
        for (FileStatus status : fs.listStatus(dir)) {
            if (status.isFile()) {
                log.info("文件:" + status);
                result.add(status.getPath().toString());
            } else if (status.isDirectory()) {
                log.info("文件夹:" + status);
                collectHdfsFiles(fs, status.getPath(), result);
            }
        }
    }
}
非递归方式(推荐使用)
- 非递归方式读取普通目录
public static class LocalFileUtil {
    /**
     * Collects the absolute paths of all files under {@code path} without
     * recursion, using a queue of directories still to be listed
     * (breadth-first).
     *
     * @param path directory to scan
     * @return absolute paths of every non-directory entry found; empty if
     *         {@code path} does not exist or cannot be listed
     */
    private static ArrayList<String> getLocalFilePaths(String path) {
        ArrayList<String> fileList = new ArrayList<String>();
        File root = new File(path);
        if (!root.exists()) {
            log.info("文件不存在!");
            return fileList;
        }
        // Seed the queue with the root so the root and its sub-directories share one code path.
        LinkedList<File> pendingDirs = new LinkedList<File>();
        pendingDirs.add(root);
        while (!pendingDirs.isEmpty()) {
            File dir = pendingDirs.removeFirst();
            forFiles(dir.listFiles(), pendingDirs, fileList);
        }
        return fileList;
    }

    /**
     * Sorts one directory listing: sub-directories are queued in
     * {@code dirlist}, everything else is recorded in {@code fileList}.
     *
     * @param files    result of {@link File#listFiles()}; may be {@code null}
     * @param dirlist  queue of directories still to be listed
     * @param fileList accumulator for the absolute paths of files
     */
    private static void forFiles(File[] files, LinkedList<File> dirlist, ArrayList<String> fileList) {
        // listFiles() returns null for a non-directory or on I/O error; previously this caused an NPE.
        if (files == null) {
            return;
        }
        for (File entry : files) {
            String absolutePath = entry.getAbsolutePath();
            if (entry.isDirectory()) {
                log.info("文件夹:" + absolutePath);
                dirlist.add(entry);
            } else {
                log.info("文件:" + absolutePath);
                fileList.add(absolutePath);
            }
        }
    }
}
- 非递归方式读取 HDFS 目录
public static class HDFSUtil {
    /**
     * Collects the paths of all files under an HDFS directory without
     * recursion, using a queue of directories still to be listed
     * (breadth-first).
     *
     * <p>A single {@code FileSystem} instance is opened for the whole walk
     * and closed before returning. The previous version opened a new
     * instance for every directory and never closed any of them. Its
     * {@code fs == null} check was removed as well, on the understanding
     * that {@code FileSystem.newInstance} reports failure by throwing.
     *
     * @param filePath URI of the directory to scan
     * @return string form of the path of every non-directory entry found
     * @throws IOException if the file system cannot be opened or a directory cannot be listed
     */
    public static ArrayList<String> getHdfsFilePaths(String filePath) throws IOException {
        ArrayList<String> fileList = new ArrayList<String>();
        FileSystem fs = FileSystem.newInstance(URI.create(filePath), new Configuration());
        try {
            LinkedList<FileStatus> pendingDirs = new LinkedList<FileStatus>();
            forFiles(fs.listStatus(new Path(filePath)), pendingDirs, fileList);
            while (!pendingDirs.isEmpty()) {
                FileStatus dir = pendingDirs.removeFirst();
                forFiles(fs.listStatus(dir.getPath()), pendingDirs, fileList);
            }
        } finally {
            // newInstance() hands out an instance owned by this call, so it must be closed here.
            fs.close();
        }
        return fileList;
    }

    /**
     * Sorts one directory listing: sub-directories are queued in
     * {@code dirlist}, everything else is recorded in {@code fileList}.
     *
     * @param fileStatus entries of the directory just listed
     * @param dirlist    queue of directories still to be listed
     * @param fileList   accumulator for file paths
     */
    private static void forFiles(FileStatus[] fileStatus, LinkedList<FileStatus> dirlist, ArrayList<String> fileList) {
        for (FileStatus entry : fileStatus) {
            String path = entry.getPath().toString();
            if (entry.isDirectory()) {
                log.info("文件夹:" + path);
                dirlist.add(entry);
            } else {
                log.info("文件:" + path);
                fileList.add(path);
            }
        }
    }
}