删除HDFS集群中的所有空文件和空目录

1、连接HDFS

package api;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

/**
 * Helper for obtaining a handle to the HDFS cluster used by the examples.
 */
public class Utils {

    /** Value written to the {@code fs.defaultFS} configuration key. */
    private static final String DEFAULT_FS = "hdfs://potter2:9000";

    /** Value written to the {@code HADOOP_USER_NAME} system property. */
    private static final String HADOOP_USER = "potter";

    /**
     * Builds a Hadoop {@link Configuration} that points at the hard-coded
     * cluster address and returns the {@link FileSystem} resolved from it.
     *
     * <p>The {@code HADOOP_USER_NAME} system property is set before the file
     * system is requested (presumably so that Hadoop performs operations as
     * that user; confirm against the cluster's authentication setup).
     *
     * @return the file system obtained from {@code FileSystem.get}
     * @throws Exception if the file system cannot be obtained
     */
    public static FileSystem HDFS() throws Exception {
        Configuration hdfsConf = new Configuration();
        hdfsConf.set("fs.defaultFS", DEFAULT_FS);
        System.setProperty("HADOOP_USER_NAME", HADOOP_USER);
        return FileSystem.get(hdfsConf);
    }
}

2、主要代码

package api;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Test;

/**
 * Deletes all empty files and empty directories in the HDFS cluster.
 *
 * <p>A file is considered empty when its length is zero. A directory is
 * considered empty when it has no children, or when every child was removed
 * by this clean-up; removal therefore cascades upwards. The root directory
 * itself is never deleted.
 *
 * @author Administrator
 */
public class Empty {

    /** Runs the clean-up over the whole file system, starting at the root. */
    @Test
    public void tt() throws Exception {
        Path path = new Path("/");
        Empty1(path);
    }

    /**
     * Removes every empty file and empty directory below {@code path}, and
     * {@code path} itself if it ends up empty (unless it is the root).
     *
     * @param path directory at which the clean-up starts
     * @throws Exception if listing or deleting fails; this includes the case
     *         where a directory receives new content between being listed and
     *         being deleted, because deletes are issued non-recursively
     */
    public static void Empty1(Path path) throws Exception {
        // Obtain the file system once instead of at every recursion level.
        FileSystem fs = Utils.HDFS();
        removeEmptyEntries(fs, path);
    }

    /**
     * Post-order traversal: cleans all children first, then deletes
     * {@code dir} when nothing is left inside it.
     *
     * @return {@code true} if {@code dir} itself was deleted
     */
    private static boolean removeEmptyEntries(FileSystem fs, Path dir) throws Exception {
        // Work on a snapshot of the listing so that deleting children cannot
        // disturb an iterator that is still being consumed.
        FileStatus[] children = fs.listStatus(dir);

        int remaining = 0;
        for (FileStatus child : children) {
            Path childPath = child.getPath();
            boolean removed;
            if (child.isDirectory()) {
                removed = removeEmptyEntries(fs, childPath);
            } else {
                removed = child.getLen() == 0 && deleteAndReport(fs, childPath);
            }
            if (!removed) {
                remaining++;
            }
        }

        // Keep directories that still hold something, and never try to
        // delete the root (a path without a parent).
        if (remaining > 0 || dir.getParent() == null) {
            return false;
        }
        return deleteAndReport(fs, dir);
    }

    /**
     * Deletes a single entry non-recursively, so that content which appeared
     * after the emptiness check is never removed along with it, and prints a
     * message only when the delete actually happened.
     *
     * @return the result of {@code FileSystem.delete}
     */
    private static boolean deleteAndReport(FileSystem fs, Path target) throws Exception {
        boolean deleted = fs.delete(target, false);
        if (deleted) {
            System.out.println("删除成功: " + target);
        }
        return deleted;
    }
}
原文地址:https://www.cnblogs.com/sangumaolu/p/8545510.html