Java API操作HDFS

package project.etl.core.util;

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

/**
 * Static helper methods for obtaining a Hadoop {@link FileSystem} and performing
 * common HDFS operations (mkdir, upload, download, delete, list, close).
 *
 * @author snow
 * @date: 2019-10-13 10:01:59
 */
public class HDFSUtil {

  /** Configuration key that names the default file system URI. */
  private static final String DEFAULT_FS_KEY = "fs.defaultFS";

  /**
   * Builds a {@link Configuration} and registers the two given site files as resources.
   *
   * @param coreSite path of core-site.xml
   * @param hdfsSite path of hdfs-site.xml
   * @return a configuration with both files added as resources
   */
  private static Configuration loadConfiguration(String coreSite, String hdfsSite) {
    Configuration conf = new Configuration();
    conf.addResource(new Path(coreSite));
    conf.addResource(new Path(hdfsSite));
    return conf;
  }

  /**
   * Returns the file system described by a default {@link Configuration}.
   *
   * @return the file system obtained from {@link FileSystem#get(Configuration)}
   * @throws IOException if the file system cannot be obtained
   * @author snow
   * @date: 2019-10-13 10:16:18
   */
  public static FileSystem getFileSystem() throws IOException {
    return FileSystem.get(new Configuration());
  }

  /**
   * Returns the file system identified by the given URI.
   *
   * @param fileSystemPath value to use for {@code fs.defaultFS}
   * @return the file system obtained from {@link FileSystem#get(Configuration)}
   * @throws IOException if the file system cannot be obtained
   * @author snow
   * @date: 2019-10-17 15:06:42
   */
  public static FileSystem getFileSystem(String fileSystemPath) throws IOException {
    Configuration conf = new Configuration();
    conf.set(DEFAULT_FS_KEY, fileSystemPath);
    return FileSystem.get(conf);
  }

  /**
   * Returns a file system configured from Hadoop site files.
   *
   * @param fileSystemPath optional value for {@code fs.defaultFS}; when non-null and
   *     non-empty it is set on the configuration after the site files are added,
   *     otherwise the configuration is left as loaded
   * @param coreSite path of core-site.xml
   * @param hdfsSite path of hdfs-site.xml
   * @return the file system obtained from {@link FileSystem#get(Configuration)}
   * @throws IOException if the file system cannot be obtained
   * @author snow
   * @date: 2019-10-17 15:10:48
   */
  public static FileSystem getFileSystem(String fileSystemPath, String coreSite, String hdfsSite) throws IOException {
    Configuration conf = loadConfiguration(coreSite, hdfsSite);
    // Previously this argument was accepted but silently ignored; apply it the same
    // way the single-argument overload does.
    if (fileSystemPath != null && !fileSystemPath.isEmpty()) {
      conf.set(DEFAULT_FS_KEY, fileSystemPath);
    }
    return FileSystem.get(conf);
  }

  /**
   * Returns a file system configured from Hadoop site files after logging in from a
   * keytab, using an explicit krb5.conf location.
   *
   * @param coreSite path of core-site.xml
   * @param hdfsSite path of hdfs-site.xml
   * @param krd5Path path of krb5.conf; stored in the {@code java.security.krb5.conf}
   *     system property
   * @param user principal name to log in as
   * @param keytab path of the keytab file
   * @return the file system obtained from {@link FileSystem#get(Configuration)}
   * @throws IOException if the login fails or the file system cannot be obtained
   * @author snow
   * @date: 2019-10-17 15:15:18
   */
  public static FileSystem getFileSystem(String coreSite, String hdfsSite, String krd5Path, String user, String keytab) throws IOException {
    // Needed when the program is not running inside the Hadoop cluster.
    System.setProperty("java.security.krb5.conf", krd5Path);
    return getFileSystem(coreSite, hdfsSite, user, keytab);
  }

  /**
   * Returns a file system configured from Hadoop site files after logging in from a keytab.
   *
   * @param coreSite path of core-site.xml
   * @param hdfsSite path of hdfs-site.xml
   * @param user principal name to log in as
   * @param keytab path of the keytab file
   * @return the file system obtained from {@link FileSystem#get(Configuration)}
   * @throws IOException if the login fails or the file system cannot be obtained
   * @author snow
   * @date: 2019-10-17 15:25:35
   */
  public static FileSystem getFileSystem(String coreSite, String hdfsSite, String user, String keytab) throws IOException {
    Configuration conf = loadConfiguration(coreSite, hdfsSite);
    // Hand the loaded configuration to UGI before logging in, so the security
    // settings from the supplied site files are the ones used for the login.
    UserGroupInformation.setConfiguration(conf);
    UserGroupInformation.loginUserFromKeytab(user, keytab);
    return FileSystem.get(conf);
  }

  /**
   * Creates a directory if it does not already exist and prints the outcome to
   * standard output. Prints a message and does nothing when {@code path} is null or empty.
   *
   * @param fileSystem file system to operate on
   * @param path directory to create
   * @throws IllegalArgumentException if {@code path} cannot be converted to a {@link Path}
   * @throws IOException if the existence check or the creation fails
   * @author snow
   * @date: 2019-10-17 15:43:44
   */
  public static void mkdirsFolder(FileSystem fileSystem, String path) throws IllegalArgumentException, IOException {
    if (path == null || path.isEmpty()) {
      System.out.println("path不能为空!");
      return;
    }
    Path folder = new Path(path);
    if (fileSystem.exists(folder)) {
      return;
    }
    if (fileSystem.mkdirs(folder)) {
      System.out.println("文件夹创建成功!");
    } else {
      System.out.println("文件夹创建失败!");
    }
  }

  /**
   * Uploads a single local file or directory to the given path on the file system.
   *
   * @param fileSystem file system to upload to
   * @param localPath local source path
   * @param HDFSPath destination path on the file system
   * @throws IOException if the copy fails
   * @author snow
   * @date: 2019-10-13 10:27:21
   */
  public static void uploadFileToHDFS(FileSystem fileSystem, Path localPath, Path HDFSPath) throws IOException {
    fileSystem.copyFromLocalFile(localPath, HDFSPath);
  }

  /**
   * Downloads a single file or directory from the file system to a local path.
   *
   * @param fileSystem file system to download from
   * @param HDFSPath source path on the file system
   * @param localPath local destination path
   * @throws IOException if the copy fails
   * @author snow
   * @date: 2019-10-13 11:02:54
   */
  public static void loadHDFSFile(FileSystem fileSystem, Path HDFSPath, Path localPath) throws IOException {
    fileSystem.copyToLocalFile(HDFSPath, localPath);
  }

  /**
   * Deletes a file or an empty directory if it exists; recursion is disabled.
   *
   * @param fileSystem file system to operate on
   * @param HDFSPath path to delete
   * @throws IOException if the existence check or the delete fails
   * @author snow
   * @date: 2019-10-13 11:33:22
   */
  public static void delHDFSFile(FileSystem fileSystem, Path HDFSPath) throws IOException {
    if (!fileSystem.exists(HDFSPath)) {
      return;
    }
    // Recursive delete is deliberately disabled here.
    fileSystem.delete(HDFSPath, false);
  }

  /**
   * Recursively deletes the given path if it exists.
   *
   * @param fileSystem file system to operate on
   * @param HDFSPath path to delete together with everything beneath it
   * @throws IOException if the existence check or the delete fails
   * @author snow
   * @date: 2019-10-13 11:33:22
   */
  public static void delHDFSFileEcho(FileSystem fileSystem, Path HDFSPath) throws IOException {
    if (!fileSystem.exists(HDFSPath)) {
      return;
    }
    // Recursive delete.
    fileSystem.delete(HDFSPath, true);
  }

  /**
   * Prints the files and directories found at the given path to standard output,
   * one per line, or a "not found" message when the path does not exist.
   *
   * @param fileSystem file system to query
   * @param HDFSPath path to list
   * @throws FileNotFoundException if the path disappears between the existence check
   *     and the listing
   * @throws IOException if the existence check or the listing fails
   * @author snow
   * @date: 2019-10-13 12:03:12
   */
  public static void checkHDFSFile(FileSystem fileSystem, Path HDFSPath) throws FileNotFoundException, IOException {
    if (!fileSystem.exists(HDFSPath)) {
      System.out.println("文件或目录不存在!");
      return;
    }
    for (FileStatus fileStatus : fileSystem.listStatus(HDFSPath)) {
      // Label each entry as a directory or a file.
      String label = fileStatus.isDirectory() ? "文件夹::" : "文件:";
      System.out.println(label + " " + fileStatus.getPath().toString());
    }
  }

  /**
   * Closes the given file system; does nothing when it is {@code null}.
   *
   * <p>NOTE(review): {@link FileSystem#get(Configuration)} may hand out a cached,
   * shared instance depending on Hadoop configuration, in which case closing it
   * would affect other holders of the same instance. Confirm against the deployment.
   *
   * @param fileSystem file system to close, may be {@code null}
   * @throws IOException if closing fails
   * @author snow
   * @date: 2019-10-13 12:05:44
   */
  public static void close(FileSystem fileSystem) throws IOException {
    if (fileSystem != null) {
      fileSystem.close();
    }
  }
}

原文地址:https://www.cnblogs.com/drunkPullBreeze/p/11665869.html