HDFS Java API 常用操作

package com.luogankun.hadoop.hdfs.api;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * Examples of common HDFS operations performed through the Hadoop Java API.
 *
 * <p>Each operation is written as a JUnit 4 test method. {@link #setUp()} opens a
 * {@link FileSystem} handle for {@link #HDFS_PATH} before every test and
 * {@link #tearDown()} releases it afterwards. The tests talk to a live cluster and
 * some of them read files from hard-coded local Windows paths.
 *
 * @author luogk
 */
public class HDFSApp {

    /** URI of the HDFS NameNode that every test connects to. */
    public static final String HDFS_PATH = "hdfs://hadoop000:8020";

    /** Hadoop configuration; created in {@link #setUp()}. */
    Configuration configuration = null;

    /** File system handle for {@link #HDFS_PATH}; created in {@link #setUp()}. */
    FileSystem fileSystem = null;

    /**
     * Creates the configuration and obtains the file system handle used by the tests.
     *
     * @throws Exception if the URI is malformed or the file system cannot be obtained
     */
    @Before
    public void setUp() throws Exception {
        System.out.println("HDFSApp.setUp()");
        configuration = new Configuration();
        fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration);
    }

    /**
     * Creates the directory {@code /hdfsapi/test}.
     *
     * @throws Exception if the file system call fails
     */
    @Test
    public void mkdir() throws Exception {
        fileSystem.mkdirs(new Path("/hdfsapi/test"));
    }

    /**
     * Creates the file {@code /hdfsapi/test/a.txt} and writes {@code hello world} to it.
     *
     * @throws Exception if the file cannot be created or written
     */
    @Test
    public void create() throws Exception {
        FSDataOutputStream output = fileSystem.create(new Path("/hdfsapi/test/a.txt"));
        try {
            // Explicit charset so the bytes written do not depend on the platform default.
            output.write("hello world".getBytes("UTF-8"));
            output.flush();
        } finally {
            // Close even when write/flush fails so the stream is not leaked.
            output.close();
        }
    }

    /**
     * Renames {@code /hdfsapi/test/a.txt} to {@code /hdfsapi/test/b.txt} and prints the
     * boolean result returned by the file system.
     *
     * @throws Exception if the file system call fails
     */
    @Test
    public void rename() throws Exception {
        Path oldPath = new Path("/hdfsapi/test/a.txt");
        Path newPath = new Path("/hdfsapi/test/b.txt");
        System.out.println(fileSystem.rename(oldPath, newPath));
    }

    /**
     * Uploads the local file {@code d:/case2.sql} into the HDFS directory
     * {@code /hdfsapi/test/}.
     *
     * @throws Exception if the copy fails
     */
    @Test
    public void copyFromLocalFile() throws Exception {
        Path src = new Path("d:/case2.sql");
        Path dst = new Path("/hdfsapi/test/");
        fileSystem.copyFromLocalFile(src, dst);
    }

    /**
     * Uploads a local file to {@code /hdfsapi/test/spark.tgz} by streaming its bytes,
     * printing a dot each time the progress callback is invoked.
     *
     * @throws Exception if the local file cannot be read or the HDFS file cannot be written
     */
    @Test
    public void copyBytes() throws Exception {
        InputStream in = new BufferedInputStream(new FileInputStream(
                new File("D:/software/apache/spark/spark-1.1.0-SNAPSHOT-bin-2.3.0-cdh5.0.0.tgz")));
        try {
            FSDataOutputStream out = fileSystem.create(new Path("/hdfsapi/test/spark.tgz"), new Progressable() {
                @Override
                public void progress() {
                    System.out.print("."); // progress indicator
                }
            });
            // close=true: both streams are closed once the copy ends, whether it succeeds
            // or fails. Without it the HDFS output stream was never closed.
            IOUtils.copyBytes(in, out, 4096, true);
        } finally {
            // Covers the case where create() throws before the copy starts;
            // closing an already-closed stream here is harmless.
            IOUtils.closeStream(in);
        }
    }

    /**
     * Lists the entries directly under {@code /hdfsapi/test}, printing for each one its
     * kind, permission, replication factor, length and path separated by tabs.
     *
     * @throws Exception if the directory cannot be listed
     */
    @Test
    public void listFiles() throws Exception {
        FileStatus[] listStatus = fileSystem.listStatus(new Path("/hdfsapi/test"));
        for (FileStatus fileStatus : listStatus) {
            String isDir = fileStatus.isDirectory() ? "文件夹" : "文件"; // directory / file label
            String permission = fileStatus.getPermission().toString(); // permission
            short replication = fileStatus.getReplication(); // replication factor
            long len = fileStatus.getLen(); // length
            String path = fileStatus.getPath().toString(); // path
            System.out.println(isDir + "\t" + permission + "\t" + replication + "\t" + len + "\t" + path);
        }
    }

    /**
     * Prints the hosts of every block of {@code /hdfsapi/test/spark.tgz}.
     *
     * <p>The file must already exist on the cluster (for example, uploaded by
     * {@link #copyBytes()}).
     *
     * @throws Exception if the file status or block locations cannot be read
     */
    @Test
    public void getFileBlockLocations() throws Exception {
        FileStatus fileStatus = fileSystem.getFileStatus(new Path("/hdfsapi/test/spark.tgz"));
        // Query the whole file: offset 0 through its full length.
        BlockLocation[] blocks = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        for (BlockLocation block : blocks) {
            for (String host : block.getHosts()) {
                System.out.println(host);
            }
        }
    }

    /**
     * Closes the file system handle and clears the fields set by {@link #setUp()}.
     *
     * @throws Exception if closing the file system fails
     */
    @After
    public void tearDown() throws Exception {
        try {
            // setUp() may have failed before the handle was assigned.
            if (fileSystem != null) {
                fileSystem.close();
            }
        } finally {
            fileSystem = null;
            configuration = null;
            System.out.println("HDFSApp.tearDown()");
        }
    }

}
原文地址:https://www.cnblogs.com/luogankun/p/4046369.html