Java Operations on Hadoop HDFS
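
The class below walks through common HDFS operations with the Java FileSystem API: creating and deleting directories, uploading and downloading files, streaming a file to stdout, listing a directory, and inspecting DataNode and block-location information.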

package com.test.hdfs;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.io.IOUtils;

public class HDFSTest {
    public static Configuration conf=new Configuration();
    public static FileSystem getFs(URI u,Configuration c){
        FileSystem fs=null;
        try {
            fs = FileSystem.get(u, c);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return fs;
    }
    //print information about the DataNodes in the cluster
    public static void getDataNodeInfo(FileSystem fsm){
        DistributedFileSystem dfs=(DistributedFileSystem)fsm;
        DatanodeInfo dinfo[]=null;
        try {
            //DataNode information by state: ALL, LIVE, DEAD, DECOMMISSIONING
            dinfo = dfs.getDataNodeStats();
        } catch (IOException e) {
            e.printStackTrace();
        }
        for(DatanodeInfo info:dinfo){
            System.out.println(info);            
        }
    }
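    //A minimal sketch (not in the original post): the same report filtered to a single
    //state. Assumes Hadoop 2.x, where DistributedFileSystem has a
    //getDataNodeStats(DatanodeReportType) overload and the enum lives in
    //org.apache.hadoop.hdfs.protocol.HdfsConstants; the class/package may differ in other versions.
    public static void getLiveDataNodeInfo(FileSystem fsm){
        DistributedFileSystem dfs=(DistributedFileSystem)fsm;
        try {
            DatanodeInfo[] live = dfs.getDataNodeStats(
                    org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType.LIVE);
            for(DatanodeInfo info:live){
                System.out.println(info);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }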
    //find where a file's blocks are located in the cluster
    public static void getFileBlockLocation(String filePath,FileSystem fsm){
        BlockLocation bl[]=null;
        FileStatus fst=null;
        try {
            fst = fsm.getFileStatus(new Path(filePath));
            bl=fsm.getFileBlockLocations(fst, 0, fst.getLen());
        } catch (IllegalArgumentException | IOException e1) {
            e1.printStackTrace();
        }
        for(BlockLocation b:bl){
            System.out.println(b);
        }
    }
    //read a file from HDFS and copy it to stdout
    public static void readFile(String path,FileSystem fsm,Configuration conf){
        FSDataInputStream fis=null;
        try {
            fis=fsm.open(new Path(path));
            IOUtils.copyBytes(fis, System.out, conf, false);
        } catch (IllegalArgumentException | IOException e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(fis);
        }
    }
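    //A minimal sketch (not in the original post): creating a file on HDFS and writing a
    //few bytes through FSDataOutputStream (fully qualified so no extra import is needed).
    //The path and content are placeholders; fsm.create(path, true) overwrites an existing file.
    public static void writeFile(String path,FileSystem fsm){
        org.apache.hadoop.fs.FSDataOutputStream fos=null;
        try {
            fos=fsm.create(new Path(path), true);
            fos.write("hello hdfs\n".getBytes("UTF-8"));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(fos);
        }
    }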
    //list the entries directly under a path
    public static void listDir(FileSystem fs,Path pth){
        FileStatus [] fsts=null;
        try {
            fsts=fs.listStatus(pth);
        } catch (IOException e) {
            e.printStackTrace();
        }
        for(FileStatus fstst:fsts){
            String isDir=fstst.isDirectory()?"directory":"file";
            String name =fstst.getPath().toString();
            System.out.println(isDir+":"+name);
        }
    }
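    //A minimal sketch (not in the original post): recursive listing with
    //FileSystem.listFiles(path, true), which returns the files (not directories) under the
    //whole subtree. Types are fully qualified so no extra imports are needed; assumes Hadoop 2.x.
    public static void listFilesRecursive(FileSystem fs,Path pth){
        try {
            org.apache.hadoop.fs.RemoteIterator<org.apache.hadoop.fs.LocatedFileStatus> it =
                    fs.listFiles(pth, true);
            while(it.hasNext()){
                System.out.println(it.next().getPath());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }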
    
    
    
    public static void main(String[] args) throws URISyntaxException, IllegalArgumentException, IOException {
        FileSystem fsm=getFs(new URI("hdfs://192.168.10.21:9000"),conf);
        //create a directory
        fsm.mkdirs(new Path("test333"));
        //delete a directory
        //fsm.delete(new Path("test111"),true);
        //upload a local file (backslashes in Windows paths must be escaped in Java string literals)
        //fsm.copyFromLocalFile(true,true,new Path("E:\\pack_work\\pycharm-professional-2017.1.2.exe"), new Path("/user/Administrator/test333/"));
        //download a file to the local file system
        //fsm.copyToLocalFile(false, new Path("/user/Administrator/test333/pycharm-professional-2017.1.2.exe"), new Path("J:\\test\\"),true);
        //fsm.close();
        //print DataNode information
        //getDataNodeInfo(fsm);
        //locate the blocks of a file
        //getFileBlockLocation("/user/Administrator/test333/pycharm-professional-2017.1.2.exe", fsm);
        //read an HDFS file
        //readFile("hdfs://192.168.10.22:9000/test/yarn-xfvm-resourcemanager-xfvm01.log", fsm, conf);
        listDir(fsm, new Path("/test"));
    }
       
}
Original post: https://www.cnblogs.com/MrFee/p/hadoop_java_api.html