HDFS API

Hadoop 的文件系统操作 API 位于 org.apache.hadoop.fs 包中(部分 HDFS 专用类位于 org.apache.hadoop.hdfs 包),可以对文件和目录进行新建、删除、重命名、查询等操作。

比较重要的几个类:

(1)Configuration:HDFS的配置信息;

(2)FileSystem: HDFS文件系统;

(3)Path: HDFS文件或目录的路径;

(4)FileStatus: Path下面的文件或目录;

(5)BlockLocation: 文件块的位置;

(6)DistributedFileSystem: 分布式文件系统;

(7)DatanodeInfo:数据节点信息。

代码:

package com.hellohadoop;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataOutputStream;

public class FileOperator {
    
    static Configuration conf;
    static FileSystem hdfs;
    
    public static void main(String[] args) throws Exception{
        Init();
        Upload();
//        Create();
//        CreateFolder();
//        Rename();
//        Delete();
//        IsExist();
//        GetTime();
//        GetAllFiles();
//        GetLocations();
//        GetAllHosts();
    }
    
    // 初始化
    public static void Init() throws Exception{
        conf = new Configuration();
        hdfs = FileSystem.get(conf);            
    }
    
    // 上传文件
    public static void Upload() throws Exception{
        // 本地文件
        Path src = new Path("F:\Hadoop\DataFiles\data.txt");
        // 上传路径
        Path dst = new Path("hdfs://master:9000/user/Administrator/");
        // 上传文件
        hdfs.copyFromLocalFile(src, dst);
        System.out.println("Upload to " + conf.get("fs.default.name"));
    }
    
    // 创建文件
    public static void Create() throws Exception{
        byte[] buff = "Hello Hadoop!".getBytes();
        // 新建文件路径
        Path dfs = new Path("hdfs://master:9000/user/Administrator/hello");
        FSDataOutputStream outputStream = hdfs.create(dfs);
        outputStream.write(buff, 0, buff.length);
        System.out.println("Created!");
    }
    
    // 创建文件夹
    public static void CreateFolder() throws Exception{
        // 新建文件路径
        Path dfs = new Path("hdfs://master:9000/user/Administrator/helloDir");
        hdfs.mkdirs(dfs);
        System.out.println("Created!");
    }
    
    // 重新命名HDFS文件
    public static void Rename() throws Exception{
        Path of = new Path("hdfs://master:9000/user/Administrator/data.txt");
        Path nf = new Path("hdfs://master:9000/user/Administrator/newdata.txt");
        boolean isOk = hdfs.rename(of, nf);
        String res = isOk? "Yes": "No";
        System.out.println("Result:" + res);
    }
    
    // 删除HDFS文件
    public static void Delete() throws Exception{
        Path file = new Path("hdfs://master:9000/user/Administrator/helloDir");
//        hdfs.delete(file, false);// 第二个为是否递归删除,如果目录下面有文件,是否递归删除为false会报错
        hdfs.delete(file, true);
    }
    
    // 查看某个文件是否存在
    public static void IsExist() throws IOException{
        Path file = new Path("hdfs://master:9000/user/Administrator/hello1");
        String res = hdfs.exists(file)? "Yes": "No";
        System.out.println("Result:" + res);
    }
    
    // 查看文件最后修改时间
    public static void GetTime() throws Exception{
        Path file = new Path("hdfs://master:9000/user/Administrator/hello");
        FileStatus fs = hdfs.getFileStatus(file);
        long mTime = fs.getModificationTime();
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");
        String sTime = sdf.format(new Date(mTime));
        System.out.println("Modified Time:" + sTime);
    }
    
    // 递归查看文件目录
    public static void GetAllFiles() throws Exception{
        Path file = new Path("hdfs://master:9000/");
        AllFile(file);
    }
    
    public static void AllFile(Path p) throws Exception{
        FileStatus[] fs = hdfs.listStatus(p);
        for (FileStatus f: fs){
            // 是文件输出路径
            if (hdfs.isFile(f.getPath())){
                System.out.println(f.getPath());
            }
            // 是目录继续递归
            else{
                System.out.println();
                System.out.println(f.getPath() + ":");
                AllFile(f.getPath());
            }
        }
        
    }
    
    // 查看文件在HDFS中的位置
    public static void GetLocations() throws Exception{
        Path file = new Path("hdfs://master:9000/user/Administrator/stsme_20150330.sql");
        FileStatus fs = hdfs.getFileStatus(file);
        BlockLocation[] blk = hdfs.getFileBlockLocations(fs, 0, fs.getLen());
        for(int i=0; i<blk.length; i++){
            String[] host = blk[i].getHosts();
            System.out.println("Block " + i + " Location:" + host[0]);
        }        
    }
    
    // 查看HDFS集群上所有节点
    public static void GetAllHosts() throws Exception{
        DistributedFileSystem dfs = (DistributedFileSystem) hdfs;
        DatanodeInfo[] dns = dfs.getDataNodeStats();
        for (int i=0; i<dns.length; i++){
            System.out.println("Datanode " + i + " Name:" + dns[i].getHostName());
        }
    }
    
}
原文地址:https://www.cnblogs.com/mstk/p/6682179.html