Hadoop-1、HDFS API简介

HDFS是一个高度容错的分布式文件系统,为了保证数据的一致性采用“写入一次,多次读取”的方式。

1、上传本地文件

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class CopeFile {
    /**
     * Uploads a local file to the HDFS root directory, then lists the
     * contents of that directory to confirm the upload.
     *
     * @throws IOException if the upload or the directory listing fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Explicitly load the cluster configuration; without it the client
        // falls back to the local file system and HDFS paths are not found.
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        // Local source file to upload.
        Path src = new Path("/home/ja/CADATA/SVD/prediction");
        // Destination directory on HDFS.
        Path dst = new Path("/");
        hdfs.copyFromLocalFile(src, dst);
        // Trailing space added so the message reads "Upload to hdfs://...".
        System.out.println("Upload to " + conf.get("fs.default.name"));
        FileStatus[] files = hdfs.listStatus(dst);
        for (FileStatus file : files) {
            System.out.println(file.getPath());
        }
        hdfs.close();  // release the client connection (was leaked before)
    }
}

稍微描述下:Hadoop下各种奇葩问题,有些书上没添加配置文件路径,结果会导致找不到HDFS的文件,所以注意下,不要被坑;而且有些书在数组声明那里不写[],无良啊。

2、创建HDFS文件

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class CreateFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        byte[] buff = "Hello Hadoop World!
".getBytes();
        Path dfs = new Path("/Test");
        FSDataOutputStream outputstream = hdfs.create(dfs);
        outputstream.write(buff);
    }
}

3、创建HDFS目录

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class CreateDir {
    /**
     * Creates the directory /TestDir on HDFS (including any missing parents,
     * per FileSystem.mkdirs semantics).
     *
     * @throws IOException if the directory cannot be created
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Point the client at the cluster configuration.
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path dfs = new Path("/TestDir");
        hdfs.mkdirs(dfs);
        hdfs.close();  // release the client connection (was leaked before)
    }
}

4、重命名HDFS文件

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class Rename {
    /**
     * Renames the HDFS file /Test to /T and reports whether it succeeded.
     *
     * @throws IOException if the rename request fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Point the client at the cluster configuration.
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path frpath = new Path("/Test");
        Path topath = new Path("/T");
        // rename returns false (rather than throwing) when the source is
        // missing or the destination already exists.
        boolean isRename = hdfs.rename(frpath, topath);
        String result = isRename ? "成功" : "失败";
        System.out.println("重命名结果为:" + result);
        hdfs.close();  // release the client connection (was leaked before)
    }

}

5、删除HDFS文件

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class DeleteFile {
    /**
     * Deletes the HDFS file /prediction (non-recursively) and prints the
     * outcome.
     *
     * @throws IOException if the delete request fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Point the client at the cluster configuration.
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path delef = new Path("/prediction");
        // recursive=false: deleting a non-empty directory would throw;
        // for a plain file this is the safe choice.
        boolean isDelete = hdfs.delete(delef, false);
        System.out.println("Delete ? " + isDelete);
        hdfs.close();  // release the client connection (was leaked before)
    }

}

删除目录和文件类似。

6、查看HDFS文件是否存在

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class CheckFile {
    /**
     * Checks whether the HDFS path /usr/root/input/test.data exists and
     * prints the result.
     *
     * @throws IOException if the existence check fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Point the client at the cluster configuration.
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path findf = new Path("/usr/root/input/test.data");
        boolean isExist = hdfs.exists(findf);
        System.out.println("Exists ? " + isExist);
        hdfs.close();  // release the client connection (was leaked before)
    }
}

7、查看HDFS文件的最后修改时间

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class GetLTime {
    /**
     * Prints the last-modification time of the HDFS path /T.
     *
     * @throws IOException if the path does not exist or the status lookup fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Point the client at the cluster configuration.
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path fPath = new Path("/T");
        FileStatus fileStatus = hdfs.getFileStatus(fPath);
        // Epoch milliseconds; wrap in java.util.Date if a readable form is needed.
        long moditime = fileStatus.getModificationTime();
        System.out.println("修改时间:" + moditime);
        hdfs.close();  // release the client connection (was leaked before)
    }

}

8、读取HDFS某个目录下的所有文件

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class ListALLFile {
    /**
     * Lists the full path of every entry directly under /usr/root/ on HDFS.
     *
     * @throws IOException if the directory listing fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Point the client at the cluster configuration.
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path dir = new Path("/usr/root/");
        // One status entry per child; print each child's absolute path.
        for (FileStatus entry : hdfs.listStatus(dir)) {
            System.out.println(entry.getPath().toString());
        }
        hdfs.close();
    }

}

9、查找某个文件在集群中的位置

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;


public class FileLoc {
    /**
     * Prints, for every block of the HDFS file /T, the host name of the
     * first datanode holding a replica of that block.
     *
     * @throws IOException if the status or block-location lookup fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Point the client at the cluster configuration.
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path fpath = new Path("/T");
        FileStatus status = hdfs.getFileStatus(fpath);

        // Locations for every block in the byte range [0, fileLength).
        BlockLocation[] blockcations = hdfs.getFileBlockLocations(status, 0, status.getLen());
        int blockLen = blockcations.length;

        for (int i = 0; i < blockLen; i++) {
            String[] hosts = blockcations[i].getHosts();
            // Guard: a block with no live replica yields an empty host array;
            // the unchecked hosts[0] would throw ArrayIndexOutOfBoundsException.
            if (hosts.length > 0) {
                System.out.println("block_" + i + "_location:" + hosts[0]);
            } else {
                System.out.println("block_" + i + "_location: (no live replica)");
            }
        }
        hdfs.close();
    }

}

10、获取HDFS集群上所有节点名称信息

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;


public class GetList {
    /**
     * Prints the host name of every datanode reported by the cluster.
     *
     * @throws IOException if the datanode query fails
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Point the client at the cluster configuration.
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        // getDataNodeStats() is specific to DistributedFileSystem, so the
        // generic FileSystem handle must be downcast.
        DistributedFileSystem hdfs = (DistributedFileSystem) FileSystem.get(conf);

        DatanodeInfo[] nodes = hdfs.getDataNodeStats();
        int index = 0;
        for (DatanodeInfo node : nodes) {
            System.out.println("DataNode_" + index + "_Name:" + node.getHostName());
            index++;
        }
        hdfs.close();
    }
}
原文地址:https://www.cnblogs.com/wn19910213/p/3645399.html