HDFS Operations -- Uploading, Creating, Deleting, and Querying Files

1. Upload a local file to HDFS

//Upload a local file to HDFS
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFile {
    public static void main(String[] args) {        
        try {
            Configuration conf = new Configuration();            
            String str_src = "/usr/local/myjar/mongo/地图数据/Zhengye_Drive_Testing_Data/solu"
                    + "/solu_Yanming_DriveTesting_09-04.16-17.16-27_True_TA.json";
            String str_dst = "hdfs://node4:9000/user/hadoop/TestFile.json";
            
            Path src = new Path(str_src); //local path
            Path dst = new Path(str_dst); //HDFS path            

            FileSystem hdfs = dst.getFileSystem(conf);
            //FileSystem hdfs = FileSystem.get(URI.create(str_dst), conf); //this works too
            //In pseudo-distributed mode both forms work; calling FileSystem.get(conf)
            //directly may throw a "Wrong FS" error
            
            hdfs.copyFromLocalFile(src, dst);            
            System.out.println("Upload to "+conf.get("fs.default.name"));
            
            FileStatus files[] = hdfs.listStatus(dst);
            for(FileStatus file:files){
                System.out.println(file.getPath());
            }            
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Possible error: "Wrong FS". See these write-ups for fixes:
http://blog.csdn.net/kurama_sai/article/details/8604640
http://blog.itpub.net/22846396/viewspace-1119945
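
A minimal sketch of the usual fix (assuming the hdfs://node4:9000 NameNode address used throughout this post): set fs.defaultFS explicitly on the Configuration, so that FileSystem.get(conf) resolves paths against the intended cluster rather than the local file system.

// Sketch: point the Configuration at the cluster explicitly.
// fs.defaultFS is the current key; fs.default.name is its deprecated alias.
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://node4:9000");
FileSystem hdfs = FileSystem.get(conf); // no longer throws "Wrong FS"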

2. Create a file in HDFS and write a line of text to it

//Create a file and write a single line of text to it
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateFile {
    public static void main(String[] args) {       
        try {
            Configuration conf = new Configuration();
            byte[] buff = "This is a test line.".getBytes();
            String dsf = "hdfs://node4:9000/user/hadoop/Test";
            Path pathdsf = new Path(dsf);
            FileSystem hdfs = pathdsf.getFileSystem(conf);
            FSDataOutputStream outputStream = hdfs.create(pathdsf);
            outputStream.write(buff, 0, buff.length);
            outputStream.close(); //flush and close, or the data may never reach HDFS
            System.out.println("Finish write!");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
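
Since the output stream must always be closed, a try-with-resources variant (a sketch, Java 7+, not from the original post) keeps the cleanup automatic even if write() throws:

// Sketch: same write as above, with automatic stream cleanup (Java 7+)
try (FSDataOutputStream out = hdfs.create(pathdsf)) {
    out.write(buff, 0, buff.length);
}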

3. Delete a file

Configuration conf = new Configuration();
Path path_del = new Path("hdfs://node4:9000/user/hadoop/Test2");
FileSystem hdfs = path_del.getFileSystem(conf);
boolean isDeleted = hdfs.delete(path_del,false);
//hdfs.delete(path_del,true); //recursive delete: if path_del is a directory, it and all files under it are removed
System.out.println("delete? " +isDeleted);

4. Rename a file

Configuration conf = new Configuration();
Path path_fr = new Path("hdfs://node4:9000/user/hadoop/Test");
Path path_to = new Path("hdfs://node4:9000/user/hadoop/Test2");
FileSystem hdfs = path_fr.getFileSystem(conf);
boolean isRename = hdfs.rename(path_fr, path_to);  //rename the file
System.out.println("is rename? "+isRename);

5. Query file and file system information

Configuration conf = new Configuration();
Path findf = new Path("hdfs://node4:9000/user/hadoop/hadoop.txt");
FileSystem hdfs = findf.getFileSystem(conf);

//Check whether a given HDFS path exists
boolean isExists = hdfs.exists(findf); //works for files and directories alike
System.out.println("exists? " + isExists);

//Inspect the attributes of an HDFS file
FileStatus filestatus = hdfs.getFileStatus(findf);
long modificationTime = filestatus.getModificationTime(); //last modification time
System.out.println("Modification time is: "+modificationTime);
long blocksize = filestatus.getBlockSize(); //block size
System.out.println("Block size is: "+blocksize);


//Find where the file's blocks are stored in the HDFS cluster
BlockLocation[] blkLocations = hdfs.getFileBlockLocations(filestatus, 0, filestatus.getLen());
int blockLen = blkLocations.length;
for(int i = 0 ; i < blockLen ; i++){
    //each block may be replicated on several hosts; the original code
    //indexed hosts[i], which can run past the end of the array
    String[] hosts = blkLocations[i].getHosts();
    for(String host : hosts){
        System.out.println("block "+i+" location: "+host);
    }
}

//Inspect various properties of the HDFS file system itself
System.out.println("scheme: "+hdfs.getScheme()); 
System.out.println("used: "+hdfs.getUsed());
System.out.println("canonical service name: "+hdfs.getCanonicalServiceName());
System.out.println("default block size: "+hdfs.getDefaultBlockSize(findf));

Output:

exists? true
Modification time is: 1430225267896
Block size is: 134217728
block 0 location: node4
scheme: hdfs
used: 0
canonical service name: 192.168.1.160:9000
default block size: 134217728

6. Read the contents of a file in HDFS

The following code prints the contents of the Test file to standard output.

String dsf = "hdfs://node4:9000/user/hadoop/Test";
Configuration conf = new Configuration();

Path pathdsf = new Path(dsf);

FileSystem fs = FileSystem.get(URI.create(dsf), conf);
//FileSystem fs = pathdsf.getFileSystem(conf); //this works too

FSDataInputStream hdfsInStream = fs.open(pathdsf);

byte[] ioBuffer = new byte[1024];
int readLen = hdfsInStream.read(ioBuffer);
while (readLen != -1) {
    System.out.write(ioBuffer, 0, readLen);
    readLen = hdfsInStream.read(ioBuffer);
}
hdfsInStream.close();
fs.close();
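
Hadoop also ships a helper that can stand in for the manual read loop; a minimal sketch using org.apache.hadoop.io.IOUtils, which would replace the while loop above:

// Sketch: same effect as the read loop above, via Hadoop's IOUtils
FSDataInputStream in = fs.open(pathdsf);
try {
    IOUtils.copyBytes(in, System.out, 4096, false); // false = leave the streams open
} finally {
    IOUtils.closeStream(in);
}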

7. Get the names of all DataNodes in the cluster

Configuration conf = new Configuration();
Path path = new Path("hdfs://node4:9000/user/hadoop");
FileSystem fs = path.getFileSystem(conf);
DistributedFileSystem dfs = (DistributedFileSystem) fs;
DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();

String[] names = new String[dataNodeStats.length];
for(int i = 0 ; i < dataNodeStats.length ; i++){
    names[i] = dataNodeStats[i].getHostName();
    System.out.println("no."+i+", name:"+names[i]);
}

The output is the list of node names:
no.0, name:node4
no.1, name:node3

Original post (Chinese): https://www.cnblogs.com/gnivor/p/4895888.html