Hadoop HDFS文件操作

1、创建目录

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeDir {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/data/20130709");
		fs.create(path);
		fs.close();
	}
}


2、删除目录

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteDir {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		
		Path path = new Path("/user/hadoop/data/20130710");
		fs.delete(path);
		fs.close();
	}
}


3、写文件

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteFile {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/data/write.txt");
		FSDataOutputStream out = fs.create(path);
		out.writeUTF("da jia hao,cai shi zhen de hao!");
		fs.close();
	}
}


4、读文件

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadFile {
	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/data/write.txt");
		
		if(fs.exists(path)){
			FSDataInputStream is = fs.open(path);
			FileStatus status = fs.getFileStatus(path);
			byte[] buffer = new byte[Integer.parseInt(String.valueOf(status.getLen()))];
			is.readFully(0, buffer);
			is.close();
            fs.close();
            System.out.println(buffer.toString());
		}
	}
}


5、上传本地文件到HDFS

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopyFromLocalFile {

	public static void main(String[] args) throws IOException {
		
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path src = new Path("/home/hadoop/word.txt");
		Path dst = new Path("/user/hadoop/data/");
		fs.copyFromLocalFile(src, dst);
		fs.close();
	}
}


6、删除文件

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteFile {

	public static void main(String[] args) throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		
		Path path = new Path("/user/hadoop/data/word.txt");
		fs.delete(path);
		fs.close();
	}
}


7、获取给定目录下的所有子目录以及子文件

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetAllChildFile {
	static Configuration conf = new Configuration();
	
	
	public static void main(String[] args)throws IOException {
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop");
		getFile(path,fs);
		//fs.close();
	}
	
	public static void getFile(Path path,FileSystem fs) throws IOException {
		
		FileStatus[] fileStatus = fs.listStatus(path);
		for(int i=0;i<fileStatus.length;i++){
			if(fileStatus[i].isDir()){
				Path p = new Path(fileStatus[i].getPath().toString());
				getFile(p,fs);
			}else{
				System.out.println(fileStatus[i].getPath().toString());
			}
		}
	}

}


8、查找某个文件在HDFS集群的位置

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class FindFile {
	
	public static void main(String[] args) throws IOException {	
		getFileLocal();
	}
	
	/**
	 * 查找某个文件在HDFS集群的位置
	 * @Title:  
	 * @Description: 
	 * @param 
	 * @return 
	 * @throws
	 */
	public static void getFileLocal() throws IOException{
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path path = new Path("/user/hadoop/data/write.txt");
		
		FileStatus status = fs.getFileStatus(path);
		BlockLocation[] locations = fs.getFileBlockLocations(status, 0, status.getLen());
		
		int length = locations.length;
		for(int i=0;i<length;i++){
			String[] hosts = locations[i].getHosts();
			System.out.println("block_" + i + "_location:" + hosts[i]);
		}
	}
	
}


9、HDFS集群上所有节点名称信息

package com.hadoop.file;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class FindFile {
	
	public static void main(String[] args) throws IOException {	
		getHDFSNode();
	}
	
	/**
	 * HDFS集群上所有节点名称信息
	 * @Title:  
	 * @Description: 
	 * @param 
	 * @return 
	 * @throws
	 */
	public static void getHDFSNode() throws IOException{
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);

		DistributedFileSystem  dfs = (DistributedFileSystem)fs;
		DatanodeInfo[] dataNodeStats = dfs.getDataNodeStats();
		
		for(int i=0;i<dataNodeStats.length;i++){
			System.out.println("DataNode_" + i + "_Node:" + dataNodeStats[i].getHostName());
		}
		
	}
	
	
}


 

原文地址:https://www.cnblogs.com/jiangu66/p/3187140.html