HDFS手动拷贝某个特定的数据块(比如某个文件的第二个数据块)

代码呈现

package api;


import java.io.File;
import java.io.FileOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * Manually copies one specific data block of an HDFS file (here: the second
 * block of {@code /111.zip}) into a local file, by seeking to the block's
 * offset and copying exactly the block's length.
 *
 * @author potter
 */
public class TestCat {

    /** 0-based index of the block to copy; 1 == the second block. */
    private static final int BLOCK_INDEX = 1;

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://potter2:9000");
        // Must be set before FileSystem.get() so the connection is made as this user.
        System.setProperty("HADOOP_USER_NAME", "potter");

        Path file = new Path("/111.zip");

        // try-with-resources closes every stream exactly once, even on failure.
        // (The original closed the streams twice: copyBytes(..., true) closed
        // them, then in.close()/os.close() closed them again; and nothing was
        // closed at all if an earlier call threw.)
        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream in = fs.open(file);
             FileOutputStream os = new FileOutputStream(new File("D:/block2"))) {

            System.out.println(in + ",1111111");

            // File metadata (listStatus on a plain file returns a one-element array).
            FileStatus[] listStatus = fs.listStatus(file);
            System.out.println(listStatus.length + ",2222222");

            // Offsets and lengths of every block of the file.
            BlockLocation[] blockLocations =
                    fs.getFileBlockLocations(listStatus[0], 0L, listStatus[0].getLen());
            System.out.println(blockLocations.length + ",3333333");

            // Guard: a file smaller than one HDFS block has no "second block";
            // the original threw ArrayIndexOutOfBoundsException here.
            if (blockLocations.length <= BLOCK_INDEX) {
                System.err.println("File has only " + blockLocations.length
                        + " block(s); no block #" + (BLOCK_INDEX + 1) + " to copy.");
                return;
            }

            // Length of the second block (primitive long — no pointless boxing).
            long length = blockLocations[BLOCK_INDEX].getLength();
            System.out.println(length + ",444444444");

            // Start offset of the second block within the file.
            long offset = blockLocations[BLOCK_INDEX].getOffset();
            System.out.println(offset + ",5555555");

            // Seek to the block start and copy exactly `length` bytes.
            in.seek(offset);
            // close=false: the try-with-resources statement owns the streams.
            IOUtils.copyBytes(in, os, length, false);
        }
    }
}
原文地址:https://www.cnblogs.com/sangumaolu/p/8545900.html