SequenceFile

产生一个SequenceFile文件

package _SequenceFileInputFormat;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.Text;

/**
 * @Author:Dapeng
 * @Discription: Example that produces a SequenceFile, so it can later be
 *               consumed via SequenceFileInputFormat.
 * @Date:Created in 下午 20:57 2018/11/4 0004
 */
public class MySequenceFileInputFormat {
    /**
     * Converts a plain text file into a Hadoop SequenceFile.
     *
     * <p>Each line of the input text file becomes one record: the key is the
     * 1-based line number as a {@link LongWritable}, the value is the line
     * content as a {@link Text}. A SequenceFile is created with
     * {@link SequenceFile#createWriter} using {@code Writer.Option}s for the
     * output path, key class, and value class.
     *
     * @param args unused
     * @throws IOException if the input file cannot be read or the
     *                     SequenceFile cannot be written
     */
    public static void main(String[] args) throws IOException {

        Configuration conf = new Configuration();

        // Output must be a file path, not a directory: the original
        // "file:/d:/hadoopFile/" pointed at the directory that also contains
        // the input file, so the writer could never create it.
        Writer.Option name = Writer.file(new Path("file:/d:/hadoopFile/seq.file"));

        // Key type: the line number.
        Writer.Option keyClass = Writer.keyClass(LongWritable.class);

        // Value type: the line content.
        Writer.Option valClass = Writer.valueClass(Text.class);

        FileSystem fileSystem = FileSystem.get(conf);

        // try-with-resources closes both streams even when an exception is
        // thrown (the original leaked the input stream unconditionally and
        // leaked the writer on any failure before IOUtils.closeStream).
        try (Writer writer = SequenceFile.createWriter(conf, name, keyClass, valClass);
             BufferedReader reader = new BufferedReader(new InputStreamReader(
                     fileSystem.open(new Path("file:/d:/hadoopFile/blow.txt")),
                     StandardCharsets.UTF_8))) {

            // BufferedReader.readLine replaces the deprecated
            // DataInputStream.readLine the original used, which performs a
            // broken byte-to-char conversion; UTF-8 is assumed for the input
            // text — TODO confirm the source file's encoding.
            String line;
            long num = 0L; // primitive long: no autoboxing per iteration
            while ((line = reader.readLine()) != null) {
                num++;
                writer.append(new LongWritable(num), new Text(line));
            }
        }
    }
}
原文地址:https://www.cnblogs.com/da-peng/p/9910880.html