105_实例

MapReduce编程

有三个文件file1、file2、file3,文件中每一行都是一个数字,如下所示。

file1.txt:

2
32
654
32
15
756
65223

file2.txt:

5956
11
650
92

file3.txt:

26
54
6

请编写 MapReduce 程序实现如下需求:

MapReduce 程序读取这三个文件,对三个文件中的数字进行整体升序排序,并输出到一个结果文件中,结果文件中的每一行有两个数字(两个数字之间使用制表符分隔),第一个数字代表排名,第二个数字代表原始数据

期望输出:

1	2
2	6
3	11
4	15
5	26
6	32
7	32
8	54
9	92
10	650
11	654
12	756
13	5956
14	65223

homeworkMapper.java

package com.lagou.mr.homework01;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Parses each input line as an integer and emits it as the map output key.
 *
 * <p>The number is placed in the key so that the sort applied to map output
 * keys orders the numbers; the value is a constant placeholder whose only
 * purpose is to keep one record per occurrence (duplicates are preserved).
 */
public class homeworkMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {
    // Reused across map() calls to avoid allocating one object per record.
    private final IntWritable mapperValue = new IntWritable();
    // Constant placeholder value; only the key carries information.
    private final IntWritable one = new IntWritable(1);

    /**
     * Emits {@code (number, 1)} for the number found on the given line.
     *
     * @param key     offset key supplied by the input format (unused)
     * @param value   one line of input, expected to hold a single integer
     * @param context sink for the map output
     * @throws NumberFormatException if a non-blank line is not a valid integer
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        final String num = value.toString().trim();
        // Blank lines (e.g. a trailing empty line at the end of a file) carry no
        // number; skip them instead of failing the whole task in parseInt.
        if (num.isEmpty()) {
            return;
        }
        mapperValue.set(Integer.parseInt(num));
        context.write(mapperValue, one);
    }
}

homeworkReducer.java

package com.lagou.mr.homework01;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Writes every incoming number together with a running 1-based rank.
 *
 * <p>The rank is only a global rank if all keys pass through a single reducer
 * instance and reach it in ascending order; this class does no sorting itself.
 */
public class homeworkReducer extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
    // reduce() is invoked once per distinct key on the same reducer instance, so the
    // running rank has to live in a field to keep counting across invocations.
    private final IntWritable position = new IntWritable(1);

    /**
     * Emits one {@code (rank, number)} record per occurrence of {@code key}.
     *
     * @param key     the number being ranked
     * @param values  one element per occurrence of the number; only the count matters
     * @param context sink for the reduce output
     */
    @Override
    protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Duplicated numbers share one reduce() call; each occurrence gets its own rank.
        for (IntWritable ignored : values) {
            context.write(position, key);
            // Advance the rank in place instead of allocating a new Writable per record.
            position.set(position.get() + 1);
        }
    }
}

homeworkDriver.java

package com.lagou.mr.homework01;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class homeworkDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
         /*
        1. 获取配置文件对象,获取job对象实例
        2. 指定程序jar的本地路径
        3. 指定Mapper/Reducer类
        4. 指定Mapper输出的kv数据类型
        5. 指定最终输出的kv数据类型
        6. 指定job处理的原始数据路径
        7. 指定job输出结果路径
        8. 提交作业
         */

        final Configuration configuration = new Configuration();
        final Job job = Job.getInstance(configuration, "homeworkDriver");

        job.setJarByClass(com.lagou.mr.homework01.homeworkDriver.class);
        job.setMapperClass(homeworkMapper.class);
        job.setReducerClass(homeworkReducer.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        job.setNumReduceTasks(1);

        FileInputFormat.setInputPaths(job, new Path("H:\hadoop\learningCode\mapreduce\wordcount\input\homework01"));
        FileOutputFormat.setOutputPath(job, new Path("H:\hadoop\learningCode\mapreduce\wordcount\output\homework01\output"));

        final boolean flag = job.waitForCompletion(true);
        System.exit(flag ? 0 : 1);
    }
}

原文地址:https://www.cnblogs.com/haitaoli/p/15114483.html