MapReduce实例

  今天上课我们做了MapReduce的相关实验,了解了MapReduce的一个简单实例,具体内容为:统计每个id出现的次数。

  首先建立MapReduce的项目,运行相关的代码。(我用的是windows连接Linux下的Hadoop)

  

package mapreduce;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that counts how many times each id occurs in the first
 * tab-separated column of the input records.
 *
 * <p>The mapper emits {@code (firstColumn, 1)} for every input line and the
 * reducer sums those ones per key.
 */
public class WordCount {
    /** Input path used when no command-line argument is given. */
    private static final String DEFAULT_INPUT =
            "hdfs://192.168.43.18:9000/user/hadoop/data/mapreduce1/buyer_favorite1";
    /** Output path used when fewer than two command-line arguments are given. */
    private static final String DEFAULT_OUTPUT =
            "hdfs://192.168.43.18:9000/user/hadoop/data/mapreduce1/out";

    /**
     * Configures and submits the job, then exits the JVM with status 0 if the
     * job completed successfully and 1 otherwise.
     *
     * @param args optional; {@code args[0]} overrides the input path and
     *             {@code args[1]} overrides the output path. With no arguments
     *             the built-in default paths are used.
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException propagated from {@code waitForCompletion}
     * @throws InterruptedException   if the wait for completion is interrupted
     */
    public static void main(String[] args) throws IOException,ClassNotFoundException,InterruptedException{
        Job job = Job.getInstance();
        job.setJobName("WordCount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(doMapper.class);
        job.setReducerClass(doReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Fall back to the original hard-coded locations so that running
        // without arguments behaves exactly as before.
        Path in = new Path(args.length > 0 ? args[0] : DEFAULT_INPUT);
        Path out = new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT);
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Emits {@code (firstColumn, 1)} for each input line, where the first
     * column is the text before the first tab character.
     */
    public static class doMapper extends Mapper<Object, Text, Text, IntWritable>{
        /** Constant count of 1 written for every record. */
        public static final IntWritable one = new IntWritable(1);

        /**
         * @deprecated no longer written by {@link #map}; retained only so that
         *             existing references to this field still compile.
         */
        @Deprecated
        public static Text word = new Text();

        // Per-instance output buffer, so mapper instances that share a JVM
        // cannot overwrite each other's key.
        private final Text outputKey = new Text();

        /**
         * @param key     input key; not used
         * @param value   one input line
         * @param context sink for the {@code (firstColumn, 1)} pair
         */
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString(), "\t");
            // A blank line (or one holding only tabs) has no tokens; skip it
            // instead of letting nextToken() throw NoSuchElementException.
            if (!tokenizer.hasMoreTokens()) {
                return;
            }
            outputKey.set(tokenizer.nextToken());
            context.write(outputKey, one);
        }
    }

    /** Sums the counts received for each key and writes {@code (key, total)}. */
    public static class doReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
        // Reused for every key to avoid allocating a new writable per output.
        private final IntWritable result = new IntWritable();

        /**
         * @param key     the id being counted
         * @param values  the counts emitted for {@code key}
         * @param context sink for the {@code (key, total)} pair
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }
}

  原始输入文件的内容为:

  

10181    1000481    2010-04-04 16:54:31
20001    1001597    2010-04-07 15:07:52
20001    1001560    2010-04-07 15:08:27
20042    1001368    2010-04-08 08:20:30
20067    1002061    2010-04-08 16:45:33
20056    1003289    2010-04-12 10:50:55
20056    1003290    2010-04-12 11:57:35
20056    1003292    2010-04-12 12:05:29
20054    1002420    2010-04-14 15:24:12
20055    1001679    2010-04-14 19:46:04
20054    1010675    2010-04-14 15:23:53
20054    1002429    2010-04-14 17:52:45
20076    1002427    2010-04-14 19:35:39
20054    1003326    2010-04-20 12:54:44
20056    1002420    2010-04-15 11:24:49
20064    1002422    2010-04-15 11:35:54
20056    1003066    2010-04-15 11:43:01
20056    1003055    2010-04-15 11:43:06
20056    1010183    2010-04-15 11:45:24
20056    1002422    2010-04-15 11:45:49
20056    1003100    2010-04-15 11:45:54
20056    1003094    2010-04-15 11:45:57
20056    1003064    2010-04-15 11:46:04
20056    1010178    2010-04-15 16:15:20
20076    1003101    2010-04-15 16:37:27
20076    1003103    2010-04-15 16:37:05
20076    1003100    2010-04-15 16:37:18
20076    1003066    2010-04-15 16:37:31
20054    1003103    2010-04-15 16:40:14
20054    1003100    2010-04-15 16:40:16

运行的结果为:

 

原文地址:https://www.cnblogs.com/huan-ch/p/11768592.html