WordCount in MapReduce

package com.mengyao.hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * The input is the file /mapreduces/word.txt on HDFS, with the following
 * tab-delimited contents:
 *         hadoop    zookeeper    hbase    hive
 *         flume    sqoop    pig    mahout
 *         hadoop    spark    mllib    hive    zookeeper
 *         hadoop    storm    kafka    redis    zookeeper
 *
 * The output directory is /mapreduces/wordcount/ on HDFS:
 *     the empty _SUCCESS file marks a successfully completed job
 *     (no such marker file is written when the job fails);
 *     part-r-00000 contains the job's result:
 *      flume    1
 *      hadoop    3
 *      hbase    1
 *      hive    2
 *      kafka    1
 *      mahout    1
 *      mllib    1
 *      pig    1
 *      redis    1
 *      spark    1
 *      sqoop    1
 *      storm    1
 *      zookeeper    3
 *
 * @author mengyao
 *
 */
public class WordCount extends Configured implements Tool {

    static class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        private Text outputKey;
        private LongWritable outputValue;

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            // Reuse one Text and one LongWritable per task instead of
            // allocating new Writable objects on every map() call.
            this.outputKey = new Text();
            this.outputValue = new LongWritable(1L);
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // The sample input is tab-delimited, so split each line on tabs
            // and emit (word, 1) for every token.
            final String[] words = value.toString().split("\t");
            for (String word : words) {
                this.outputKey.set(word);
                context.write(this.outputKey, this.outputValue);
            }
        }
    }

    static class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        private Text outputKey;
        private LongWritable outputValue;

        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            this.outputKey = new Text();
            this.outputValue = new LongWritable();
        }

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum the partial counts for this word and emit (word, total).
            long count = 0L;
            for (LongWritable item : values) {
                count += item.get();
            }
            this.outputKey.set(key);
            this.outputValue.set(count);
            context.write(this.outputKey, this.outputValue);
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), WordCount.class.getSimpleName());
        job.setJarByClass(WordCount.class);

        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Summing counts is associative and commutative, so the reducer can
        // safely double as a combiner to shrink the map-side output.
        job.setCombinerClass(WordCountReducer.class);

        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static int createJob(String[] args) {
        Configuration conf = new Configuration();
        // Raise the DataNode write timeout to 2 hours and pin input splits
        // to the 256 MB - 512 MB range.
        conf.set("dfs.datanode.socket.write.timeout", "7200000");
        conf.set("mapreduce.input.fileinputformat.split.minsize", "268435456");
        conf.set("mapreduce.input.fileinputformat.split.maxsize", "536870912");
        int status = 0;

        try {
            status = ToolRunner.run(conf, new WordCount(), args);
        } catch (Exception e) {
            e.printStackTrace();
        }

        return status;
    }

    public static void main(String[] args) {
        // e.g. args = new String[]{"/mapreduces/word.txt", "/mapreduces/wordcount"}
        // (the original hardcoded these paths, which made the check below dead code)
        if (args.length != 2) {
            System.err.println("Usage: " + WordCount.class.getName() + " <INPUT_PATH> <OUTPUT_PATH>");
            System.exit(2);
        }
        System.exit(createJob(args));
    }

}
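
To try the job end to end, package the class into a jar and submit it with the hadoop launcher. A minimal sketch, assuming the jar is built as wordcount.jar (the jar name is an assumption, not from the original post) and that word.txt sits in the local working directory:

    # Upload the sample input to HDFS
    hdfs dfs -mkdir -p /mapreduces
    hdfs dfs -put word.txt /mapreduces/word.txt

    # Submit the job: <INPUT_PATH> <OUTPUT_PATH>
    hadoop jar wordcount.jar com.mengyao.hadoop.mapreduce.WordCount /mapreduces/word.txt /mapreduces/wordcount

    # Inspect the result
    hdfs dfs -cat /mapreduces/wordcount/part-r-00000

Note that FileOutputFormat refuses to start a job whose output directory already exists, so remove it (hdfs dfs -rm -r /mapreduces/wordcount) before re-running.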
Original post: https://www.cnblogs.com/mengyao/p/4865561.html