MR 文件合并

 1 package com.euphe.filter;
 2 
 3 import com.euphe.util.HUtils;
 4 import com.euphe.util.Utils;
 5 import org.apache.hadoop.conf.Configuration;
 6 import org.apache.hadoop.conf.Configured;
 7 import org.apache.hadoop.fs.FileSystem;
 8 import org.apache.hadoop.fs.Path;
 9 import org.apache.hadoop.io.Text;
10 import org.apache.hadoop.mapreduce.Job;
11 import org.apache.hadoop.mapreduce.Mapper;
12 import org.apache.hadoop.mapreduce.Reducer;
13 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
14 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
15 import org.apache.hadoop.util.GenericOptionsParser;
16 import org.apache.hadoop.util.Tool;
17 
18 import java.io.IOException;
19 
20 public class ReductionJob extends Configured implements Tool {
21     public static class Map extends Mapper<Object, Text, Text, Text> {
22         private static Text text = new Text();
23 
24         public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
25             text = value;
26             context.write(text, new Text());
27         }
28     }
29 
30     public static class Reduce extends Reducer<Text, Text, Text, Text> {
31         public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
32             context.write(key, new Text());
33         }
34     }
35     @Override
36     public int run(String[] args) throws Exception {
37         Configuration conf = HUtils.getConf();
38         conf.set("mapreduce.job.jar", Utils.getRootPathBasedPath("WEB-INF/jars/redu.jar"));
39         String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();//解析命令行参数
40         if (otherArgs.length !=2) {//要求必须有输入和输出路径两个参数
41             System.err.println("Usage: com.euphe.filter.ReductionJob <in> <out>");
42             System.exit(2);
43         }
44         Job job =  Job.getInstance(conf,"Reduction input  :"+otherArgs[0]+" to "+otherArgs[1]);
45         job.setJarByClass(ReductionJob.class);
46         job.setMapperClass(Map.class);
47         job.setReducerClass(Reduce.class);
48         job.setNumReduceTasks(1);
49 
50         job.setOutputKeyClass(Text.class);
51         job.setOutputValueClass(Text.class);
52 
53         FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
54         FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));
55         FileSystem.get(conf).delete(new Path(otherArgs[1]), true);//调用任务前先删除输出目录
56         return job.waitForCompletion(true) ? 0 : 1;
57     }
58 }