MapReduce Examples

Data deduplication: the mapper emits each input line as the key with an empty value; the shuffle phase groups identical keys together, so the reducer writes each distinct line exactly once.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Dedup {
    public static class Map extends Mapper<Object, Text, Text, Text> {
        // Emit the whole line as the key with an empty value;
        // the shuffle phase groups duplicate lines under a single key.
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            context.write(value, new Text(""));
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        // Each distinct line arrives exactly once as a key; write it out once.
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            context.write(key, new Text(""));
        }
    }

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: Dedup <input path> <output path>");
            System.exit(-1);
        }
        Job job = Job.getInstance();
        job.setJobName("Dedup");
        job.setJarByClass(Dedup.class);
        job.setMapperClass(Map.class);
        // The reduce logic is idempotent, so it also works as a combiner.
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
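As a side note, NullWritable is the idiomatic placeholder for a value that carries no information. A minimal sketch of the mapper with that change (this variant is not part of the original post; the reducer's value types and the driver's setOutputValueClass call would change to NullWritable as well):

import org.apache.hadoop.io.NullWritable;

// Hypothetical variant of the mapper above: NullWritable serializes to
// zero bytes, so no empty Text values travel through the shuffle.
public static class Map extends Mapper<Object, Text, Text, NullWritable> {
    @Override
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        context.write(value, NullWritable.get());
    }
}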

Sorting: the mapper parses each line as an integer and emits it as the key; the framework's shuffle sorts IntWritable keys in ascending order, so the reducer only needs to attach a running rank to each value.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Sort {
    public static class Map extends Mapper<Object, Text, IntWritable, IntWritable> {
        private static IntWritable data = new IntWritable();

        // Parse each line as an integer and emit it as the key; the shuffle
        // sorts IntWritable keys in ascending numeric order.
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            data.set(Integer.parseInt(value.toString().trim()));
            context.write(data, new IntWritable(1));
        }
    }

    public static class Reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private static IntWritable linenum = new IntWritable(1);

        // Keys arrive in sorted order; emit one (rank, value) pair per occurrence.
        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            for (IntWritable value : values) {
                context.write(linenum, key);
                linenum.set(linenum.get() + 1);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: Sort <input path> <output path>");
            System.exit(-1);
        }
        Job job = Job.getInstance();
        job.setJobName("Sort");
        job.setJarByClass(Sort.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
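Note that the output is globally sorted only when the job runs with a single reduce task. Hadoop defaults to one reducer, so this minimal driver happens to be correct, but with several reducers each output file would only be sorted internally. A one-line sketch of making the requirement explicit (setNumReduceTasks is the standard Job API; the call is not in the original code):

job.setNumReduceTasks(1); // one reducer => one globally sorted output file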

Computing averages: each input line holds a name and a score; the mapper emits (name, score) pairs, and the reducer sums the scores per name and divides by the count.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Score {
    public static class Map extends Mapper<Object, Text, Text, IntWritable> {
        // With the default TextInputFormat, each call receives a single line
        // of the form "name score"; emit a (name, score) pair.
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer tokenizerLine = new StringTokenizer(value.toString());
            String strName = tokenizerLine.nextToken();
            String strScore = tokenizerLine.nextToken();
            context.write(new Text(strName), new IntWritable(Integer.parseInt(strScore)));
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Sum the scores for one name and emit the truncated integer average.
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            int count = 0;
            for (IntWritable value : values) {
                sum += value.get();
                count++;
            }
            int average = sum / count;
            context.write(key, new IntWritable(average));
        }
    }

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: Score <input path> <output path>");
            System.exit(-1);
        }
        Job job = Job.getInstance();
        job.setJobName("Score");
        job.setJarByClass(Score.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
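The integer division above truncates the average. If fractional averages are wanted, the reducer can emit a DoubleWritable instead; a minimal sketch under that assumption (AvgReduce is a hypothetical name, and the driver would also need setOutputValueClass(DoubleWritable.class)):

import org.apache.hadoop.io.DoubleWritable;

// Hypothetical reducer variant that keeps the fractional part of the average.
public static class AvgReduce extends Reducer<Text, IntWritable, Text, DoubleWritable> {
    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        int count = 0;
        for (IntWritable value : values) {
            sum += value.get();
            count++;
        }
        // Double-precision division instead of truncating integer division.
        context.write(key, new DoubleWritable((double) sum / count));
    }
}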
Original source: https://www.cnblogs.com/liutoutou/p/3361362.html