MapReduce实现共同朋友问题

答案:

  

package com.duking.mapreduce;

import java.io.IOException;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class FindFriends {
    
    /**
     * map方法
     * @author duking
     *
     */
    public static class Map extends Mapper<Object, Text, Text, Text> {
        
        /**
         * 实现map方法
         */
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
                
                //将输入的每一行数据切分后存到persions中
                StringTokenizer persions = new StringTokenizer(value.toString());
                
                //定义一个Text 存放本人信息owner
                Text owner = new Text();
                
                //定义一个Set集合,存放朋友信息
                Set<String> set = new TreeSet<String>();
                
                //将这一行的本人信息存入owner中
                owner.set(persions.nextToken());
                
                //将所有的朋友信息存放到Set集合中
                while(persions.hasMoreTokens()){
                    set.add(persions.nextToken());
                }
                
                //定义一个String数组存放朋友信息
                String[] friends = new String[set.size()];
                //将集合转换为数组,并将集合中的数据存放到friend
                friends = set.toArray(friends);
                
                //将朋友进行两两组合
                for(int i=0;i<friends.length;i++){
                    for(int j=i+1;j<friends.length;j++){
                        String outputkey = friends[i]+friends[j];
                        context.write(new Text(outputkey), owner);
                    }
                }
                
        }
    
    }
    
    /**
     * Reduce方法
     * @author duking
     *
     */
    public static class Reduce extends Reducer<Text, Text, Text, Text> {

        /**
         * 实现Reduce方法
         */
        public void reduce(Text key, Iterable<Text> values,Context context) throws IOException, InterruptedException {
            
            String commonfriends = "";
            
            for (Text val : values){
                if(commonfriends == ""){
                    commonfriends = val.toString();
                }else{
                    commonfriends = commonfriends + ":" +val.toString();
                }
            }
            
            context.write(key,new Text(commonfriends));
        }
    }
    
    
    /**
     * main
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();

        conf.set("mapred.job.tracker", "192.168.60.129:9000");

        //指定待运行参数的目录为输入输出目录
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        
        /*  指定工程目录下的input output为输入输出目录
          String[] ioArgs = new String[] {"input", "output" };
          String[] otherArgs = new GenericOptionsParser(conf, ioArgs).getRemainingArgs();
         */

        if (otherArgs.length != 2) { //判断运行参数个数

            System.err.println("Usage: Data Deduplication <in> <out>");

            System.exit(2);

        }

        // set maprduce job name
        Job job = new Job(conf, "findfriends");
        job.setJarByClass(FindFriends.class);

        // 设置map reduce处理类
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        // 设置输出类型
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        //设置输入输出路径
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }
    

}

结果

原文地址:https://www.cnblogs.com/duking1991/p/6126247.html