MapReduce案例:好友推荐简单实现

一、好友推荐算法在实际的社交环境中应用较多,比如qq软件中的“你可能认识的好友”或者是Facebook中的好友推介。常见的好友推介算法有六度分割理论,三元闭包论和最基本的好友推介算法。
学习和分享好友推介算法。
假设用户A有好友A1,A2,A3,则A1,A2,A3相互之间都可能通过好友A认识,是潜在的好友关系。如果用户B有好友A1,A2,B1,则A1,A2,B1相互之间都可能通过好友B认识。如下图所示
1.1
如上图所示,A1,A2在潜在好友列表中出现2次,说明A1,A2有2个共同好友,在上图中即为A,B。当两个潜在好友如果共同好友越多,则他们可能认识的可能性就越大,彼此之间推介机会就越高。算法简单介绍如下:首先需要遍历所有用户的好友列表生成两两间的潜在好友列表,计算所有用户潜在好友列表中同一对潜在好友出现的次数。如上图例子中A1,A2出现次数为2次。注意:A1:A2与A2:A1为相同的一对潜在好友,计算时应进行累加。再次计算同一个用户的潜在好友列表和出现的次数,如图中A1用户,A1与A2出现2次,A1与A3出现1次,A1与B1出现1次。如果只能给每个用户推介一个好友的话,A2与A1认识的可能性更大,优先推介A2。注意:还需要除去已经是该用户好友的潜在好友列表。如果A1与A2已经是好友关系了,则不需要再次推介。

实验数据:
1.2

数据流程:
1.2

二、MapReduce程序实现

package friends;

...

public class RunJob {
    public static void main(String[] args) {
        System.setProperty("HADOOP_USER_NAME", "root");
    
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf);
            FileSystem fs = FileSystem.get(conf);
            job.setJobName("friend1");
            job.setJarByClass(RunJob.class);

            job.setMapperClass(Mapper1.class);
            job.setReducerClass(Reducer1.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setNumReduceTasks(1); //设置reduce的数量
            job.setInputFormatClass(KeyValueTextInputFormat.class);

            //设置计算输入数据
            FileInputFormat.addInputPath(job, new Path("/friend"));
            //设置计算输出目录(mapreduce计算完成之后,最后的结果存放的目录)
            Path outpath = new Path("/fout/f1");  //该目录必须不能存在,如果存在计算框架会出错
            if (fs.exists(outpath)) {//如果存在该目录,则删除
                fs.delete(outpath, true);
            }
            FileOutputFormat.setOutputPath(job, outpath);
            //开始执行
            boolean f = job.waitForCompletion(true);
            if (f) {
                System.out.println("mapreduce程序1执行成功");
                job = Job.getInstance(conf);
                job.setJobName("friend1");
                job.setJarByClass(RunJob.class);

                job.setMapperClass(RunJob.Mapper2.class);
                job.setReducerClass(RunJob.Reducer2.class);

                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(Text.class);
                job.setSortComparatorClass(RunJob.SortComparator.class);
                job.setGroupingComparatorClass(RunJob.GroupComparator.class);

                job.setNumReduceTasks(1); //设置reduce的数量
                job.setInputFormatClass(KeyValueTextInputFormat.class);
                FileInputFormat.addInputPath(job, new Path("/fout/f1"));
                outpath = new Path("/fout/f2");
                if (fs.exists(outpath)) { //如果存在该目录,则删除
                    fs.delete(outpath, true);
                }
                FileOutputFormat.setOutputPath(job, outpath);
                f = job.waitForCompletion(true);
                if (f) {
                    System.out.println("mapreduce2程序2执行成功");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

    /**
     *
     */
    static class Mapper1 extends Mapper<Text, Text, Text, IntWritable> {
        @Override
        protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
            String user = key.toString();
            String[] frinends = value.toString().split("	");
            for (int i = 0; i < frinends.length; i++) {
                String f1 = frinends[i];
                // 获得直接好友
                String userAndFriend = getFof(user, f1);
                //输出直接好友
                context.write(new Text(userAndFriend), new IntWritable(-1));
                //从当前一个开始往后遍历
                for (int j = i + 1; j < frinends.length; j++) {
                    String f2 = frinends[j];
                    String fof = getFof(f1, f2);
                    context.write(new Text(fof), new IntWritable(1));
                }
            }
        }

        /**
         * 保证好友顺序一致
         *
         * @param user1
         * @param user2
         * @return
         */
        private String getFof(String user1, String user2) {
            if (user1.compareTo(user2) > 0) {
                return user1 + ":" + user2;
            } else {
                return user2 + ":" + user1;
            }
        }
    }

    /**
     * 第二次map操作
     */
    static class Mapper2 extends Mapper<Text, Text, Text, Text> {
        @Override
        protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
            String[] users = key.toString().split(":");
            int count = Integer.parseInt(value.toString());
            Text k1 = new Text(users[0] + "," + count);
            Text v1 = new Text(users[1]);
            context.write(k1, v1);
            Text k2 = new Text(users[1] + "," + count);
            Text v2 = new Text(users[0]);
            context.write(k2, v2);
        }
    }

    /**
     * reduce阶段 第一次
     */
    static class Reducer1 extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            //判断是否存在直接好友的FOF
            int sum = 0;
            boolean flag = true;
            for (IntWritable i : values) {
                if (i.get() == -1) {
                    flag = false;
                    break;
                } else {
                    sum += i.get();
                }
            }
            if (flag) {
                context.write(key, new IntWritable(sum));
            }
        }
    }

    /**
     * 第二次reduce
     */
    static class Reducer2 extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            StringBuffer stringBuffer = new StringBuffer();
            String user = key.toString().split(",")[0];
            for (Text t : values) {
                stringBuffer.append(t.toString()).append("	");
            }
            stringBuffer.substring(0, stringBuffer.length() - 1);
            context.write(new Text(user), new Text(stringBuffer.toString()));
        }
    }

    /**
     * 自定义排序比较器
     */
    static class SortComparator extends WritableComparator {
        public SortComparator() {
            super(Text.class, true);
        }

        /**
         * @param a
         * @param b
         * @return
         */
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            Text k1 = (Text) a;
            Text k2 = (Text) b;
            String[] sk1 = k1.toString().split(",");
            String[] sk2 = k2.toString().split(",");
            int res = sk1[0].compareTo(sk2[0]);
            return res == 0 ? (-Integer.compare(Integer.parseInt(sk1[1]), Integer.parseInt(sk2[1]))) : res;

        }
    }

    /**
     * 分组比较器
     */
    static class GroupComparator extends WritableComparator {
        public GroupComparator() {
            super(Text.class, true);
        }

        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            Text k1 = (Text) a;
            Text k2 = (Text) b;
            String[] kk1 = k1.toString().split(",");
            String[] kk2 = k2.toString().split(",");
            return kk1[0].compareTo(kk2[0]);
        }
    }
}



结果:
1.3
.
.
.
.
.
.
.
参考:
https://blog.csdn.net/liuzebin9/article/details/75093190
https://blog.csdn.net/sinat_34045444/article/details/86484934

原文地址:https://www.cnblogs.com/ernst/p/12819181.html