根据PV统计出前三的热门板块,并统计出热门板块下的用户数--方式一


根据PV统计出前三的热门板块,并统计出热门板块下的用户数--方式一

测试数据
java代码
  1 package com.hzf.spark.study;
  2 
  3 import java.util.ArrayList;
  4 import java.util.Collections;
  5 import java.util.Comparator;
  6 import java.util.HashMap;
  7 import java.util.Iterator;
  8 import java.util.List;
  9 import java.util.Map;
 10 import java.util.Set;
 11 
 12 import org.apache.spark.SparkConf;
 13 import org.apache.spark.api.java.JavaPairRDD;
 14 import org.apache.spark.api.java.JavaRDD;
 15 import org.apache.spark.api.java.JavaSparkContext;
 16 import org.apache.spark.api.java.function.Function;
 17 import org.apache.spark.api.java.function.PairFunction;
 18 import org.apache.spark.api.java.function.VoidFunction;
 19 import org.apache.spark.broadcast.Broadcast;
 20 
 21 import scala.Tuple2;
 22 
 23 public class HotChannel01 {
 24     public static void main(String[] args) {
 25         SparkConf conf = new SparkConf()
 26                 .setAppName("HotChannel")
 27                 .setMaster("local")
 28                 .set("spark.testing.memory", "2147480000");
 29         JavaSparkContext sc = new JavaSparkContext(conf);
 30         JavaRDD<String> logRDD = sc.textFile("userLog1");
 31         String str = "View";
 32         final Broadcast<String> broadcast = sc.broadcast(str);
 33         hotChannel(sc, logRDD, broadcast);
 34     }
 35     private static void hotChannel(JavaSparkContext sc,JavaRDD<String> logRDD, final Broadcast<String> broadcast) {
 36         JavaRDD<String> filteredLogRDD = logRDD.filter(new Function<String, Boolean>() {
 37             
 38             private static final long serialVersionUID = 1L;
 39 
 40             @Override
 41             public Boolean call(String v1) throws Exception {
 42                 String actionParam = broadcast.value();
 43                 String action = v1.split("	")[5];
 44                 return actionParam.equals(action);
 45             }
 46         });
 47         
 48         JavaPairRDD<String, String> channel2nullRDD = filteredLogRDD.mapToPair(new PairFunction<String, String,String>() {
 49 
 50             private static final long serialVersionUID = 1L;
 51 
 52             @Override
 53             public Tuple2<String, String> call(String val) throws Exception {
 54                 String channel = val.split("	")[4];
 55                 
 56                 return new Tuple2<String, String>(channel,null);
 57             }
 58         });
 59         Map<String, Object> channelPVMap = channel2nullRDD.countByKey();
 60         Set<String> keySet = channelPVMap.keySet();
 61         List<SortObj> channels  = new ArrayList<>();
 62         for(String channel : keySet){ 
 63             channels.add(new SortObj(channel, Integer.valueOf(channelPVMap.get(channel)+"")));
 64         }
 65         Collections.sort(channels, new Comparator<SortObj>() {
 66 
 67             @Override
 68             public int compare(SortObj o1, SortObj o2) {
 69                 return o2.getValue() - o1.getValue();
 70             }
 71         });
 72         
 73         List<String> hotChannelList = new ArrayList<>();
 74         for (int i = 0; i < 3; i++) {
 75             hotChannelList.add(channels.get(i).getKey());
 76         }
 77         for(String channle : hotChannelList){
 78             System.out.println("channle:" + channle);
 79         }
 80         
 81         final Broadcast<List<String>> hotChannelListBroadcast = sc.broadcast(hotChannelList);
 82         
 83          
 84         JavaRDD<String> filtedRDD = logRDD.filter(new Function<String, Boolean>() {
 85 
 86             @Override
 87             public Boolean call(String v1) throws Exception {
 88                 List<String> hostChannels = hotChannelListBroadcast.value();
 89                 String channel = v1.split("	")[4];
 90                 String userId = v1.split("	")[2];
 91                 return hostChannels.contains(channel) && !"null".equals(userId);
 92             }
 93         });
 94         
 95         JavaPairRDD<String, String> channel2UserRDD = filtedRDD.mapToPair(new PairFunction<String, String, String>() {
 96 
 97             @Override
 98             public Tuple2<String, String> call(String v1) throws Exception {
 99                 String[] splited = v1.split("	");
100                 String channel = splited[4];
101                 String userId = splited[2];
102                 return new Tuple2<String, String>(channel,userId);
103             }
104         });
105         
106         channel2UserRDD.groupByKey().foreach(new VoidFunction<Tuple2<String,Iterable<String>>>() {
107             
108             private static final long serialVersionUID = 1L;
109 
110             @Override
111             public void call(Tuple2<String, Iterable<String>> tuple) throws Exception {
112                 String channel = tuple._1;
113                 Iterator<String> iterator = tuple._2.iterator();
114                 Map<String, Integer> userNumMap = new HashMap<>();
115                 while(iterator.hasNext()){
116                     String userId = iterator.next();
117                     Integer count = userNumMap.get(userId);
118                     if(count == null){
119                         count = 1;
120                     }else{
121                         count ++;
122                     }
123                     userNumMap.put(userId, count);
124                 }
125                 
126                 List<SortObj> lists = new ArrayList<>();
127                 Set<String> keys = userNumMap.keySet();
128                 for(String key : keys){
129                     lists.add(new SortObj(key, userNumMap.get(key)));
130                 }
131                 
132                 Collections.sort(lists,new Comparator<SortObj>() {
133 
134                     @Override
135                     public int compare(SortObj O1, SortObj O2) {
136                         return O2.getValue() - O1.getValue();
137                     }
138                 });
139                 
140                 System.out.println("HOT_CHANNEL:"+channel);
141                 for(int i = 0 ; i < 3 ; i++){
142                     SortObj sortObj = lists.get(i);
143                     System.out.println(sortObj.getKey()+"=="+sortObj.getValue());
144                 }
145             }
146         });
147     }
148 }
View Code
result
 
原文地址:https://www.cnblogs.com/haozhengfei/p/dc318c55e428640229a5ef270d78a5f7.html