Mahout使用(一)

1.HelloMahout.java
2.DistanceTest.java
3.MahoutDemo.java

1.HelloMahout.java

 1 package cn.crxy.mahout;
 2 
 3 import java.io.File;
 4 import java.util.List;
 5 
 6 import org.apache.log4j.Logger;
 7 import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
 8 import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
 9 import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
10 import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
11 import org.apache.mahout.cf.taste.model.DataModel;
12 import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
13 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
14 import org.apache.mahout.cf.taste.recommender.Recommender;
15 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
16 
17 public class HelloMahout {
18 
19     public static void main(String[] args) {
20         
21         Logger logger=Logger.getLogger(HelloMahout.class);
22         try {
23             //读取用户评分数据    封装成一个model
24             DataModel model = new FileDataModel(new File("F:\360Downloads\超人学院\第14期视频\2016-09-12【mahout】\样本数据\info.csv"));
25             // 根据相似度找出对应的好朋友的标准     物以类聚,人以群分
26             UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(model);
27             // 邻域 选择两个好朋友帮我推荐
28             UserNeighborhood userNeighborhood = new NearestNUserNeighborhood(2,userSimilarity, model);
29             // 构建推荐引擎
30             Recommender recommender = new GenericUserBasedRecommender(model,userNeighborhood, userSimilarity);
31             // 进行推荐
32             List<RecommendedItem> recommend = recommender.recommend(1, 5);
33             for (RecommendedItem item : recommend) {
34                 logger.info(item);
35             }
36         } catch (Exception e) {
37             logger.error(e.getMessage());
38         }
39     }
40 }
View Code

2.DistanceTest.java

 1 package cn.crxy.mahout;
 2 
 3 import org.junit.Before;
 4 import org.junit.Test;
 5 
 6 public class DistanceTest {
 7     
 8     //    水果维度依次为:苹果、梨、桃子、栗子、香蕉、橘子
 9     //    小明:5,4,2,1,5,5
10     //    小丽:5,3,1,2,1,1
11     //    小王:5,3,4,1,4,3
12     private int[] a;
13     private int[] b;
14     private int[] c;
15     
16     @Before
17     public void initData(){
18         a=new int[]{5,4,2,1,5,5};
19         b=new int[]{5,3,1,2,1,1};
20         c=new int[]{5,3,4,1,4,3};
21     }
22     
23     @Test
24     public void Distance(){
25 //        a-b:5.916079783099616
26 //        a-c:3.1622776601683795
27 //        c-b:4.795831523312719
28 
29         System.out.println(String.format("a-b:%s", 1.0/(1.0+Man(a, b))));
30         System.out.println(String.format("a-c:%s", 1.0/(1.0+Man(a, c))));
31         System.out.println(String.format("c-b:%s", 1.0/(1.0+Man(c, b))));
32 //        a-b:0.08333333333333333
33 //        a-c:0.14285714285714285
34 //        c-b:0.1
35         
36     }
37     //欧式距离
38     private double ErluD(int[] a_array,int[] b_array){
39         double result=0;
40         for (int i = 0; i < a_array.length; i++) {
41             result+=Math.pow(a_array[i]-b_array[i],2);
42         }
43         return Math.sqrt(result);
44     }
45     //曼哈顿距离
46     private double Man(int[] a_array,int[] b_array){
47         double result=0;
48         for (int i = 0; i < a_array.length; i++) {
49             result+=Math.abs(a_array[i]-b_array[i]);
50         }
51         return result;
52     }
53     //min式距离
54     private double Min(int[] a_array,int[] b_array,int p){
55         double result=0;
56         for (int i = 0; i < a_array.length; i++) {
57             result+=Math.pow(Math.abs(a_array[i]-b_array[i]),p);
58         }
59         return Math.pow(result,1.0/p);
60     }
61 
62 }
View Code

3.MahoutDemo.java

  1 package cn.crxy.mahout;
  2 
  3 import java.io.File;
  4 import java.util.List;
  5 
  6 import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
  7 import org.apache.mahout.cf.taste.impl.common.FastIDSet;
  8 import org.apache.mahout.cf.taste.impl.model.GenericPreference;
  9 import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
 10 import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
 11 import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
 12 import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
 13 import org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefItemBasedRecommender;
 14 import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
 15 import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
 16 import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
 17 import org.apache.mahout.cf.taste.impl.similarity.CachingUserSimilarity;
 18 import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
 19 import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
 20 import org.apache.mahout.cf.taste.model.DataModel;
 21 import org.apache.mahout.cf.taste.model.PreferenceArray;
 22 import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
 23 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 24 import org.apache.mahout.cf.taste.recommender.Recommender;
 25 import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
 26 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
 27 import org.junit.Before;
 28 import org.junit.Ignore;
 29 import org.junit.Test;
 30 
 31 public class MahoutDemo {
 32 
 33     //组装datamodel
 34     
 35     // userid itemid score
 36     // 101 102 103 104
 37     // 1(5,4,2,)
 38     // 2(,2,4,1)
 39     // 3(4,3,1,)
 40     DataModel dataModel;
 41     
 42     @Before
 43     public void initData() throws Exception{
 44         //每一个用户的喜好列表 key:用户id  value:该用户的偏好列表
 45         FastByIDMap<PreferenceArray> data=new FastByIDMap<PreferenceArray>();
 46         //组装第一个用户 偏好列表
 47         PreferenceArray array1=new GenericUserPreferenceArray(3);
 48         //PreferenceArray index 指:偏好列表的index 序号。
 49         array1.setUserID(0, 1);
 50         array1.setItemID(0, 101);
 51         array1.setValue(0, 5);
 52         
 53         array1.setUserID(1, 1);
 54         array1.setItemID(1, 102);
 55         array1.setValue(1, 4);
 56         
 57         array1.setUserID(2, 1);
 58         array1.setItemID(2, 103);
 59         array1.setValue(2, 2);
 60         
 61         data.put(1, array1);
 62         
 63         //组装第二个喜好
 64         PreferenceArray array2=new GenericUserPreferenceArray(3);
 65         //2(,2,4,1)
 66         array2.set(0, new GenericPreference(2,102,2));
 67         array2.set(1, new GenericPreference(2,103,4));
 68         array2.set(2, new GenericPreference(2,104,1));
 69         data.put(2, array2);
 70         //组装第三个喜好
 71         PreferenceArray array3=new GenericUserPreferenceArray(3);
 72         //3(4,3,1,)
 73         array3.set(0, new GenericPreference(3,101,4));
 74         array3.set(1, new GenericPreference(3,102,3));
 75         array3.set(2, new GenericPreference(3,103,1));
 76         data.put(3, array3);
 77         
 78         //dataModel=new GenericDataModel(data);
 79 //        dataModel=new GenericBooleanPrefDataModel(userData);
 80 //        System.out.println(dataModel.getPreferenceValue(1, 102));//获得1用户对102的评分
 81 //        System.out.println(dataModel.getItemIDsFromUser(1));
 82 //        System.out.println(dataModel.getUserIDs());
 83         
 84         
 85         //1  101 102 103
 86         //2 102 103 
 87         // key为userid value:物品的集合 set
 88         FastByIDMap<FastIDSet> userData=new FastByIDMap<FastIDSet>();
 89         
 90         FastIDSet userSet1=new FastIDSet(3);
 91         userSet1.add(101);
 92         userSet1.add(102);
 93         userSet1.add(103);
 94         userData.put(1,userSet1);
 95         
 96         FastIDSet userSet2=new FastIDSet(2);
 97         userSet2.add(102);
 98         userSet2.add(103);
 99         userData.put(2,userSet2);
100         
101         
102         //无偏好的构建
103 //        dataModel=new GenericBooleanPrefDataModel(userData);
104         
105         
106         //读取文件 有偏好的
107         dataModel=new FileDataModel(new File("F:\360Downloads\超人学院\第14期视频\2016-09-12【mahout】\样本数据\info.csv"));
108         //读取文件 无偏好的 无偏好的数据只有用户和其关联的商品 没有对应商品的评分
109 //        dataModel=new FileDataModel(new File("F:\360Downloads\超人学院\第14期视频\2016-09-12【mahout】\样本数据\ubool.data"));
110         
111         
112 //        对于无偏好数据:getvalue:如果存在记录则是1.0;否则为null。  
113 //        System.out.println(dataModel.getPreferenceValue(1, 103));
114 //        System.out.println(dataModel.getItemIDsFromUser(1));
115 //        System.out.println(dataModel.getUserIDs());
116         
117     }
118     @Ignore
119     public void testUserSimi() throws Exception{
120         
121         //利用model和相似度函数 计算用户相似度
122 //        UserSimilarity userSimilarity=new TanimotoCoefficientSimilarity(dataModel);
123         UserSimilarity userSimilarity=new PearsonCorrelationSimilarity(dataModel);
124         userSimilarity=new CachingUserSimilarity(userSimilarity, dataModel);
125         //查询用户之间的相似度  0.9999999999999998    0.944911182523068
126         //如果使用CachingUserSimilarity userSimilarity(1,5) 第二次不会再次计算了
127         System.out.println(userSimilarity.userSimilarity(1, 5));
128         System.out.println(userSimilarity.userSimilarity(1, 5));
129     }
130     @Ignore
131     public void testItemSimi() throws Exception{
132         
133         //利用model和相似度函数 计算物品相似度
134         ItemSimilarity itemSimilarity=new PearsonCorrelationSimilarity(dataModel);
135         itemSimilarity =new CachingItemSimilarity(itemSimilarity,dataModel);
136         //查询物品之间的相似度 0.9449111825230729
137         System.out.println(itemSimilarity.itemSimilarity(101, 102));
138     }
139     @Test
140     public void testuserNeighborhood() throws Exception{
141         //相似度   有相似度才能算邻居是谁
142         UserSimilarity userSimilarity=new PearsonCorrelationSimilarity(dataModel);
143         //1.固定数目的邻居  如果取邻居 只取前三个 
144         UserNeighborhood userNeighborhood=new NearestNUserNeighborhood(3,userSimilarity,dataModel);
145         long[] userNeighborhoods = userNeighborhood.getUserNeighborhood(1);//为1用户取得用户
146         for (long l : userNeighborhoods) {
147             System.out.println(l+"NearestNUserNeighborhoodsimi---"+userSimilarity.userSimilarity(1, l));
148         }
149 //        4NearestNUserNeighborhoodsimi---0.9999999999999998
150 //        5NearestNUserNeighborhoodsimi---0.944911182523068
151 //        2NearestNUserNeighborhoodsimi---   -0.7642652566278799这个是负0.7
152 
153     
154         //2.固定阈值的邻居  只要0.8以上的
155         userNeighborhood=new ThresholdUserNeighborhood(0.7,userSimilarity,dataModel);
156         long[] userNeighborhoodsnew = userNeighborhood.getUserNeighborhood(1);
157         System.out.println(userSimilarity.userSimilarity(1, 2)); //查看1和2的相似度
158         for (long l : userNeighborhoodsnew) {
159             System.out.println(l+"ThresholdUserNeighborhoodsimi---"+userSimilarity.userSimilarity(1, l));
160         }
161         
162     }
163     @Test
164     public void testItemCmd() throws Exception{
165         //1.基于物品的有偏好的推荐   基于物品的不需要邻居
166 //        ItemSimilarity itemSimilarity=new PearsonCorrelationSimilarity(dataModel);
167 //        Recommender recommender=new GenericItemBasedRecommender(dataModel,itemSimilarity);
168         
169         //2.基于物品的无偏好推荐
170         ItemSimilarity itemSimilarity=new TanimotoCoefficientSimilarity(dataModel);
171         Recommender recommender=new GenericBooleanPrefItemBasedRecommender(dataModel,itemSimilarity);
172         
173         
174         List<RecommendedItem> recommend = recommender.recommend(1, 3);//给用户1推荐3个.
175         for (RecommendedItem recommendedItem : recommend) {
176             System.out.println(recommendedItem);
177             //1.基于物品的有偏好的推荐RecommendedItem[item:104, value:5.0]其他的推荐不出来了....所以只推荐出了1个
178             
179             //2.基于物品的无偏好的推荐
180             //RecommendedItem[item:104, value:1.8]
181             //RecommendedItem[item:106, value:1.15]
182             //RecommendedItem[item:105, value:0.85]
183         }
184     }
185     @Test
186     public void testUserCmd() throws Exception{
187         //1.基于用户的有偏好的推荐
188         //UserSimilarity userSimilarity=new PearsonCorrelationSimilarity(dataModel);
189         //2.基于用户的无偏好的推荐
190         UserSimilarity userSimilarity=new TanimotoCoefficientSimilarity(dataModel);
191         
192         UserNeighborhood userNeighborhood=new NearestNUserNeighborhood(3,userSimilarity,dataModel);//Top 3
193         //构建推荐对象
194         Recommender recommender=new GenericUserBasedRecommender(dataModel,userNeighborhood,userSimilarity);
195         List<RecommendedItem> recommend = recommender.recommend(1, 3);
196         for (RecommendedItem recommendedItem : recommend) {
197             System.out.println(recommendedItem);
198             //1.基于用户的有偏好推荐
199             //RecommendedItem[item:104, value:5.0]
200             //RecommendedItem[item:106, value:4.0]
201             //2.基于用户的无偏好推荐
202             //RecommendedItem[item:106, value:4.0]
203             //RecommendedItem[item:104, value:3.2121212]
204 
205         }
206     }
207     
208     
209 }
View Code
原文地址:https://www.cnblogs.com/DreamDrive/p/5931126.html