推荐系统-03-简单基于用户的推荐

下面是一个基本的 Java 程序：RecommenderIntro.java

package xyz.pl8.recommenderintro;

import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import java.io.File;
import java.util.List;

/**
 * Minimal user-based collaborative-filtering demo on top of Apache Mahout Taste.
 *
 * <p>Reads ratings from a fixed CSV file, combines a Pearson-correlation user similarity
 * with a nearest-3 user neighborhood, and prints the items the resulting recommender
 * returns for {@code recommend(2, 2)}.
 */
public class RecommenderIntro {
    /**
     * Runs the demo. Any exception raised while loading the data or computing the
     * recommendations is caught and its stack trace is printed.
     *
     * @param args ignored
     */
    public static void main(String[] args){
        try {
            // intro.csv layout: userId,itemId,rating
            File ratingsFile = new File("/home/hadoop/intro.csv");
            DataModel ratings = new FileDataModel(ratingsFile);
            System.out.println(ratings);

            // How alike two users are, measured by Pearson correlation.
            UserSimilarity pearson = new PearsonCorrelationSimilarity(ratings);

            // Neighborhood made of the 3 most similar users.
            UserNeighborhood nearestThree = new NearestNUserNeighborhood(3, pearson, ratings);

            // User-based recommender wired from the pieces above.
            Recommender userBased = new GenericUserBasedRecommender(ratings, nearestThree, pearson);

            // Print every recommended item, one per line.
            for (RecommendedItem suggestion : userBased.recommend(2, 2)) {
                System.out.println(suggestion);
            }
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
}

下面是基于物品的多线程批量相似度计算示例，由 BatchItemSimilaritiesIntro.java 和 MovieLensDataModel.java 两个类组成

package xyz.pl8.userrecommendermovielens;

import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.precompute.FileSimilarItemsWriter;
import org.apache.mahout.cf.taste.impl.similarity.precompute.MultithreadedBatchItemSimilarities;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.cf.taste.similarity.precompute.BatchItemSimilarities;
import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter;

import java.io.File;
import java.io.IOException;

public class BatchItemSimilaritiesIntro {
    public static void main(String[] args) throws IOException {
        if (args.length !=1 ){
            System.err.println("Need dataset file as argument!");
            System.exit(-1);
        }

        File resultFile = new File(System.getProperty("java.io.tmpdir"), "similarity.csv");
        DataModel dataModel = new MovieLensDataModel(new File(args[0]));
        ItemSimilarity similarity = new LogLikelihoodSimilarity(dataModel);
        ItemBasedRecommender recommender = new GenericItemBasedRecommender(dataModel, similarity);
        BatchItemSimilarities batchItemSimilarities = new MultithreadedBatchItemSimilarities(recommender, 5);
        SimilarItemsWriter writer = new FileSimilarItemsWriter(resultFile);
        int numSimilarities = batchItemSimilarities.computeItemSimilarities(Runtime.getRuntime().availableProcessors(), 1,writer);
        System.out.println("Computed " + numSimilarities + " for " +  " items and saved them to " + resultFile.getAbsolutePath());
    }
}


package xyz.pl8.userrecommendermovielens;

import org.apache.commons.io.Charsets;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.omg.CORBA.PUBLIC_MEMBER;

import java.io.*;
import java.nio.charset.Charset;
import java.util.regex.Pattern;

public class MovieLensDataModel extends FileDataModel {
    private static String COLON_DELIMITER = "::";
    private  static Pattern COLON_DELIMITTER_PATTERN = Pattern.compile(COLON_DELIMITER);

    public MovieLensDataModel(File ratingsFile) throws IOException{

        super(convertFile(ratingsFile));

    }

    public static File convertFile(File originalFile) throws IOException{
        File resultFile = new File(System.getProperty("java.io.tmpdir"), "ratings.csv");
        if (resultFile.exists()){
            resultFile.delete();
        }

        try {

            Writer writer = new OutputStreamWriter(new FileOutputStream(resultFile), Charsets.UTF_8);
            for (String line : new FileLineIterable(originalFile, false)) {
                int lastIndex = line.lastIndexOf(COLON_DELIMITER);
                if (lastIndex < 0) {
                    throw new IOException("Invalid data!");
                }
                String subLine = line.substring(0, lastIndex);
                String convertedSubLne = COLON_DELIMITTER_PATTERN.matcher(subLine).replaceAll(",");
                lastIndex = convertedSubLne.lastIndexOf(",");
                if (lastIndex <= 0) {
                    continue;
                }
                writer.write(convertedSubLne);
                writer.write('
');
            }
            writer.close();
        }catch (Exception e){
            resultFile.delete();

        }
        return  resultFile;
    }
}

原文地址:https://www.cnblogs.com/freebird92/p/9047488.html