基于MapReduce的矩阵乘法

参考:http://blog.csdn.net/xyilu/article/details/9066973文章

文字未得及得总结,明天再写文字,先贴代码


package matrix;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class SparseMatrix {

    public static class SparseMatrixMapper extends Mapper<LongWritable, Text, Text, Text> {

        private String flag;// A同现矩阵 or B评分矩阵

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            FileSplit split = (FileSplit) context.getInputSplit();
            flag = split.getPath().getName();// 判断读的数据集

            // System.out.println(flag);
        }
        private static final int  rowNum = 4;// 矩阵A的行数
        public static final int colA=3;    //矩阵A的列数,B的行数
        private static final int colNum = 2;// 矩阵B的列数

        @Override
        public void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
            
            String str=values.toString();
            String[] line=Recommend.DELIMITER.split(str);
            if(line.length==0)return;
            if(flag.equals("a.txt")){
                if(line.length!=3)return;
            	String row=line[0];
                String col=line[1];
                String val=line[2];
                Text k=new Text();
                Text v=new Text();
                for(int i=1;i<=colNum;i++){
                	k.set(row+","+i);
                	v.set("a,"+col+","+val);
                	System.out.println(k.toString()+"	"+v.toString());
                	context.write(k, v);
                }
            }
            if(flag.equals("b.txt")){
            	String row=line[0];
                String col=line[1];
                String val=line[2];
                Text k=new Text();
                Text v=new Text();
                for(int i=1;i<=rowNum;i++){
                	k.set(i+","+col);
                	v.set("b,"+row+","+val);
                	context.write(k, v);
                	System.out.println(k.toString()+"	"+v.toString());
                }
            }

        }
	
 
    }
    public static class SparseMatrixReducer extends Reducer<Text, Text, Text, Text> {
    	@Override
        public void reduce(Text key,Iterable<Text> values,  Context context) throws IOException, InterruptedException {
    		Map<String,Double> map=new HashMap<String ,Double>();
    		Double[] A=new Double[SparseMatrixMapper.colA];
    		Double[] B=new Double[SparseMatrixMapper.colA];
    		for(int i=0;i<A.length;i++){
    			A[i]=0.0;
    			B[i]=0.0;
    		}
    		for(Text line:values){
    			String val=line.toString();
    			if(val.contains("a")){
    				String[] arr=Recommend.DELIMITER.split(val);
    				int n=Integer.valueOf(arr[1]);
    				A[n-1]=Double.valueOf(arr[2]);
    			}
    			else if(val.contains("b")){
    				String[] arr=Recommend.DELIMITER.split(val);
    				int n=Integer.valueOf(arr[1]);
    				B[n-1]=Double.valueOf(arr[2]);
    			}
    		}
    		Double sum=0.0;
    		for(int i=0;i<SparseMatrixMapper.colA;i++){
    			sum=sum+A[i]*B[i];
    		}
    		Text v=new Text();
    		v.set(sum.toString());
    		context.write(key, v);
    		
    	}
    }
    public static void run(Map<String, String> path) throws IOException, InterruptedException, ClassNotFoundException {
        JobConf conf = Recommend.config();

        String input1 = path.get("matrixMult");
       
        String output = path.get("matrixMultOut");

        HdfsDAO hdfs = new HdfsDAO(Recommend.HDFS, conf);
        
        hdfs.rmr(output);
        hdfs.rmr(input1);
        hdfs.mkdirs(input1);
        hdfs.copyFile("datafile/week5/SparseMatrix/a.txt", input1);
        hdfs.copyFile("datafile/week5/SparseMatrix/b.txt", input1);
        Job job = new Job(conf);
        job.setJarByClass(SparseMatrix.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(SparseMatrixMapper.class);
        job.setReducerClass(SparseMatrixReducer.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(input1));
        FileOutputFormat.setOutputPath(job, new Path(output));

        job.waitForCompletion(true);
    }
}


版权声明:本文为博主原创文章,未经博主允许不得转载。

原文地址:https://www.cnblogs.com/jamesf/p/4751600.html