MR框架-->Word3

页面浏览量统计功能实现:

  统计页面浏览量功能:

思路:统计页面浏览量就是统计访问记录的总条数。因为还没学习用SQL方式进行统计,这里使用MapReduce编程的方式:在Map阶段把每一行记录做成一个固定的key,并把value赋值为1,然后在Reduce阶段进行累加操作。

用到了GetPageId

Mapper类:

static class LogMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
        //定义ONE全局变量赋值1
        private IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            //读取一行日志
            String log = value.toString();
            //解析日志然后查询url
            Map<String, String> info = new LogParser().parse(log);
            String url = info.get("url");//key
            //通过url找id
            String id = new GetPageId().getPageId(url);
            //写入上下文
            context.write(new Text(id), ONE);
        }
    }

Reducer类

static class LogReducer extends Reducer<Text, IntWritable, NullWritable, IntWritable>{
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Reducer<Text, IntWritable, NullWritable, IntWritable>.Context context)
                throws IOException, InterruptedException {
            //定义一个空变量
            int sum = 0 ;
            //遍历
            for (IntWritable value : values) {
                sum += value.get();
            }
            //将结果写入上下文
            context.write(NullWritable.get(), new IntWritable(sum));
        }
    }

Submit类

public static void main(String[] args) throws Exception {
                // 加载配置文件
                Configuration conf = new Configuration();
                //创建hdfs对象
                FileSystem fs = FileSystem.get(conf);
                //判断输出路径是否重复
                if(fs.exists(new Path(args[1]))) {
                    fs.delete(new Path(args[1]),true);
                }
                // 创建Job对象
                Job job = Job.getInstance(conf);
                // 设置提交主类
                job.setJarByClass(PathApp.class);
                // 设置Mapper类相关的参数
                job.setMapperClass(LogMapper.class);
                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(IntWritable.class);
                // 设置Reducer类相关的参数
                job.setReducerClass(LogReducer.class);
                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(IntWritable.class);
                // 设置输入路径
                FileInputFormat.setInputPaths(job, new Path(args[0]));
                // 设置输出路径
                FileOutputFormat.setOutputPath(job, new Path(args[1]));
                // 提交任务
                job.waitForCompletion(true);
    }
原文地址:https://www.cnblogs.com/wyk1/p/13941515.html