Tracking the runtime information of a WordCount job with counters
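
The idea is to make the counter name itself carry runtime information: inside the mapper and reducer, context.getCounter(group, name).increment(1) is called with a name that encodes the current hostname, process id, thread, and object identity (built by the Utils class in section 4). The job's final counter report then shows exactly which hosts, JVMs, and object instances did the work.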

1. The Mapper class

package com.cr.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WorcountMapper extends Mapper<LongWritable,Text,Text,IntWritable>{
    // no-arg constructor, so we can see when a mapper object is created
    public WorcountMapper(){
        System.out.println("-->new WorcountMapper");
    }
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        Text keyOut = new Text();
        IntWritable valueOut = new IntWritable();
        String[] arr = value.toString().split(" ");
        for(String s : arr){
            keyOut.set(s);
            valueOut.set(1);
            context.write(keyOut,valueOut);
            // increment the counter once for every (word, 1) pair this mapper emits
            context.getCounter("m组",Utils.getInfo(this,"map类")).increment(1);
        }
    }
}
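
Note that the counter is incremented once for every emitted (word, 1) pair, not once per map task, so in the final report the totals of the "m组" group should add up to the job's Map output records.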


2. The Reducer class

package com.cr.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordcountReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
    // no-arg constructor, so we can see when a reducer object is created
    public WordcountReducer(){
        System.out.println("-->new WordcountReducer");
    }
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int count = 0;
        for(IntWritable iw : values){
            count += iw.get();
        }
        // name of the current thread
        String tno = Thread.currentThread().getName();
        System.out.println("线程==>"+ tno + "===>  reducer ===>  " + key.toString() + "===>" + count);
        // increment once per reduce() call, i.e. once per key group
//        context.getCounter("==reduce===","===WordcountReducer.reduce===").increment(1);
        context.getCounter("r组",Utils.getInfo(this,"reducer类")).increment(1);
        context.write(key,new IntWritable(count));
    }
}

3. The driver (main) class

package com.cr.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordcountApp {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // create the job
        Configuration conf = new Configuration();
//        conf.set("fs.defaultFS","file:///");
        Job job = Job.getInstance(conf);

        // set the job's properties
        job.setJobName("wordcountAPP");                 // job name
        job.setJarByClass(WordcountApp.class);          // class used to locate the jar
        job.setInputFormatClass(TextInputFormat.class);

        // input path
        FileInputFormat.addInputPath(job,new Path(args[0]));
        // output path
        FileOutputFormat.setOutputPath(job,new Path(args[1]));

//        // set the partitioner
//        job.setPartitionerClass(MyPartitioner.class);
        // set the combiner class (reuses the reducer)
        job.setCombinerClass(WordcountReducer.class);


        job.setMapperClass(WorcountMapper.class);       // mapper class
        job.setReducerClass(WordcountReducer.class);    // reducer class
        job.setNumReduceTasks(3);                       // number of reduce tasks

        job.setMapOutputKeyClass(Text.class);           // map output key type
        job.setMapOutputValueClass(IntWritable.class);  // map output value type
        job.setOutputKeyClass(Text.class);              // final output key type
        job.setOutputValueClass(IntWritable.class);     // final output value type
        job.waitForCompletion(true);
    }
}
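
After waitForCompletion returns, the same counters can also be read from the Job object instead of scraping the console. A minimal sketch, placed right after job.waitForCompletion(true) in the driver above (the extra imports are org.apache.hadoop.mapreduce.Counter and org.apache.hadoop.mapreduce.Counters):

        // read back the custom "m组" counter group once the job has finished
        Counters counters = job.getCounters();
        for (Counter c : counters.getGroup("m组")) {
            System.out.println(c.getName() + " = " + c.getValue());
        }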

4. Utility class for host/process/thread information

package com.cr.wordcount;

import java.lang.management.ManagementFactory;
import java.net.InetAddress;
import java.net.UnknownHostException;

public class Utils {

    public static String getInfo(Object o,String msg){
        return "主机名:" + getHostname() + "=>" + "进程id" + getPID() + "=>" + "线程ID" +  getTID() + "=>" + "类对象信息" + getObjInfo(o) + "=>" + "运行类" + msg;
    }

    /**
     * Get the hostname
     * @return
     */
    public static String getHostname(){
        try {
            return InetAddress.getLocalHost().getHostName();
        } catch (UnknownHostException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Get the id of the process (JVM) this program runs in
     * @return
     */
    public static int getPID(){
        String info = ManagementFactory.getRuntimeMXBean().getName();
        return Integer.parseInt(info.substring(0,info.indexOf("@")));
    }

    /**
     * Get the name of the current thread
     * @return
     */
    public static String getTID(){
        return Thread.currentThread().getName();
    }

    /**
     * Get the object's simple class name and hash code
     * @param o
     * @return
     */
    public static String getObjInfo(Object o){
        String sname = o.getClass().getSimpleName();
        return sname + "@" + o.hashCode();
    }
}
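
On the cluster used below, a call such as Utils.getInfo(this, "map类") therefore yields a string like 主机名:s150=>进程id6421=>线程IDmain=>类对象信息WorcountMapper@1910896157=>运行类map类, and that entire string becomes the counter's name within its group.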


5. Add core-site.xml and yarn-site.xml to the project
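
For reference, a minimal core-site.xml along these lines lets the client find the cluster; the address shown is an assumption based on the hdfs://s150 URL used in the command below (in practice, copy the real files from the cluster's Hadoop configuration directory):

<?xml version="1.0"?>
<configuration>
    <!-- assumed NameNode address; s150 matches the host used in this post -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://s150</value>
    </property>
</configuration>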

6. Package the jar and run it on the Hadoop cluster

There are three files to be counted:
drwxr-xr-x   - xiaoqiu supergroup          0 2018-01-05 09:10 /user
drwxr-xr-x   - xiaoqiu supergroup          0 2018-01-05 10:44 /user/xiaoqiu
drwxr-xr-x   - xiaoqiu supergroup          0 2018-01-05 12:06 /user/xiaoqiu/data
-rw-r--r--   2 xiaoqiu supergroup         12 2018-01-05 10:45 /user/xiaoqiu/data/wc.txt
-rw-r--r--   2 xiaoqiu supergroup         18 2018-01-05 12:06 /user/xiaoqiu/data/wc1.txt
-rw-r--r--   2 xiaoqiu supergroup         16 2018-01-05 12:06 /user/xiaoqiu/data/wc2.txt

Run the jar:

[xiaoqiu@s150 /home/xiaoqiu]$ hadoop jar wordcounter.jar com.cr.wordcount.WordcountApp hdfs://s150/user/xiaoqiu/data hdfs://s150/user/xiaoqiu/data/out
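
Note that FileOutputFormat refuses to start the job if the output directory already exists, so clear it before re-running:

[xiaoqiu@s150 /home/xiaoqiu]$ hdfs dfs -rm -r /user/xiaoqiu/data/out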

7. Run results

        File System Counters
                FILE: Number of bytes read=124
                FILE: Number of bytes written=732401
                FILE: Number of read operations=0
                FILE: Number of large read operations=0
                FILE: Number of write operations=0
                HDFS: Number of bytes read=351
                HDFS: Number of bytes written=44
                HDFS: Number of read operations=18
                HDFS: Number of large read operations=0
                HDFS: Number of write operations=6
        Job Counters
                Killed map tasks=2
                Launched map tasks=4
                Launched reduce tasks=3
                Data-local map tasks=4
                Total time spent by all maps in occupied slots (ms)=79906
                Total time spent by all reduces in occupied slots (ms)=74886
                Total time spent by all map tasks (ms)=79906
                Total time spent by all reduce tasks (ms)=74886
                Total vcore-milliseconds taken by all map tasks=79906
                Total vcore-milliseconds taken by all reduce tasks=74886
                Total megabyte-milliseconds taken by all map tasks=81823744
                Total megabyte-milliseconds taken by all reduce tasks=76683264
        Map-Reduce Framework
                Map input records=3
                Map output records=10
                Map output bytes=86
                Map output materialized bytes=160
                Input split bytes=305
                Combine input records=10
                Combine output records=10
                Reduce input groups=7
                Reduce shuffle bytes=160
                Reduce input records=10
                Reduce output records=7
                Spilled Records=20
                Shuffled Maps =9
                Failed Shuffles=0
                Merged Map outputs=9
                GC time elapsed (ms)=646
                CPU time spent (ms)=5080
                Physical memory (bytes) snapshot=899522560
                Virtual memory (bytes) snapshot=12507885568
                Total committed heap usage (bytes)=416440320
        Shuffle Errors
                BAD_ID=0
                CONNECTION=0
                IO_ERROR=0
                WRONG_LENGTH=0
                WRONG_MAP=0
                WRONG_REDUCE=0
        m组
                主机名:s150=>进程id6421=>线程IDmain=>类对象信息WorcountMapper@1910896157=>运=4
                主机名:s151=>进程id3857=>线程IDmain=>类对象信息WorcountMapper@479734028=>运行=2
                主机名:s152=>进程id3080=>线程IDmain=>类对象信息WorcountMapper@479734028=>运行=4
        File Input Format Counters
                Bytes Read=46
        File Output Format Counters
                Bytes Written=44
        r组
                主机名:s150=>进程id6421=>线程IDmain=>类对象信息WordcountReducer@612693043=>=2
                主机名:s150=>进程id6421=>线程IDmain=>类对象信息WordcountReducer@878991463=>=2
                主机名:s151=>进程id3816=>线程IDmain=>类对象信息WordcountReducer@275056979=>=3
                主机名:s151=>进程id3857=>线程IDmain=>类对象信息WordcountReducer@905847077=>=1
                主机名:s151=>进程id3857=>线程IDmain=>类对象信息WordcountReducer@908384914=>=1
                主机名:s152=>进程id3080=>线程IDmain=>类对象信息WordcountReducer@156199931=>=1
                主机名:s152=>进程id3080=>线程IDmain=>类对象信息WordcountReducer@905847077=>=1
                主机名:s152=>进程id3080=>线程IDmain=>类对象信息WordcountReducer@908384914=>=2
                主机名:s152=>进程id3128=>线程IDmain=>类对象信息WordcountReducer@275056979=>=2
                主机名:s152=>进程id3168=>线程IDmain=>类对象信息WordcountReducer@275056979=>=2
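
A few things are worth reading out of this report. The three "m组" entries add up to 4 + 2 + 4 = 10, matching Map output records=10, and each entry pins a mapper object to a concrete host and JVM. The counter names are cut short (e.g. ending in "运=4") because Hadoop truncates long counter names; the default limit, set by mapreduce.job.counters.counter.name.max, is 64 characters. Bytes Read=46 likewise matches the three input files (12 + 18 + 16 bytes). The "r组" group is the most interesting: it has far more entries than the 3 reduce tasks because WordcountReducer is also registered as the combiner. The entries from the map-task JVMs (pids 6421, 3857, and 3080, the same ones that appear in "m组") sum to 10, matching Combine input records=10, while the remaining entries (pids 3816, 3128, and 3168) sum to 7, matching Reduce input groups=7. The differing hash codes within one pid (e.g. two WordcountReducer objects in pid 6421) also show that a single JVM can construct several instances, presumably one per combine pass.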

