hadoop-job (MapReduce word count: counting occurrences of each word)

The job has three parts: a mapper, a reducer, and a driver (App) that wires them into a Job.

1.============map===============

package com.it18zhang.hadoop.mr;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper: tokenizes each input line and emits a (word, 1) pair per token.
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /**
     * key   : byte offset of the line within the file; rarely meaningful.
     * value : one line of text
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the line into words (the input is assumed to be space-separated).
        String line = value.toString();
        String[] arr = line.split(" ");

        Text keyOut = new Text();
        IntWritable valueOut = new IntWritable(1);
        for (String word : arr) {
            keyOut.set(word);
            context.write(keyOut, valueOut);
        }
    }
}
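
For example, given the input line "hello world hello", this mapper emits (hello,1), (world,1), (hello,1); the framework then sorts and groups these pairs by key before they reach the reducer.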

2.============reducer===============

package com.it18zhang.hadoop.mr;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer: sums the counts for each word.
 */
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
     * key    : a word
     * values : all counts aggregated under that key
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int count = 0;
        for (IntWritable iw : values) {
            count = count + iw.get();
        }
        context.write(key, new IntWritable(count));
    }
}
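
Continuing the example above, the reducer receives hello -> [1, 1] and world -> [1], and writes "hello 2" and "world 1" to the output.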

3.============driver===============

package com.it18zhang.hadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
public class App {
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            throw new Exception("Not enough arguments: expected 2 (input path, output path)");
        }
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Recursively delete the output directory so the job can be rerun.
        fs.delete(new Path(args[1]), true);

        // Create a job.
        Job job = Job.getInstance(conf);
        // Set the job name (any name works, e.g. word_count_add).
        job.setJobName("word_count_add");
        // Locate the jar containing this class.
        job.setJarByClass(App.class);

        // // Input path for a local test run:
        // FileInputFormat.addInputPath(job, new Path("file:///Users/yangyanqing/godev/wc"));
        // // Output path for a local test run:
        // FileOutputFormat.setOutputPath(job, new Path("file:///Users/yangyanqing/godev/wc/out"));
        // Input path of the files to process.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Output path for the computed results.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Set the mapper and reducer classes.
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
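        // Optional tweak (not in the original code): because this reduce
        // function is associative and commutative, the same class could also
        // be registered as a combiner to pre-aggregate map output and shrink
        // the shuffle:
        // job.setCombinerClass(WordCountReducer.class);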

        // Output key/value types for the job (Text key, IntWritable value).
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Set the number of reduce tasks.
        job.setNumReduceTasks(1);
        // Submit the job and wait for it to finish.
        job.waitForCompletion(true);
    }
}
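
To run the job on a cluster, package the classes into a jar and submit it with the hadoop command. A minimal sketch; the jar name and HDFS paths below are placeholders, not from the original post:

hadoop jar wordcount.jar com.it18zhang.hadoop.mr.App /user/hadoop/wc /user/hadoop/wc/out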

Original source: https://www.cnblogs.com/nyfz/p/9041992.html