MapReduce排序案例

排序:MapReduce 框架按照 key2(Mapper 输出的 key)进行排序,因此 key2 必须实现 WritableComparable 接口,并在 compareTo 方法中定义排序规则。
 

数据准备:

7369,SMITH,CLERK,7902,1980/12/17,800,,20
7499,ALLEN,SALESMAN,7698,1981/2/20,1600,300,30
7521,WARD,SALESMAN,7698,1981/2/22,1250,500,30
7566,JONES,MANAGER,7839,1981/4/2,2975,,20
7654,MARTIN,SALESMAN,7698,1981/9/28,1250,1400,30
7698,BLAKE,MANAGER,7839,1981/5/1,2850,,30
7782,CLARK,MANAGER,7839,1981/6/9,2450,,10
7788,SCOTT,ANALYST,7566,1987/4/19,3000,,20
7839,KING,PRESIDENT,,1981/11/17,5000,,10
7844,TURNER,SALESMAN,7698,1981/9/8,1500,0,30
7876,ADAMS,CLERK,7788,1987/5/23,1100,,20
7900,JAMES,CLERK,7698,1981/12/3,950,,30
7902,FORD,ANALYST,7566,1981/12/3,3000,,20
7934,MILLER,CLERK,7782,1982/1/23,1300,,10
 
 
 
14
 
 
 
 
 
1
7369,SMITH,CLERK,7902,1980/12/17,800,,20
2
7499,ALLEN,SALESMAN,7698,1981/2/20,1600,300,30
3
7521,WARD,SALESMAN,7698,1981/2/22,1250,500,30
4
7566,JONES,MANAGER,7839,1981/4/2,2975,,20
5
7654,MARTIN,SALESMAN,7698,1981/9/28,1250,1400,30
6
7698,BLAKE,MANAGER,7839,1981/5/1,2850,,30
7
7782,CLARK,MANAGER,7839,1981/6/9,2450,,10
8
7788,SCOTT,ANALYST,7566,1987/4/19,3000,,20
9
7839,KING,PRESIDENT,,1981/11/17,5000,,10
10
7844,TURNER,SALESMAN,7698,1981/9/8,1500,0,30
11
7876,ADAMS,CLERK,7788,1987/5/23,1100,,20
12
7900,JAMES,CLERK,7698,1981/12/3,950,,30
13
7902,FORD,ANALYST,7566,1981/12/3,3000,,20
14
7934,MILLER,CLERK,7782,1982/1/23,1300,,10
 
 

SortMapper:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;

/**
 * Mapper for the salary-sort job.
 *
 * <p>Each input line is one comma-separated employee record, e.g.
 * {@code 7499,ALLEN,SALESMAN,7698,1981/2/20,1600,300,30}. The record is parsed
 * into an {@link Employee}, which is emitted as the map output key with a
 * {@link NullWritable} value so that the framework sorts the records by the
 * key's {@code compareTo} ordering.
 */
public class SortMapper extends Mapper<LongWritable, Text, Employee, NullWritable> {

    /** Number of comma-separated columns in one employee record. */
    private static final int FIELD_COUNT = 8;

    /**
     * Parses one employee record and emits it as the output key.
     *
     * @param key     the input key supplied by the input format (used only in error messages)
     * @param value   one line of input text
     * @param context the task context used to emit the parsed employee
     * @throws IOException          if the line has fewer than {@value #FIELD_COUNT} columns,
     *                              or if writing the output fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();

        // Blank lines (e.g. a trailing newline at the end of the file) carry no record.
        if (line.trim().isEmpty()) {
            return;
        }

        // Limit -1 keeps trailing empty columns so the column count is accurate.
        String[] words = line.split(",", -1);
        if (words.length < FIELD_COUNT) {
            throw new IOException("Malformed employee record at key " + key
                    + ": expected " + FIELD_COUNT + " fields but found "
                    + words.length + ": " + line);
        }

        Employee e = new Employee();
        e.setEmpno(Integer.parseInt(words[0]));
        e.setEname(words[1]);
        e.setJob(words[2]);
        // The manager column is empty for the top-level employee; treat that as 0.
        e.setMgr(parseIntOrZero(words[3]));
        e.setHiredate(words[4]);
        e.setSal(Integer.parseInt(words[5]));
        // The commission column is empty for most employees; treat that as 0.
        e.setComm(parseIntOrZero(words[6]));
        e.setDeptno(Integer.parseInt(words[7]));

        // Emit the employee as the key; the value carries no information.
        context.write(e, NullWritable.get());
    }

    /**
     * Parses an optional integer column, returning 0 when the column is empty
     * or is not a valid integer.
     */
    private static int parseIntOrZero(String field) {
        if (field.isEmpty()) {
            return 0;
        }
        try {
            return Integer.parseInt(field);
        } catch (NumberFormatException ignored) {
            // Optional column with unparseable content: fall back to 0, as for an empty column.
            return 0;
        }
    }
}
 
 
 
40
 
 
 
1
import java.io.IOException;
2

3
import org.apache.hadoop.io.LongWritable;
4
import org.apache.hadoop.io.NullWritable;
5
import org.apache.hadoop.io.Text;
6
import org.apache.hadoop.mapreduce.Mapper;
7
import org.apache.hadoop.mapreduce.Mapper.Context;
8

9
public class SortMapper extends Mapper<LongWritable, Text, Employee, NullWritable> {
10

11
    @Override
12
    protected void map(LongWritable key, Text value,Context context)
13
            throws IOException, InterruptedException {
14
        //7499,ALLEN,SALESMAN,7698,1981/2/20,1600,300,30
15
        String str = value.toString();
16
        //分词
17
        String[] words = str.split(",");
18

19
        Employee e = new Employee();
20
        e.setEmpno(Integer.parseInt(words[0]));
21
        e.setEname(words[1]);
22
        e.setJob(words[2]);
23
        try {
24
            e.setMgr(Integer.parseInt(words[3]));
25
        } catch (Exception e2) {
26
            e.setMgr(0);
27
        }
28
        e.setHiredate(words[4]);
29
        e.setSal(Integer.parseInt(words[5]));
30
        try {
31
            e.setComm(Integer.parseInt(words[6]));
32
        } catch (Exception e2) {
33
            e.setComm(0);
34
        }
35
        e.setDeptno(Integer.parseInt(words[7]));
36

37
        //将这个员工输出
38
        context.write(e, NullWritable.get());
39
    }
 
 
40
}
 
 

实现 WritableComparable 接口的 key2:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

//7499,ALLEN,SALESMAN,7698,1981/2/20,1600,300,30
/**
 * Employee record used as the map output key (key2) of the sort job.
 *
 * <p>Sample source record: {@code 7499,ALLEN,SALESMAN,7698,1981/2/20,1600,300,30}.
 *
 * <p>Ordering is ascending by salary, with the employee number as a tie-breaker.
 * The tie-breaker keeps {@link #compareTo(Employee)} a valid total order (it is
 * antisymmetric and returns 0 only for the same employee number and salary), so
 * two different employees with equal salaries never compare as equal.
 * {@link #equals(Object)} and {@link #hashCode()} are defined on the same two
 * fields so that they stay consistent with {@code compareTo}.
 *
 * <p>{@link #write(DataOutput)} and {@link #readFields(DataInput)} must read and
 * write the fields in exactly the same order.
 */
public class Employee implements WritableComparable<Employee>{

    private int empno;
    private String ename;
    private String job;
    private int mgr;
    private String hiredate;
    private int sal;
    private int comm;
    private int deptno;

    /** No-argument constructor; fields are populated via setters or {@link #readFields(DataInput)}. */
    public Employee(){

    }

    /**
     * Orders employees by ascending salary, then by ascending employee number.
     *
     * @param o the employee to compare against
     * @return a negative value, zero, or a positive value as this employee sorts
     *         before, equal to, or after {@code o}
     */
    @Override
    public int compareTo(Employee o) {
        int bySalary = Integer.compare(this.sal, o.sal);
        if (bySalary != 0) {
            return bySalary;
        }
        // Equal salaries: fall back to empno so the ordering is deterministic
        // and distinct employees are never reported as equal.
        return Integer.compare(this.empno, o.empno);
    }

    /**
     * Two employees are equal when they have the same employee number and
     * salary, i.e. exactly when {@link #compareTo(Employee)} returns 0.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Employee)) {
            return false;
        }
        Employee other = (Employee) obj;
        return this.empno == other.empno && this.sal == other.sal;
    }

    /**
     * Hash derived only from field values (not object identity), so it is the
     * same for equal records regardless of which JVM computes it.
     */
    @Override
    public int hashCode() {
        return 31 * empno + sal;
    }

    @Override
    public String toString() {
        return "Employee [empno=" + empno + ", ename=" + ename + ", job=" + job
                + ", mgr=" + mgr + ", hiredate=" + hiredate + ", sal=" + sal
                + ", comm=" + comm + ", deptno=" + deptno + "]";
    }

    /**
     * Deserializes all fields, in the same order {@link #write(DataOutput)} emits them.
     *
     * @param in the stream to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.empno = in.readInt();
        this.ename = in.readUTF();
        this.job = in.readUTF();
        this.mgr = in.readInt();
        this.hiredate = in.readUTF();
        this.sal = in.readInt();
        this.comm = in.readInt();
        this.deptno = in.readInt();
    }

    /**
     * Serializes all fields in record-column order
     * (empno, ename, job, mgr, hiredate, sal, comm, deptno).
     *
     * @param output the stream to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput output) throws IOException {
        output.writeInt(empno);
        output.writeUTF(ename);
        output.writeUTF(job);
        output.writeInt(mgr);
        output.writeUTF(hiredate);
        output.writeInt(sal);
        output.writeInt(comm);
        output.writeInt(deptno);
    }

    public int getEmpno() {
        return empno;
    }

    public void setEmpno(int empno) {
        this.empno = empno;
    }

    public String getEname() {
        return ename;
    }

    public void setEname(String ename) {
        this.ename = ename;
    }

    public String getJob() {
        return job;
    }

    public void setJob(String job) {
        this.job = job;
    }

    public int getMgr() {
        return mgr;
    }

    public void setMgr(int mgr) {
        this.mgr = mgr;
    }

    public String getHiredate() {
        return hiredate;
    }

    public void setHiredate(String hiredate) {
        this.hiredate = hiredate;
    }

    public int getSal() {
        return sal;
    }

    public void setSal(int sal) {
        this.sal = sal;
    }

    public int getComm() {
        return comm;
    }

    public void setComm(int comm) {
        this.comm = comm;
    }

    public int getDeptno() {
        return deptno;
    }

    public void setDeptno(int deptno) {
        this.deptno = deptno;
    }
}
 
 
 
131
 
 
 
 
1
import java.io.DataInput;
2
import java.io.DataOutput;
3
import java.io.IOException;
4

5
import org.apache.hadoop.io.Writable;
6
import org.apache.hadoop.io.WritableComparable;
7

8
//7499,ALLEN,SALESMAN,7698,1981/2/20,1600,300,30
9
public class Employee implements WritableComparable<Employee>{
10

11
    private int empno;
12
    private String ename;
13
    private String job;
14
    private int mgr;
15
    private String hiredate;
16
    private int sal;
17
    private int comm;
18
    private int deptno;
19

20
    public Employee(){
21

22
    }
23

24
    @Override
25
    public int compareTo(Employee o) {
26
        // 排序规则
27
        if(this.sal >= o.getSal()){
28
            return 1;
29
        }else{
30
            return -1;
31
        }
32
    }
33

34
    @Override
35
    public String toString() {
36
        return "Employee [empno=" + empno + ", ename=" + ename + ", job=" + job
37
                + ", mgr=" + mgr + ", hiredate=" + hiredate + ", sal=" + sal
38
                + ", comm=" + comm + ", deptno=" + deptno + "]";
39
    }
40

41
    @Override
42
    public void readFields(DataInput in) throws IOException {
43
        this.empno = in.readInt();
44
        this.ename = in.readUTF();
45
        this.job = in.readUTF();
46
        this.mgr = in.readInt();
47
        this.hiredate = in.readUTF();
48
        this.sal = in.readInt();
49
        this.comm = in.readInt();
50
        this.deptno = in.readInt();
51
    }
52

53
    @Override
54
    public void write(DataOutput output) throws IOException {
55
        ////7499,ALLEN,SALESMAN,7698,1981/2/20,1600,300,30
56
        output.writeInt(empno);
57
        output.writeUTF(ename);
58
        output.writeUTF(job);
59
        output.writeInt(mgr);
60
        output.writeUTF(hiredate);
61
        output.writeInt(sal);
62
        output.writeInt(comm);
63
        output.writeInt(deptno);
64
    }
65

66
    public int getEmpno() {
67
        return empno;
68
    }
69

70
    public void setEmpno(int empno) {
71
        this.empno = empno;
72
    }
73

74
    public String getEname() {
75
        return ename;
76
    }
77

78
    public void setEname(String ename) {
79
        this.ename = ename;
80
    }
81

82
    public String getJob() {
83
        return job;
84
    }
85

86
    public void setJob(String job) {
87
        this.job = job;
88
    }
89

90
    public int getMgr() {
91
        return mgr;
92
    }
93

94
    public void setMgr(int mgr) {
95
        this.mgr = mgr;
96
    }
97

98
    public String getHiredate() {
99
        return hiredate;
100
    }
101

102
    public void setHiredate(String hiredate) {
103
        this.hiredate = hiredate;
104
    }
105

106
    public int getSal() {
107
        return sal;
108
    }
109

110
    public void setSal(int sal) {
111
        this.sal = sal;
112
    }
113

114
    public int getComm() {
115
        return comm;
116
    }
117

118
    public void setComm(int comm) {
119
        this.comm = comm;
120
    }
121

122
    public int getDeptno() {
123
        return deptno;
124
    }
125

126
    public void setDeptno(int deptno) {
127
        this.deptno = deptno;
128
    }
129
}
 
 

驱动程序:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver for the salary-sort job.
 *
 * <p>Configures a map-only pipeline definition ({@link SortMapper} with
 * {@link Employee} keys and {@link NullWritable} values), reads input from
 * {@code args[0]}, writes output to {@code args[1]}, and exits with a status
 * code reflecting whether the job succeeded.
 */
public class SortMain {

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path and {@code args[1]} the output path
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception{

        if (args.length < 2) {
            System.err.println("Usage: SortMain <input path> <output path>");
            System.exit(2);
        }

        // Sort employees by salary. The Job constructor is deprecated; use the factory method.
        Job job = Job.getInstance(new Configuration());

        // Entry point class, used to locate the job jar
        job.setJarByClass(SortMain.class);

        // Mapper and the types of its output key/value
        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(Employee.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Types of the final job output key/value
        job.setOutputKeyClass(Employee.class);
        job.setOutputValueClass(NullWritable.class);

        // Input and output paths of the job
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Run the job and propagate its outcome as the process exit code
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
 
 
 
35
 
 
 
 
1
import org.apache.hadoop.conf.Configuration;
2
import org.apache.hadoop.fs.Path;
3
import org.apache.hadoop.io.LongWritable;
4
import org.apache.hadoop.io.NullWritable;
5
import org.apache.hadoop.io.Text;
6
import org.apache.hadoop.mapreduce.Job;
7
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
8
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
9

10
public class SortMain {
11

12
    public static void main(String[] args) throws Exception{
13

14
        // 求员工工资的总额
15
        Job job = new Job(new Configuration());
16

17
        //指明程序的入口
18
        job.setJarByClass(SortMain.class);
19

20
        //指明任务中的mapper
21
        job.setMapperClass(SortMapper.class);
22
        job.setMapOutputKeyClass(Employee.class);
23
        job.setMapOutputValueClass(NullWritable.class);
24

25
        job.setOutputKeyClass(Employee.class);
26
        job.setOutputValueClass(NullWritable.class);
27

28
        //指明任务的输入路径和输出路径---> HDFS的路径
29
        FileInputFormat.addInputPath(job, new Path(args[0]));
30
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
31

32
        //启动任务
33
        job.waitForCompletion(true);
34
    }
35
}
 
 
 



原文地址:https://www.cnblogs.com/TiePiHeTao/p/aa81c25b94ee4c7f239ad10891780cda.html