Processing HBase Data with MapReduce

  Today I finally got my MapReduce program for processing HBase data working, after going down quite a few wrong paths. Once written, the program ran fine on the pseudo-distributed Hadoop on my local machine, but when I uploaded it to the cluster it failed. It turned out ZooKeeper was not configured correctly: the conf directory had not been added to the CLASSPATH at compile time, and that was what caused the error.
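
As an aside: if putting the conf directory on the CLASSPATH is not an option, the ZooKeeper quorum can also be set on the Configuration object directly. A minimal sketch, assuming the same config object used in the program below (the hostnames are placeholders for your actual ZooKeeper nodes):

// Point the HBase client at the cluster's ZooKeeper ensemble explicitly,
// instead of relying on hbase-site.xml being found on the classpath.
Configuration config = HBaseConfiguration.create();
config.set("hbase.zookeeper.quorum", "zk1,zk2,zk3");        // placeholder hosts
config.set("hbase.zookeeper.property.clientPort", "2181");  // default ZK client port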

  Here is the MR test program:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class Test {
    private static final String sourceTable = "sourceTable";
    private static final String targetTable = "targetTable";
    static Configuration config = HBaseConfiguration.create();

    // Create the table with the given column families if it does not exist yet.
    public static void createTable(String tablename, String[] cfs) throws IOException {
        HBaseAdmin admin = new HBaseAdmin(config);
        if (admin.tableExists(tablename)) {
            System.out.println("table already exists");
        }
        else {
            HTableDescriptor tableDesc = new HTableDescriptor(tablename);
            for (int i = 0; i < cfs.length; i++) {
                tableDesc.addFamily(new HColumnDescriptor(cfs[i]));
            }
            admin.createTable(tableDesc);
            System.out.println("table created successfully");
        }
    }

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        String[] cfs = {"a"};
        createTable(targetTable, cfs);
        Job job = new Job(config, "test");
        job.setJarByClass(Test.class);
        Scan scan = new Scan();
        scan.setCaching(1024);      // fetch rows in larger batches for the MR scan
        scan.setCacheBlocks(false); // don't pollute the block cache with a full-table scan
        TableMapReduceUtil.initTableMapperJob(
                sourceTable,        // input table
                scan,               // scan describing what to read
                Mapper1.class,      // mapper class
                Text.class,         // mapper output key type
                IntWritable.class,  // mapper output value type
                job);
        TableMapReduceUtil.initTableReducerJob(
                targetTable,        // output table
                Reducer1.class,     // reducer class
                job);
        boolean b = job.waitForCompletion(true);
        if (!b) {
            throw new IOException("job failed");
        }
    }

    // Emits (value of cf:a, 1) for every row of the source table.
    public static class Mapper1 extends
            TableMapper<Text, IntWritable> {
        private final IntWritable ONE = new IntWritable(1);
        private Text text = new Text();

        public void map(ImmutableBytesWritable row, Result value, Context context)
                throws IOException, InterruptedException {
            String id = new String(value.getValue(Bytes.toBytes("cf"), Bytes.toBytes("a")));
            text.set(id);
            context.write(text, ONE); // write the Text key, not the raw String
        }
    }

    // Sums the counts per key and writes the total to column a:c of the target table.
    public static class Reducer1 extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int i = 0;
            for (IntWritable val : values) {
                i += val.get();
            }
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.add(Bytes.toBytes("a"), Bytes.toBytes("c"), Bytes.toBytes(i));
            context.write(null, put); // row key comes from the Put; output key is ignored
        }
    }
}
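
Note that the program assumes sourceTable already exists and has a column family named cf (the mapper reads cf:a). For a quick test, the source table can be created and seeded from the hbase shell; the row keys and values below are just examples:

create 'sourceTable', 'cf'
put 'sourceTable', 'row1', 'cf:a', 'value1'
put 'sourceTable', 'row2', 'cf:a', 'value1'
put 'sourceTable', 'row3', 'cf:a', 'value2'

With this sample data, the job would write a count of 2 for value1 and 1 for value2 into targetTable.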

Once the program is written it has to be packaged into a jar. You can build the jar locally or on the server, but either way the CLASSPATH must be set:

export CLASSPATH=/data/hadoop/hadoop-1.0.4/hadoop-core-1.0.4.jar:/data/hadoop/hbase-0.94.2/hbase-0.94.2.jar:/data/hadoop/hbase-0.94.2/conf/

Run this command in the terminal, or add it to the .bashrc in your home directory (and run source ~/.bashrc to pick it up).
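
Alternatively, HBase can print a ready-made classpath itself; with the install layout above, something like this should also work:

export CLASSPATH=$(/data/hadoop/hbase-0.94.2/bin/hbase classpath)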

Next, create a test_classes directory,

and run:

javac -d test_classes/ Test.java

When compilation finishes, three .class files will be generated under test_classes: Test.class, Test$Mapper1.class, and Test$Reducer1.class.

Then run:

jar -cvf test.jar -C test_classes .

which produces the test.jar file.
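
To double-check that all three classes made it into the jar, you can list its contents:

jar -tf test.jar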

Finally, run:

bin/hadoop jar test.jar Test

to launch the MR job.
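
After the job completes, the aggregated counts can be inspected from the hbase shell. Note that the reducer stores each count with Bytes.toBytes(i), so the cell holds a 4-byte integer and the shell will display it as escaped bytes (e.g. \x00\x00\x00\x02) rather than a decimal number:

scan 'targetTable'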

Original article: https://www.cnblogs.com/hitandrew/p/2855631.html