Restoring HBase Data with BulkLoad

Problem:

The HBase cluster would not start: the HMaster hung in "initializing", putting the data at risk of loss.

Solution:

  1. Move the /hbase directory on HDFS aside by renaming it to /hbase-bak.

  2. Delete HBase's data in ZooKeeper and bring up a fresh HBase cluster (steps 1 and 2 are sketched below).

  3. Use the code below to restore the data under /hbase-bak into the new cluster.
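A minimal sketch of steps 1 and 2, assuming the default hbase.rootdir and znode parent (both /hbase) and a ZooKeeper quorum on master:2181, matching the code below:

# step 1: move the old HBase root directory aside on HDFS
hdfs dfs -mv /hbase /hbase-bak

# step 2: delete HBase's state znode in ZooKeeper (here with ZooKeeper's own CLI),
# then restart the HBase daemons so the master bootstraps a fresh, empty cluster
zkCli.sh -server master:2181 rmr /hbase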

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class RestoreData {

    public static final String HBASE_ZOOKEEPER_QUORUM = "hbase.zookeeper.quorum";
    public static final String HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT = "hbase.zookeeper.property.clientPort";
    public static final String HBASE_RPC_TIMEOUT = "hbase.rpc.timeout";
    public static final String HBASE_CLIENT_OPERATION_TIMEOUT = "hbase.client.operation.timeout";
    public static final String HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD = "hbase.client.scanner.timeout.period";

    public static void main(String[] args) {
        // Expect the two arguments that LoadIncrementalHFiles itself takes: the HFile
        // directory (a region directory works, since its subdirectories are column
        // families) and the target table name. Example:
        //   hdfs://master:9000/hbase-bak/data/default/student/ab4112439aee7a2dea88f1b5119f0f4d student
        if (args.length != 2) {
            System.err.println("Usage: RestoreData <hfile-dir> <table-name>");
            System.exit(1);
        }

        Configuration hbaseConf = HBaseConfiguration.create();
        hbaseConf.set(HBASE_ZOOKEEPER_QUORUM, "master");
        hbaseConf.set(HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT, "2181");
        hbaseConf.set(HBASE_RPC_TIMEOUT, "30000");
        hbaseConf.set(HBASE_CLIENT_OPERATION_TIMEOUT, "30000");
        hbaseConf.set(HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, "30000");

        try {
            // LoadIncrementalHFiles (the completebulkload tool) moves the HFiles into
            // the target table's regions; it opens its own connection from hbaseConf.
            new LoadIncrementalHFiles(hbaseConf).run(args);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
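RestoreData is only a thin wrapper around LoadIncrementalHFiles (the completebulkload tool), so the same load can also be driven with the class HBase 1.x already ships, no custom jar required; if the target table does not exist yet, the tool creates it, inferring the column families from the HFile directory layout. A sketch using the region directory from the example above:

hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles \
    hdfs://master:9000/hbase-bak/data/default/student/ab4112439aee7a2dea88f1b5119f0f4d student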


POM file

<dependencies>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.4.13</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.4.13</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>1.4.13</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.8.3</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-hadoop2-compat</artifactId>
            <version>1.4.13</version>
        </dependency>

    </dependencies>

Packaging
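The classpath in the script below adds only the log4j/slf4j, snappy, and Hadoop jars, so hbase_production.jar is assumed to be a fat jar that bundles the HBase classes (for example via maven-shade-plugin). With that configured, packaging is just:

# build the jar that the script below puts on the classpath
mvn clean package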

Script

The script to run on the cluster:

export LD_LIBRARY_PATH=/data/hadoop-2.8.3/lib/native

java -cp /data/hadoop/data/hbase_production.jar\
:/data/hbase/lib/log4j-1.2.17.jar\
:/data/hbase/lib/slf4j-log4j12-1.7.25.jar\
:/data/hbase/lib/slf4j-api-1.7.25.jar\
:/data/hbase/lib/snappy-java-1.0.5.jar\
:/data/hadoop-2.8.3/share/hadoop/common/hadoop-nfs-2.8.3.jar\
:/data/hadoop-2.8.3/share/hadoop/common/hadoop-common-2.8.3.jar \
com.xxxx.hbase.RestoreData \
hdfs://master:9000/hbase-bak/data/default/student/$1 \
student

Run

./<script-name> <region-directory-name>

The single argument is the name of a region directory under /hbase-bak/data/default/student (e.g. ab4112439aee7a2dea88f1b5119f0f4d); the script expands it to the full HDFS path.
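A table usually has more than one region directory under /hbase-bak/data/default/student, and each run of the script loads a single region. A sketch that loads them all, assuming the script above was saved as restore.sh (hypothetical name):

# iterate over every region directory of table "student";
# hidden entries such as .tabledesc and .tmp also live there and are skipped
for dir in $(hdfs dfs -ls /hbase-bak/data/default/student | awk '/^d/ {print $NF}'); do
    region=$(basename "$dir")
    case "$region" in .*) continue ;; esac   # not a region directory
    ./restore.sh "$region"
done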

The same approach can also be used for HBase backups.

Original post (Chinese): https://www.cnblogs.com/weijiqian/p/14122755.html