统计Azure存储的HBase各表数据量

场景:HBase存储在Azure上,现在通过访问Azure Storage的接口,获取HBase中各个表的数据量。

注意:

1、Azure存储默认会额外保存2个副本,即数据共存3份,但只按1份计费,通过接口取到的size也是1份的大小。如果是自建HDFS,则情况不同。

2、此处访问的是Azure Storage的接口,还可以访问HBase的接口来获取数据量(另行验证)。

C# 代码:

using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Auth;
using Microsoft.WindowsAzure.Storage.Blob;
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;

   /// <summary>
   /// Reads the blob container configured for the application and reports, per HBase table,
   /// the total size of the blobs stored under that table's directory.
   /// </summary>
   public class HBaseResourceFetcher
    {
        // Blob path that is listed for table directories (one sub-directory per table).
        private const string TableRootPath = "hbase/data/default";

        private readonly CloudBlobContainer blobContainer;

        /// <summary>
        /// Builds the container reference from the storage account name, key, endpoint suffix
        /// and container name returned by <c>AppConfigGetter</c>.
        /// </summary>
        public HBaseResourceFetcher()
        {
            StorageCredentials storageCred = new StorageCredentials(
                AppConfigGetter.Get(ConfigConstants.KEY_STORAGEACCOUNTNAME),
                AppConfigGetter.Get(ConfigConstants.KEY_STORAGEACCOUNTKEY));
            // The final argument 'true' selects HTTPS endpoints.
            CloudStorageAccount storageAccount = new CloudStorageAccount(storageCred,
                AppConfigGetter.Get(ConfigConstants.KEY_ENDPOINTSUFFIX), true);
            var blobClient = storageAccount.CreateCloudBlobClient();
            this.blobContainer = blobClient.GetContainerReference(
                AppConfigGetter.Get(ConfigConstants.KEY_STORAGECONTAINERNAME));
        }

        /// <summary>
        /// Lists the sub-directories directly under <c>hbase/data/default</c> and computes the size of each.
        /// </summary>
        /// <returns>
        /// A dictionary keyed by table name (the directory prefix with the root path and slashes removed).
        /// Empty when no sub-directories are found.
        /// </returns>
        public Dictionary<string, ResourceEntity> GetHBaseTableSizeInfo()
        {
            Dictionary<string, ResourceEntity> result = new Dictionary<string, ResourceEntity>();
            CloudBlobDirectory directory = this.blobContainer.GetDirectoryReference(TableRootPath);
            if (directory == null)
            {
                return result;
            }

            foreach (var item in directory.ListBlobs())
            {
                // Only directories represent tables; plain blobs at this level are ignored.
                var dir = item as CloudBlobDirectory;
                if (dir == null)
                {
                    continue;
                }

                string key = dir.Prefix.Replace(TableRootPath + "/", "").Replace("/", "");
                if (result.ContainsKey(key))
                {
                    continue;
                }

                result.Add(key, new ResourceEntity()
                {
                    Type = ResourceType.HBase,
                    TableName = key,
                    // Azure storage: the default replica count is 2 (3 copies in total),
                    // but only one copy is billed, so this is recorded as 0.
                    CopiesNum = 0,
                    // The size obtained here is likewise the size of a single copy.
                    Size_B = GetFileSizeByBlobPath(dir.Prefix)
                });
            }
            return result;
        }

        /// <summary>
        /// Sums the lengths of all blobs found recursively under the given directory path,
        /// skipping blobs whose name contains <c>.regioninfo</c>, <c>.tableinfo</c> or <c>recovered.edits</c>.
        /// </summary>
        /// <param name="directoryPath">Directory prefix inside the container, e.g. <c>hbase/data/default/mytable/</c>.</param>
        /// <returns>Total size in bytes of the counted blobs; 0 when nothing is found.</returns>
        public long GetFileSizeByBlobPath(string directoryPath)
        {
            CloudBlobDirectory directory = this.blobContainer.GetDirectoryReference(directoryPath);
            if (directory == null)
            {
                return 0;
            }

            // Flat listing (first argument 'true') walks the whole sub-tree.
            // OfType<ICloudBlob> keeps every blob kind (block, page, append) and drops anything else,
            // so a non-block blob no longer causes a NullReferenceException.
            var blobs = directory.ListBlobs(true, BlobListingDetails.All).OfType<ICloudBlob>();

            long size = 0;
            foreach (var blob in blobs)
            {
                if (blob.Properties.Length <= 0 || IsExcludedBlob(blob.Name))
                {
                    continue;
                }

                size += blob.Properties.Length;
            }
            return size;
        }

        // Returns true when the blob name contains one of the markers that the size calculation skips.
        private static bool IsExcludedBlob(string blobName)
        {
            return blobName.Contains(".regioninfo")
                || blobName.Contains(".tableinfo")
                || blobName.Contains("recovered.edits");
        }
    }
View Code
原文地址:https://www.cnblogs.com/xianhan/p/6987230.html