Hive权限之审计

因为在生产环境中大量使用hive,而hive的权限控制又较弱,如果可以记录全部hive操作,在增强安全性的同时,还可以统计hive表的使用频率;同时如果可以记录hql的开始和结束时间,则可以找出系统中花费时间较多的job,针对性地进行优化。因此跟踪hive的使用轨迹,在增强安全的同时也能方便问题定位。

怎样记录用户操作呢?Hive Hook为我们提供了方便的开放接口。

我们对hive的使用主要有两种场景:一是平时直接在命令行下运行的hql操作,此时运行hql的实体就是OS的登录用户;第二种是从webapp获取的、由业务数据需求人员创建的定时报表hql脚本,此时运行hql的真正实体其实是报表创建者,系统只是代理运行而已。此时记录用户的行为则需要重写hive.security.authenticator.manager。

 

Hive默认使用HadoopDefaultAuthenticator获取运行hql的用户,使用其返回的用户进行权限验证。

为了使hive可以以代理的模式去运行,我们需要提供自己的authenticator,返回真正的hql运行者。以下配置可设置authenticator:

<!-- Points hive.security.authenticator.manager at the custom Authenticator class so that
     Hive asks it (instead of the default authenticator) which user is running the HQL.
     NOTE(review): presumably placed in hive-site.xml; confirm for your deployment. -->
<property>

 <name>hive.security.authenticator.manager</name>

 <value>com.pplive.bip.hive.auth.Authenticator</value>

  <description>bip user authenticator</description>

</property>

只有管理员能够开启代理模式。能够使用以下方式传递代理用户:

hive -d bip.user=xxx 或 hive --define bip.user=xxx

重写authenticator代码示例:

 public classAuthenticator implements HiveAuthenticationProvider {

 

 private finalstaticString BIP_USER="bip.user";

 privateStringuserName;

 privateStringbipUser;

 privateList<String>groupNames;

 

 privateConfigurationconf;

 @Override

 publicList<String> getGroupNames() {

   returngroupNames;

 }

 @Override

 publicStringgetUserName() {          

   this.bipUser = SessionState.get().getHiveVariables().get(BIP_USER);

   if(this.bipUser !=null &&!this.bipUser.isEmpty()) {

    if( AdminManager.isAdmin(this.userName)) {

        returnthis.bipUser;

    } else {

        thrownewRuntimeException("bip.user is set while youare not admin");

    }       

   } else{

    returnthis.userName;

   }

 }

 @Override

 publicvoidsetConf(Configuration conf) {

   this.conf = conf;

   UserGroupInformation ugi = null;

   try{

     ugi = ShimLoader.getHadoopShims().getUGIForConf(conf);

//     UserGroupInformation.createProxyUser(user, realUser);

   } catch(Exception e) {

     thrownewRuntimeException(e);

   }

   if(ugi == null){

     thrownewRuntimeException(

          "Can not initialize PPLive Authenticator.");

   }

   this.userName = ugi.getUserName();

   if(ugi.getGroupNames() !=null) {

     this.groupNames = Arrays.asList(ugi.getGroupNames());

   }          

 }

 

 publicString getProxy() {

      return  this.userName;

  }

Hive提供的Semantic Hook能够方便我们记录hql语义分析前后的状态,Execute Hook能够记录hql翻译成job提交运行前后的状态,Driver Hook能够记录包括整个编译运行过程前后的状态。

SemanticHook记录语义分析后的行为:

    public voidpostAnalyze(HiveSemanticAnalyzerHookContext context,

            List<Task<?

extendsSerializable>> rootTasks)

            throws SemanticException {

        Hivehive = null;

        try {

            hive= context.getHive();

        }catch(HiveException e) {

            e.printStackTrace();

            throw new RuntimeException(e);

        }

        Set<ReadEntity>inputs = context.getInputs();

        Set<WriteEntity>outputs = context.getOutputs();

       

        Set<String>readTables = newHashSet<String>();

        for(ReadEntity input :inputs) {

             Table table = input.getT();

            if(table!=null) {

                readTables.add(table.getTableName());

            }

        }  

        Set<String>writeTables = newHashSet<String>();

        for(WriteEntity output :outputs) {

             Table table = output.getT();

            if(table!=null) {

                writeTables.add(table.getTableName());

            }

        }

        HiveAuthenticationProviderauthenticationProvider = SessionState.get().getAuthenticator();                                     

        if(authenticationProviderinstanceof Authenticator) {

            Authenticatorauthenticator = (Authenticator)authenticationProvider;       //ip

            this.logger.info(String.format("phase=SA&executor=%s&proxy=%s&db=%s&cmd=%s&readTables=%s&writeTables=%s", authenticator.getUserName(),

                            authenticator.getProxy(), hive.getCurrentDatabase(),context.getCommand(),readTables.toString(),writeTables.toString()));

        }    

        StringuserName = SessionState.get().getAuthenticator().getUserName();

        logger.debug(String.format("%s execute %s, read tables:%s, writetables:%s", userName, context.getCommand(),readTables, writeTables));  

    }

Execute Hook记录job状态:

public classExecuteHook implements ExecuteWithHookContext {

    Loggerlogger= Logger.getLogger(DriverRunHook.class);

    privateHiveAuthenticationProviderauthenticationProvider = null;

    private static final String JOB_START_TIME="PRE_EXEC_HOOK";

    private static SimpleDateFormat dateFormat =new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    @Override

    public void run(HookContexthookContext) throwsException {

        QueryPlanqueryPlan = hookContext.getQueryPlan();

        StringqueryId = queryPlan.getQueryId();

        StringqueryStr = queryPlan.getQueryStr();

        if(authenticationProvider==null){

            authenticationProvider= SessionState.get().getAuthenticator();

        }   

        Stringresult = null;

        switch(hookContext.getHookType()){

        //hive.exec.pre.hooks

        case PRE_EXEC_HOOK:

            hookContext.getConf().setLong(JOB_START_TIME,System.currentTimeMillis());

            break;

        //hive.exec.post.hooks

        case POST_EXEC_HOOK:

            result= "Success";

            break;

        //hive.exec.failure.hooks

        case ON_FAILURE_HOOK:

            result= "Failure";

            break;

        default:

            break;

        }    

        if(hookContext.getHookType()!= HookContext.HookType.PRE_EXEC_HOOK&&authenticationProvider instanceofAuthenticator) {

            long jobEndTime = System.currentTimeMillis();

            HiveConfconf = hookContext.getConf();

            long jobStartTime =conf.getLong(JOB_START_TIME, jobEndTime);

            long timeTaken =(jobEndTime-jobStartTime)/1000;

            Authenticatorauthenticator = (Authenticator)authenticationProvider;       //ip

                         this.logger.info(String.format("phase=EXEC&result=%s&executor=%s&proxy=%s&db=%s&queryId=%s&queryStr=%s&jobName=%s&jobStartTime=%s&jobEndTime=%s&timeTaken=%d", result,authenticator.getUserName(),authenticator.getProxy(),

                    Hive.get().getCurrentDatabase(),queryId, queryStr,conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME),dateFormat.format(new Date(jobStartTime)),

                    dateFormat.format(newDate(jobEndTime)),timeTaken));

        }      

    }

}

DriverHook记录整个过程运行时间:

public classDriverRunHook implements HiveDriverRunHook{

    Loggerlogger= Logger.getLogger(DriverRunHook.class);

    privateHiveAuthenticationProviderauthenticationProvider = null;

    private static SimpleDateFormat dateFormat =new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    private long startTime = 0;

    @Override

    public voidpreDriverRun(HiveDriverRunHookContext hookContext)

            throws Exception {

        if(authenticationProvider==null){

            authenticationProvider= SessionState.get().getAuthenticator();

        }    

        startTime = System.currentTimeMillis();

    }

    @Override

    public voidpostDriverRun(HiveDriverRunHookContext hookContext)

            throws Exception {     

        if(authenticationProviderinstanceofAuthenticator) {

            long endTime = System.currentTimeMillis();

            long timeTaken = (endTime-startTime)/1000;

            Authenticatorauthenticator = (Authenticator)authenticationProvider;       //ip

            this.logger.info(String.format("phase=DriverRun&executor=%s&proxy=%s&db=%s&cmd=%s&startTime=%s&endTime=%s&timeTaken=%d", authenticator.getUserName(),authenticator.getProxy(),

                    Hive.get().getCurrentDatabase(),hookContext.getCommand(),dateFormat.format(newDate(startTime)),dateFormat.format(new Date(endTime)),timeTaken));

        }      

    }

}

原文地址:https://www.cnblogs.com/clnchanpin/p/7102792.html