This post walks through how Hive turns a parsed query into an executable plan, i.e. semantic analysis.

Everything starts in Driver.compile(). Once the statement has been parsed into an ASTNode, compile() asks SemanticAnalyzerFactory for the right BaseSemanticAnalyzer, runs any configured semantic-analyzer hooks around the analysis, and finally validates the generated plan:

      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
      BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);
      List<HiveSemanticAnalyzerHook> saHooks =
          getHooks(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK,
              HiveSemanticAnalyzerHook.class);
      if (saHooks != null && !saHooks.isEmpty()) {
        HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
        hookCtx.setConf(conf);
        hookCtx.setUserName(userName);
        hookCtx.setIpAddress(SessionState.get().getUserIpAddress());
        hookCtx.setCommand(command);
        for (HiveSemanticAnalyzerHook hook : saHooks) {
          tree = hook.preAnalyze(hookCtx, tree);
        }
        sem.analyze(tree, ctx);
        hookCtx.update(sem);
        for (HiveSemanticAnalyzerHook hook : saHooks) {
          hook.postAnalyze(hookCtx, sem.getAllRootTasks());
        }
      } else {
        sem.analyze(tree, ctx);
      }
      // Record any ACID compliant FileSinkOperators we saw so we can add our transaction ID to
      // them later.
      acidSinks = sem.getAcidFileSinks();

      LOG.info("Semantic Analysis Completed");

      // validate the plan
      sem.validate();
      acidInQuery = sem.hasAcidInQuery();
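The hook list is read from hive.semantic.analyzer.hook. Each hook may rewrite the AST in preAnalyze() and gets to inspect the generated task DAG in postAnalyze(). Below is a minimal sketch of such a hook; the class name LoggingSemanticHook and its package are hypothetical:

import java.io.Serializable;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// Enable with: set hive.semantic.analyzer.hook=com.example.LoggingSemanticHook;
public class LoggingSemanticHook extends AbstractSemanticAnalyzerHook {
  @Override
  public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast)
      throws SemanticException {
    // Runs before sem.analyze(); may return a rewritten tree.
    System.out.println("preAnalyze: " + context.getCommand());
    return ast;
  }

  @Override
  public void postAnalyze(HiveSemanticAnalyzerHookContext context,
      List<Task<? extends Serializable>> rootTasks) throws SemanticException {
    // Runs after sem.analyze(); the root task DAG is available here.
    System.out.println("postAnalyze: " + rootTasks.size() + " root task(s)");
  }
}

Note how this mirrors the loop in compile() above: every hook's preAnalyze() runs before the single sem.analyze() call, and every postAnalyze() runs after it.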
SemanticAnalyzerFactory.get() picks the concrete analyzer by switching on the type of the root AST token:

  public static BaseSemanticAnalyzer get(QueryState queryState, ASTNode tree)
      throws SemanticException {
    if (tree.getToken() == null) {
      throw new RuntimeException("Empty Syntax Tree");
    } else {
      HiveOperation opType = commandType.get(tree.getType());
      queryState.setCommandType(opType);
      switch (tree.getType()) {
      case HiveParser.TOK_EXPLAIN:
        return new ExplainSemanticAnalyzer(queryState);
      case HiveParser.TOK_EXPLAIN_SQ_REWRITE:
        return new ExplainSQRewriteSemanticAnalyzer(queryState);
      case HiveParser.TOK_LOAD:
        return new LoadSemanticAnalyzer(queryState);
      case HiveParser.TOK_EXPORT:
        return new ExportSemanticAnalyzer(queryState);
      case HiveParser.TOK_IMPORT:
        return new ImportSemanticAnalyzer(queryState);
      case HiveParser.TOK_REPL_DUMP:
        return new ReplicationSemanticAnalyzer(queryState);
      case HiveParser.TOK_REPL_LOAD:
        return new ReplicationSemanticAnalyzer(queryState);
      case HiveParser.TOK_REPL_STATUS:
        return new ReplicationSemanticAnalyzer(queryState);
      case HiveParser.TOK_ALTERTABLE: {
        Tree child = tree.getChild(1);
        switch (child.getType()) {
          case HiveParser.TOK_ALTERTABLE_RENAME:
          case HiveParser.TOK_ALTERTABLE_TOUCH:
          case HiveParser.TOK_ALTERTABLE_ARCHIVE:
          case HiveParser.TOK_ALTERTABLE_UNARCHIVE:
          case HiveParser.TOK_ALTERTABLE_ADDCOLS:
          case HiveParser.TOK_ALTERTABLE_RENAMECOL:
          case HiveParser.TOK_ALTERTABLE_REPLACECOLS:
          case HiveParser.TOK_ALTERTABLE_DROPPARTS:
          case HiveParser.TOK_ALTERTABLE_ADDPARTS:
          case HiveParser.TOK_ALTERTABLE_PARTCOLTYPE:
          case HiveParser.TOK_ALTERTABLE_PROPERTIES:
          case HiveParser.TOK_ALTERTABLE_DROPPROPERTIES:
          case HiveParser.TOK_ALTERTABLE_EXCHANGEPARTITION:
          case HiveParser.TOK_ALTERTABLE_SKEWED:
          case HiveParser.TOK_ALTERTABLE_DROPCONSTRAINT:
          case HiveParser.TOK_ALTERTABLE_ADDCONSTRAINT:
          queryState.setCommandType(commandType.get(child.getType()));
          return new DDLSemanticAnalyzer(queryState);
        }
        opType =
            tablePartitionCommandType.get(child.getType())[tree.getChildCount() > 2 ? 1 : 0];
        queryState.setCommandType(opType);
        return new DDLSemanticAnalyzer(queryState);
      }
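      // ... the remaining cases (other DDL, SHOW/DESCRIBE, etc.) are elided here;
      // for an ordinary query the default branch returns a SemanticAnalyzer
      // (or a CalcitePlanner when hive.cbo.enable is set).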

Whichever analyzer the factory returns, compile() drives it through BaseSemanticAnalyzer.analyze(), a small template method:

  public void analyze(ASTNode ast, Context ctx) throws SemanticException {
    initCtx(ctx);
    init(true);
    analyzeInternal(ast);
  }
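For experimentation, the same sequence can be driven by hand. The sketch below mirrors the compile() flow shown earlier; it assumes a working HiveConf (conf), QueryState (queryState), and SQL string (command) are already in scope, and exact constructors vary between Hive versions:

    Context ctx = new Context(conf);                           // per-query scratch context
    ASTNode tree = ParseUtils.parse(command, ctx);             // SQL -> AST
    BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);
    sem.analyze(tree, ctx);                                    // initCtx + init + analyzeInternal
    sem.validate();                                            // sanity-check the generated plan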

analyzeInternal() is the abstract hook that each analyzer implements; for SELECT/INSERT statements it lives in SemanticAnalyzer (or CalcitePlanner). Its first major step is genResolvedParseTree(), which preprocesses the tree and short-circuits the statement types that need no regular query planning:

 boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
    ASTNode child = ast;
    this.ast = ast;
    viewsExpanded = new ArrayList<String>();
    ctesExpanded = new ArrayList<String>();

    // 1. analyze and process the position alias
    processPositionAlias(ast);

    // 2. analyze create table command
    if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) {
      // if it is not CTAS, we don't need to go further and just return
      if ((child = analyzeCreateTable(ast, qb, plannerCtx)) == null) {
        return false;
      }
    } else {
      queryState.setCommandType(HiveOperation.QUERY);
    }

    // 3. analyze create view command
    if (ast.getToken().getType() == HiveParser.TOK_CREATEVIEW ||
        ast.getToken().getType() == HiveParser.TOK_CREATE_MATERIALIZED_VIEW ||
        (ast.getToken().getType() == HiveParser.TOK_ALTERVIEW &&
            ast.getChild(1).getType() == HiveParser.TOK_QUERY)) {
      child = analyzeCreateView(ast, qb, plannerCtx);
      if (child == null) {
        return false;
      }
      viewSelect = child;
      // prevent view from referencing itself
      viewsExpanded.add(createVwDesc.getViewName());
    }

    switch(ast.getToken().getType()) {
      case HiveParser.TOK_SET_AUTOCOMMIT:
        assert ast.getChildCount() == 1;
        if(ast.getChild(0).getType() == HiveParser.TOK_TRUE) {
          setAutoCommitValue(true);
        }
        else if(ast.getChild(0).getType() == HiveParser.TOK_FALSE) {
          setAutoCommitValue(false);
        }
        else {
          assert false : "Unexpected child of TOK_SET_AUTOCOMMIT: " + ast.getChild(0).getType();
        }
        //fall through
      case HiveParser.TOK_START_TRANSACTION:
      case HiveParser.TOK_COMMIT:
      case HiveParser.TOK_ROLLBACK:
        if(!(conf.getBoolVar(ConfVars.HIVE_IN_TEST) || conf.getBoolVar(ConfVars.HIVE_IN_TEZ_TEST))) {
          throw new IllegalStateException(SemanticAnalyzerFactory.getOperation(ast.getToken().getType()) +
            " is not supported yet.");
        }
        queryState.setCommandType(SemanticAnalyzerFactory.getOperation(ast.getToken().getType()));
        return false;
    }
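    // ... the rest of genResolvedParseTree() (doPhase1() and metadata
    // resolution via getMetaData()) is elided here ...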

After this switch, genResolvedParseTree() sets up a Phase1Ctx and recursively calls doPhase1() on the (possibly rewritten) child node. doPhase1() is the workhorse of phase 1: it walks the AST and populates the QB/QBParseInfo structures, as its Javadoc summarizes:

  /**
   * Phase 1: (including, but not limited to):
   *
   * 1. Gets all the aliases for all the tables / subqueries and makes the
   *    appropriate mapping in aliasToTabs, aliasToSubq.
   * 2. Gets the location of the destination and names the clause "inclause" + i.
   * 3. Creates a map from a string representation of an aggregation tree to the
   *    actual aggregation AST.
   * 4. Creates a mapping from the clause name to the select expression AST in
   *    destToSelExpr.
   * 5. Creates a mapping from a table alias to the lateral view ASTs in
   *    aliasToLateralViews.
   *
   * @param ast
   * @param qb
   * @param ctx_1
   * @throws SemanticException
   */
  @SuppressWarnings({"fallthrough", "nls"})
  public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plannerCtx)
      throws SemanticException {

    boolean phase1Result = true;
    QBParseInfo qbp = qb.getParseInfo();
    boolean skipRecursion = false;

    if (ast.getToken() != null) {
      skipRecursion = true;
      switch (ast.getToken().getType()) {
      case HiveParser.TOK_SELECTDI:
        qb.countSelDi();
        // fall through
      case HiveParser.TOK_SELECT:
        qb.countSel();
        qbp.setSelExprForClause(ctx_1.dest, ast);

        int posn = 0;
        if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) {
          qbp.setHints((ASTNode) ast.getChild(0));
          posn++;
        }

        if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM))
          queryProperties.setUsesScript(true);

        LinkedHashMap<String, ASTNode> aggregations = doPhase1GetAggregationsFromSelect(ast,
            qb, ctx_1.dest);
        doPhase1GetColumnAliasesFromSelect(ast, qbp);
        qbp.setAggregationExprsForClause(ctx_1.dest, aggregations);
        qbp.setDistinctFuncExprsForClause(ctx_1.dest,
          doPhase1GetDistinctFuncExprs(aggregations));
        break;

      case HiveParser.TOK_WHERE:
        qbp.setWhrExprForClause(ctx_1.dest, ast);
        if (!SubQueryUtils.findSubQueries((ASTNode) ast.getChild(0)).isEmpty())
            queryProperties.setFilterWithSubQuery(true);
        break;

      case HiveParser.TOK_INSERT_INTO:
        String currentDatabase = SessionState.get().getCurrentDatabase();
        String tab_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0), currentDatabase);
        qbp.addInsertIntoTable(tab_name, ast);
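        // deliberately falls through to TOK_DESTINATION: an INSERT INTO target
        // is also a destination (hence the @SuppressWarnings("fallthrough") above)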

      case HiveParser.TOK_DESTINATION:
        ctx_1.dest = this.ctx.getDestNamePrefix(ast).toString() + ctx_1.nextNum;
        ctx_1.nextNum++;
        boolean isTmpFileDest = false;
        if (ast.getChildCount() > 0 && ast.getChild(0) instanceof ASTNode) {
          ASTNode ch = (ASTNode) ast.getChild(0);
          if (ch.getToken().getType() == HiveParser.TOK_DIR && ch.getChildCount() > 0
              && ch.getChild(0) instanceof ASTNode) {
            ch = (ASTNode) ch.getChild(0);
            isTmpFileDest = ch.getToken().getType() == HiveParser.TOK_TMP_FILE;
          } else {
            if (ast.getToken().getType() == HiveParser.TOK_DESTINATION
                && ast.getChild(0).getType() == HiveParser.TOK_TAB) {
              String fullTableName = getUnescapedName((ASTNode) ast.getChild(0).getChild(0),
                  SessionState.get().getCurrentDatabase());
              qbp.getInsertOverwriteTables().put(fullTableName, ast);
            }
          }
        }

        // is there a insert in the subquery
        if (qbp.getIsSubQ() && !isTmpFileDest) {
          throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
        }

        qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
        handleInsertStatementSpecPhase1(ast, qbp, ctx_1);

        if (qbp.getClauseNamesForDest().size() == 2) {
          // From the moment that we have two destination clauses,
          // we know that this is a multi-insert query.
          // Thus, set property to right value.
          // Using qbp.getClauseNamesForDest().size() >= 2 would be
          // equivalent, but we use == to avoid setting the property
          // multiple times
          queryProperties.setMultiDestQuery(true);
        }

        if (plannerCtx != null && !queryProperties.hasMultiDestQuery()) {
          plannerCtx.setInsertToken(ast, isTmpFileDest);
        } else if (plannerCtx != null && qbp.getClauseNamesForDest().size() == 2) {
          // For multi-insert query, currently we only optimize the FROM clause.
          // Hence, introduce multi-insert token on top of it.
          // However, first we need to reset existing token (insert).
          plannerCtx.resetToken();
          plannerCtx.setMultiInsertToken((ASTNode) qbp.getQueryFrom().getChild(0));
        }
        break;

      case HiveParser.TOK_FROM:
        int child_count = ast.getChildCount();
        if (child_count != 1) {
          throw new SemanticException(generateErrorMessage(ast,
              "Multiple Children " + child_count));
        }

        if (!qbp.getIsSubQ()) {
          qbp.setQueryFromExpr(ast);
        }

        // Check if this is a subquery / lateral view
        ASTNode frm = (ASTNode) ast.getChild(0);
        if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
          processTable(qb, frm);
        } else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) {
          // Create a temp table with the passed values in it then rewrite this portion of the
          // tree to be from that table.
          ASTNode newFrom = genValuesTempTable(frm, qb);
          ast.setChild(0, newFrom);
          processTable(qb, newFrom);
        } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
          processSubQuery(qb, frm);
        } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
            frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW_OUTER) {
          queryProperties.setHasLateralViews(true);
          processLateralView(qb, frm);
        } else if (isJoinToken(frm)) {
          processJoin(qb, frm);
          qbp.setJoinExpr(frm);
        } else if (frm.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
          queryProperties.setHasPTF(true);
          processPTF(qb, frm);
        }
        break;

      case HiveParser.TOK_CLUSTERBY:
        // Get the clusterby aliases - these are aliased to the entries in the
        // select list
        queryProperties.setHasClusterBy(true);
        qbp.setClusterByExprForClause(ctx_1.dest, ast);
        break;

      case HiveParser.TOK_DISTRIBUTEBY:
        // Get the distribute by aliases - these are aliased to the entries in
        // the
        // select list
        queryProperties.setHasDistributeBy(true);
        qbp.setDistributeByExprForClause(ctx_1.dest, ast);
        if (qbp.getClusterByForClause(ctx_1.dest) != null) {
          throw new SemanticException(generateErrorMessage(ast,
              ErrorMsg.CLUSTERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
        } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
          throw new SemanticException(generateErrorMessage(ast,
              ErrorMsg.ORDERBY_DISTRIBUTEBY_CONFLICT.getMsg()));
        }
        break;

      case HiveParser.TOK_SORTBY:
        // Get the sort by aliases - these are aliased to the entries in the
        // select list
        queryProperties.setHasSortBy(true);
        qbp.setSortByExprForClause(ctx_1.dest, ast);
        if (qbp.getClusterByForClause(ctx_1.dest) != null) {
          throw new SemanticException(generateErrorMessage(ast,
              ErrorMsg.CLUSTERBY_SORTBY_CONFLICT.getMsg()));
        } else if (qbp.getOrderByForClause(ctx_1.dest) != null) {
          throw new SemanticException(generateErrorMessage(ast,
              ErrorMsg.ORDERBY_SORTBY_CONFLICT.getMsg()));
        }

        break;

      case HiveParser.TOK_ORDERBY:
        // Get the order by aliases - these are aliased to the entries in the
        // select list
        queryProperties.setHasOrderBy(true);
        qbp.setOrderByExprForClause(ctx_1.dest, ast);
        if (qbp.getClusterByForClause(ctx_1.dest) != null) {
          throw new SemanticException(generateErrorMessage(ast,
              ErrorMsg.CLUSTERBY_ORDERBY_CONFLICT.getMsg()));
        }
        break;

      case HiveParser.TOK_GROUPBY:
      case HiveParser.TOK_ROLLUP_GROUPBY:
      case HiveParser.TOK_CUBE_GROUPBY:
      case HiveParser.TOK_GROUPING_SETS:
        // Get the groupby aliases - these are aliased to the entries in the
        // select list
        queryProperties.setHasGroupBy(true);
        if (qbp.getJoinExpr() != null) {
          queryProperties.setHasJoinFollowedByGroupBy(true);
        }
        if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
          throw new SemanticException(generateErrorMessage(ast,
              ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
        }
        qbp.setGroupByExprForClause(ctx_1.dest, ast);
        skipRecursion = true;

        // Rollup and Cubes are syntactic sugar on top of grouping sets
        if (ast.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) {
          qbp.getDestRollups().add(ctx_1.dest);
        } else if (ast.getToken().getType() == HiveParser.TOK_CUBE_GROUPBY) {
          qbp.getDestCubes().add(ctx_1.dest);
        } else if (ast.getToken().getType() == HiveParser.TOK_GROUPING_SETS) {
          qbp.getDestGroupingSets().add(ctx_1.dest);
        }
        break;

      case HiveParser.TOK_HAVING:
        qbp.setHavingExprForClause(ctx_1.dest, ast);
        qbp.addAggregationExprsForClause(ctx_1.dest,
            doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest));
        break;

      case HiveParser.KW_WINDOW:
        if (!qb.hasWindowingSpec(ctx_1.dest) ) {
          throw new SemanticException(generateErrorMessage(ast,
              "Query has no Cluster/Distribute By; but has a Window definition"));
        }
        handleQueryWindowClauses(qb, ctx_1, ast);
        break;

      case HiveParser.TOK_LIMIT:
        if (ast.getChildCount() == 2) {
          qbp.setDestLimit(ctx_1.dest,
              new Integer(ast.getChild(0).getText()),
              new Integer(ast.getChild(1).getText()));
        } else {
          qbp.setDestLimit(ctx_1.dest, new Integer(0),
              new Integer(ast.getChild(0).getText()));
        }
        break;

      case HiveParser.TOK_ANALYZE:
        // Case of analyze command

        String table_name = getUnescapedName((ASTNode) ast.getChild(0).getChild(0)).toLowerCase();

        qb.setTabAlias(table_name, table_name);
        qb.addAlias(table_name);
        qb.getParseInfo().setIsAnalyzeCommand(true);
        qb.getParseInfo().setNoScanAnalyzeCommand(this.noscan);
        qb.getParseInfo().setPartialScanAnalyzeCommand(this.partialscan);
        // Allow analyze the whole table and dynamic partitions
        HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
        HiveConf.setVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE, "nonstrict");

        break;

      case HiveParser.TOK_UNIONALL:
        if (!qbp.getIsSubQ()) {
          // this shouldn't happen. The parser should have converted the union to be
          // contained in a subquery. Just in case, we keep the error as a fallback.
          throw new SemanticException(generateErrorMessage(ast,
              ErrorMsg.UNION_NOTIN_SUBQ.getMsg()));
        }
        skipRecursion = false;
        break;

      case HiveParser.TOK_INSERT:
        ASTNode destination = (ASTNode) ast.getChild(0);
        Tree tab = destination.getChild(0);

        // Proceed if AST contains partition & If Not Exists
        if (destination.getChildCount() == 2 &&
            tab.getChildCount() == 2 &&
            destination.getChild(1).getType() == HiveParser.TOK_IFNOTEXISTS) {
          String tableName = tab.getChild(0).getChild(0).getText();

          Tree partitions = tab.getChild(1);
          int childCount = partitions.getChildCount();
          HashMap<String, String> partition = new HashMap<String, String>();
          for (int i = 0; i < childCount; i++) {
            String partitionName = partitions.getChild(i).getChild(0).getText();
            Tree pvalue = partitions.getChild(i).getChild(1);
            if (pvalue == null) {
              break;
            }
            String partitionVal = stripQuotes(pvalue.getText());
            partition.put(partitionName, partitionVal);
          }
          // if it is a dynamic partition throw the exception
          if (childCount != partition.size()) {
            throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS
                .getMsg(partition.toString()));
          }
          Table table = null;
          try {
            table = this.getTableObjectByName(tableName);
          } catch (HiveException ex) {
            throw new SemanticException(ex);
          }
          try {
            Partition parMetaData = db.getPartition(table, partition, false);
            // Check partition exists if it exists skip the overwrite
            if (parMetaData != null) {
              phase1Result = false;
              skipRecursion = true;
              LOG.info("Partition already exists so insert into overwrite " +
                  "skipped for partition : " + parMetaData.toString());
              break;
            }
          } catch (HiveException e) {
            LOG.info("Error while getting metadata : ", e);
          }
          validatePartSpec(table, partition, (ASTNode)tab, conf, false);
        }
        skipRecursion = false;
        break;
      case HiveParser.TOK_LATERAL_VIEW:
      case HiveParser.TOK_LATERAL_VIEW_OUTER:
        // todo: nested LV
        assert ast.getChildCount() == 1;
        qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
        break;
      case HiveParser.TOK_CTE:
        processCTE(qb, ast);
        break;
      default:
        skipRecursion = false;
        break;
      }
    }

    if (!skipRecursion) {
      // Iterate over the rest of the children
      int child_count = ast.getChildCount();
      for (int child_pos = 0; child_pos < child_count && phase1Result; ++child_pos) {
        // Recurse
        phase1Result = phase1Result && doPhase1(
            (ASTNode)ast.getChild(child_pos), qb, ctx_1, plannerCtx);
      }
    }
    return phase1Result;
  }
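To see exactly what tree doPhase1() recurses over, you can dump a parse tree standalone, with no metastore or cluster involved. A minimal sketch (the class name DumpAst is mine):

import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ParseDriver;

public class DumpAst {
  public static void main(String[] args) throws Exception {
    ParseDriver pd = new ParseDriver();
    ASTNode tree = pd.parse("SELECT key, count(1) FROM src GROUP BY key");
    // Prints the nested TOK_QUERY / TOK_FROM / TOK_INSERT structure that
    // doPhase1() walks: TOK_FROM is visited first, then TOK_SELECT,
    // TOK_GROUPBY, etc. under TOK_INSERT.
    System.out.println(tree.dump());
  }
}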
Original article: https://www.cnblogs.com/itxuexiwang/p/6294148.html