PostgreSQL查询优化之子查询优化

子查询优化

上拉子连接

上拉子连接主要是把ANY和EXIST子句转换为半连接

void
pull_up_sublinks(PlannerInfo *root)
{
    Node	   *jtnode; //子连接上拉生成的结果
    Relids		relids;
    
    /* Begin recursion through the jointree ,jointree代表From和join子句*/
    jtnode = pull_up_sublinks_jointree_recurse(root,
                                           (Node *) root->parse->jointree,
                                           &relids);

    //把生成的结果jtnode,放入jointree表达式中
    if (IsA(jtnode, FromExpr))
    	root->parse->jointree = (FromExpr *) jtnode;
    else
    	root->parse->jointree = makeFromExpr(list_make1(jtnode), NULL);
}
static Node *
pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
								  Relids *relids)
{
    if (jtnode == NULL)
    else if (IsA(jtnode, RangeTblRef))//如果是范围表,直接合并到表示关系的relid中
    //处理FromExpr
    else if (IsA(jtnode, FromExpr))
    {
    	/* 递归上拉From中所有子连接 */
    	foreach(l, f->fromlist)
    	{
            newchild = pull_up_sublinks_jointree_recurse(root,
                            lfirst(l),
                            &childrelids);
            //合并上拉的子连接到本层
            newfromlist = lappend(newfromlist, newchild);
            frelids = bms_join(frelids, childrelids);
    	}
    	//递归上拉子连接中的条件
        newf->quals = pull_up_sublinks_qual_recurse(root, f->quals,
    					&jtlink, frelids,
    					NULL, NULL);
    }
    else if (IsA(jtnode, JoinExpr))
    {
    	/* 上拉左右连接中的子连接 */
    	j->larg = pull_up_sublinks_jointree_recurse(root, j->larg,
    						&leftrelids);
    	j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg,
    						&rightrelids);
    	//递归上拉子连接中的条件
    	switch (j->jointype)
    	{
    		case JOIN_INNER:
    			j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
                					 &jtlink,
                					 bms_union(leftrelids,
                							rightrelids),
                					 NULL, NULL);
    			break;
    		case JOIN_LEFT:
    			j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
							 &j->rarg,
							 rightrelids,
							 NULL, NULL);
    			break;
    		case JOIN_FULL:
    			/* can't do anything with full-join quals */
    			break;
    		case JOIN_RIGHT:
    			j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
							 &j->larg,
							 leftrelids,
							 NULL, NULL);
    			break;
    		default:
    			elog(ERROR, "unrecognized join type: %d",
    				 (int) j->jointype);
    			break;
    	}
    
    }
    else
    	elog(ERROR, "unrecognized node type: %d",
    		 (int) nodeTag(jtnode));
    return jtnode;
}

用于递归上拉限制条件中存在的子连接(ANY,EXISTS),经过convert_ANY_sublink_to_join转按ANY子连接,经过convert_EXISTS_sublink_to_join消除EXISTS递归消除

static Node *
//node为子连接节点,jtlink1和available_rels1都是上拉后的返回结果
pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
    		  Node **jtlink1, Relids available_rels1,
    		  Node **jtlink2, Relids available_rels2)
{
    if (node == NULL)
    	return NULL;
    if (IsA(node, SubLink))
    {
    	/* 上拉ANY_SUBLINK类型子连接*/
    	if (sublink->subLinkType == ANY_SUBLINK)
    	{
        	if ((j = convert_ANY_sublink_to_join(root, sublink,
        				available_rels1)) != NULL)//上拉ANY子连接成功
        	{
                //处理刚上拉的右关系
                j->rarg = pull_up_sublinks_jointree_recurse(root,
                        			j->rarg,
                        			&child_rels);
        
                //处理刚刚上拉的子连接
            	j->quals = pull_up_sublinks_qual_recurse(root,
    							 j->quals,
    							 &j->larg,
    							 available_rels1,
    							 &j->rarg,
    							 child_rels);
        		/* Return NULL representing constant TRUE */
        		return NULL;
        	}
        	//处理available_rels2对应项
        	if (available_rels2 != NULL &&
        		(j = convert_ANY_sublink_to_join(root, sublink,
						 available_rels2)) != NULL)

    	}
    	//上拉EXIST_SUBLINKS对应项
    	else if (sublink->subLinkType == EXISTS_SUBLINK)
    	{
    		if ((j = convert_EXISTS_sublink_to_join(root, sublink, false,
						available_rels1)) != NULL)
    		{
    		    //基本逻辑和ANY块相同
    		}
    		if (available_rels2 != NULL &&
    			(j = convert_EXISTS_sublink_to_join(root, sublink, false,
							available_rels2)) != NULL)
    		{
                //基本逻辑和ANY块相同
    		}
    	}
    	/* Else return it unmodified */
    	return node;
    }
    //处理NOT_EXISTS
    if (not_clause(node))
    {
    }
    if (and_clause(node)) //处理AND每一个子项
    {
    	/* Recurse into AND clause */
    	List	   *newclauses = NIL;
    	ListCell   *l;
    
    	foreach(l, ((BoolExpr *) node)->args)
    	{
    		Node	   *oldclause = (Node *) lfirst(l);
    		Node	   *newclause;
    
    		newclause = pull_up_sublinks_qual_recurse(root,
							  oldclause,
							  jtlink1,
							  available_rels1,
							  jtlink2,
							  available_rels2);
    		if (newclause)
    			newclauses = lappend(newclauses, newclause);
    	}
    	/* We might have got back fewer clauses than we started with */
    	if (newclauses == NIL)
    		return NULL;
    	else if (list_length(newclauses) == 1)
    		return (Node *) linitial(newclauses);
    	else
    		return (Node *) make_andclause(newclauses);
    }
    /* Stop if not an AND */
    return node;
}

pull_up_sublinks_qual_recurse和pull_up_sublinks_qual_recurse交替调用,穷尽所有ANY/EXISTS类型子连接

ANY(类型包括ANY,NOT ANY,SOME, NOT SOME, IN, NOT IN)

JoinExpr *
convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
							Relids available_rels)
{
    JoinExpr   *result;//子链接转换为连接关系
    
    Assert(sublink->subLinkType == ANY_SUBLINK);
    
    //子连接右操作数:不能出现包含上层任何Var对象
    if (contain_vars_of_level((Node *) subselect, 1))
    	return NULL;
    
    /*子连接左操作数
     *a. 一定与上层出现的Var结构体表示的对象有相同,如果没有,可以直接求解,不用和上层关联
     *b. 不能引用上层出现的关系
     *c. 不能出现易失函数
     */
    upper_varnos = pull_varnos(sublink->testexpr);//情况a
    if (bms_is_empty(upper_varnos))
    	return NULL;
    
    /*
     * However, it can't refer to anything outside available_rels.
     */
    if (!bms_is_subset(upper_varnos, available_rels))//情况b
    	return NULL;
    
    /*
     * The combining operators and left-hand expressions mustn't be volatile.
     */
    if (contain_volatile_functions(sublink->testexpr))情况c
    	return NULL;
    
    /* Create a dummy ParseState for addRangeTableEntryForSubquery */
    pstate = make_parsestate(NULL);
    
    /*上拉子链接到上层范围表中,作为未来连接的对象 */
    rte = addRangeTableEntryForSubquery(pstate,
    			subselect,
    			makeAlias("ANY_subquery", NIL),
    			false,
    			false);
    parse->rtable = lappend(parse->rtable, rte);
    rtindex = list_length(parse->rtable);
    
    /*
     * Form a RangeTblRef for the pulled-up sub-select.
     */
    rtr = makeNode(RangeTblRef);
    rtr->rtindex = rtindex;
    
    /*
     * Build a list of Vars representing the subselect outputs.
     */
    subquery_vars = generate_subquery_vars(root,
    		   subselect->targetList,
    		   rtindex);
    
    /* 连接条件*/
    quals = convert_testexpr(root, sublink->testexpr, subquery_vars);
    
    /*
     * And finally, build the JoinExpr node.
     */
    result = makeNode(JoinExpr);
    result->jointype = JOIN_SEMI;//半连接
    
    return result;
}

有几种情况会上拉失败:

  1. 右操作数不能包含上层操作对象(包含就是关联子查询了)
  2. 左边操作符
    1. 与上层对象无关
    2. 不能引用上层的关系?
    3. 含有易失函数

将EXISTS,NOT EXISTS上拉

JoinExpr *
convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
	   bool under_not, Relids available_rels)
{
    JoinExpr   *result;
    Query	   *parse = root->parse;
    
    Assert(sublink->subLinkType == EXISTS_SUBLINK);
    
    /*对于右操作数
     * a. 不支持带有WITH子句的格式
     * b. 不支持集合操作或者带有CTE,聚合函数, HAVING,LIMIT等格式
     * c. 不支持FROM或者WHERE子句为空
     */
    if (subselect->cteList)//情况a
    	return NULL;
    

    if (!simplify_EXISTS_query(root, subselect))//情况b
    	return NULL;

    if (subselect->jointree->fromlist == NIL)//情况c
    	return NULL;
    
    whereClause = subselect->jointree->quals;//子查询条件保存
    subselect->jointree->quals = NULL;
    
    //右操作树的子查询不能包含上层中出现的任何Var对象
    if (contain_vars_of_level((Node *) subselect, 1))
    	return NULL;
    //右操作数的where需要有山层的Var,这样才能够成功连接
    if (!contain_vars_of_level(whereClause, 1))
    	return NULL;
    //易失函数不能优化
    if (contain_volatile_functions(whereClause))
    	return NULL;
    
    //上拉成功,到顶层范围表
    rtoffset = list_length(parse->rtable);
    OffsetVarNodes((Node *) subselect, rtoffset, 0);
    OffsetVarNodes(whereClause, rtoffset, 0);
    
    /* 从技术上看来,上拉子查询就是把子查询中范围表拉到From子句,并把Where条件合并 */
    parse->rtable = list_concat(parse->rtable, subselect->rtable);
    
    /*
     * And finally, build the JoinExpr node.
     */
    result = makeNode(JoinExpr);
    result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
    return result;
}

不能上拉的条件:

  1. 右操作数:
    1. 不支持带有WITH子句的格式
    2. 不支持集合操作或者带有CTE(定义临时表的存在只是一个查询/withi子句),聚合函数, HAVING,LIMIT等格式
    3. 不支持FROM或者WHERE子句为空
    4. 不支持关联子查询,但是Where必须有上层Var
    5. 易失函数不能优化

上拉子查询

搜索From子句中的范围表,上拉其中子查询

void
pull_up_subqueries(PlannerInfo *root)
{
    /* Recursion starts with no containing join nor appendrel */
    root->parse->jointree = (FromExpr *)
    	pull_up_subqueries_recurse(root, (Node *) root->parse->jointree,
    						NULL, NULL, NULL, false);
}

真正执行上拉

static Node *
pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode,
						   JoinExpr *lowest_outer_join,
						   JoinExpr *lowest_nulling_outer_join,
						   AppendRelInfo *containing_appendrel,
						   bool deletion_ok)
{
    Assert(jtnode != NULL);
    if (IsA(jtnode, RangeTblRef))//一个范围表
    {
    	if (rte->rtekind == RTE_SUBQUERY &&
    		is_simple_subquery(rte->subquery, rte,
			   lowest_outer_join, deletion_ok) &&//如果是简单子查询,上拉
    		(containing_appendrel == NULL ||
    		 is_safe_append_member(rte->subquery)))
    		return pull_up_simple_subquery(root, jtnode, rte,
				   lowest_outer_join,
				   lowest_nulling_outer_join,
				   containing_appendrel,
				   deletion_ok);
    

    	if (rte->rtekind == RTE_SUBQUERY &&
    		is_simple_union_all(rte->subquery))//如果是简单UNION操作,上拉
    		return pull_up_simple_union_all(root, jtnode, rte);
    
    	//普通Value,上拉
    	if (rte->rtekind == RTE_VALUES &&
    		lowest_outer_join == NULL &&
    		containing_appendrel == NULL &&
    		is_simple_values(root, rte, deletion_ok))
    		return pull_up_simple_values(root, jtnode, rte);
    
    	/* Otherwise, do nothing at this node. */
    }
    else if (IsA(jtnode, FromExpr))
    {
    	FromExpr   *f = (FromExpr *) jtnode;

    	foreach(l, f->fromlist)
    	{
    		lfirst(l) = pull_up_subqueries_recurse(root, lfirst(l),
						   lowest_outer_join,
						   lowest_nulling_outer_join,
						   NULL,
						   sub_deletion_ok);
    	}
    }
    else if (IsA(jtnode, JoinExpr))
    {
    	JoinExpr   *j = (JoinExpr *) jtnode;
    
    	/* Recurse, being careful to tell myself when inside outer join */
    	switch (j->jointype)
    	{
    	    //对各种JOIN子句进行上拉
    	}
    }
    else
    	elog(ERROR, "unrecognized node type: %d",
    		 (int) nodeTag(jtnode));
    return jtnode;
}

可以上拉的:

  1. 简单子查询,Union ALL,普通Value
  2. 可以递归多个From子句,也可以递归上拉join左右操作数

优化顶层Union all

void
flatten_simple_union_all(PlannerInfo *root)
{
	Query	   *parse = root->parse;

	/* 存在Union嵌套,就是WITH子句 */
	if (root->hasRecursion)
		return;

	/* UNION ALL的列不同*/
	if (!is_simple_union_all_recurse((Node *) topop, parse, topop->colTypes))
		return;
    //构造AppendRefInfo节点优化UNION ALL
	pull_up_union_leaf_queries((Node *) topop, root, leftmostRTI, parse, 0);
}

UNION和UNION ALL和OR

  1. UNION和UNION ALL都是使用AppendRefInfo替代,但是UNION会排序和去重
  2. OR里面会有OrFilter
原文地址:https://www.cnblogs.com/biterror/p/7161666.html