PostgreSQL在何处处理 sql查询之五十四

接前面,从 cheapeast_path 的角度,关注 query_planner 函数,对其进行简化:

void
query_planner(PlannerInfo *root, List *tlist,
              double tuple_fraction, double limit_tuples,
              Path **cheapest_path, Path **sorted_path,
              double *num_groups)
{
    Query       *parse = root->parse;
    List       *joinlist;
    RelOptInfo *final_rel;
    Path       *cheapestpath;
    Path       *sortedpath;
    Index        rti;
    double        total_pages;

    ...

    *cheapest_path = cheapestpath;
    *sorted_path = sortedpath;
}

再关注 cheapestpath:

Path       *cheapestpath;

...

    /*
     * Pick out the cheapest-total path and the cheapest presorted path for
     * the requested pathkeys (if there is one).  We should take the tuple
     * fraction into account when selecting the cheapest presorted path, but
     * not when selecting the cheapest-total path, since if we have to sort
     * then we'll have to fetch all the tuples.  (But there's a special case:
     * if query_pathkeys is NIL, meaning order doesn't matter, then the
     * "cheapest presorted" path will be the cheapest overall for the tuple
     * fraction.)
     *
     * The cheapest-total path is also the one to use if grouping_planner
     * decides to use hashed aggregation, so we return it separately even if
     * this routine thinks the presorted path is the winner.
     */
    cheapestpath = final_rel->cheapest_total_path;

再看 final_rel: 从final_rel的观点,对 query_planner 进行简化:

RelOptInfo *final_rel;

...

    /*
     * Ready to do the primary planning.
     */
    final_rel = make_one_rel(root, joinlist);

...
    cheapestpath = final_rel->cheapest_total_path;

接着,对 make_one_rel里如何返回 cheapest_total_path 进行深入分析:

/*
 * make_one_rel
 *      Finds all possible access paths for executing a query, returning a
 *      single rel that represents the join of all base rels in the query.
 */
RelOptInfo *
make_one_rel(PlannerInfo *root, List *joinlist)
{
    RelOptInfo *rel;
    Index        rti;

    /*
     * Construct the all_baserels Relids set.
     */
    root->all_baserels = NULL;

    for (rti = 1; rti < root->simple_rel_array_size; rti++)
    {
        RelOptInfo *brel = root->simple_rel_array[rti];

        /* there may be empty slots corresponding to non-baserel RTEs */
        if (brel == NULL)
            continue;

        Assert(brel->relid == rti);        /* sanity check on array */

        /* ignore RTEs that are "other rels" */
        if (brel->reloptkind != RELOPT_BASEREL)
            continue;

        root->all_baserels = bms_add_member(root->all_baserels, brel->relid);

    }

    /*
     * Generate access paths for the base rels.
     */
    set_base_rel_sizes(root);
    set_base_rel_pathlists(root);

    /*
     * Generate access paths for the entire join tree.
     */
    rel = make_rel_from_joinlist(root, joinlist);

    /*
     * The result should join all and only the query's base rels.
     */
    Assert(bms_equal(rel->relids, root->all_baserels));

    return rel;
}

若要进一步分析,就是这个  make_rel_from_joinlist 了:

/*
 * make_rel_from_joinlist
 *      Build access paths using a "joinlist" to guide the join path search.
 *
 * See comments for deconstruct_jointree() for definition of the joinlist
 * data structure.
 */
static RelOptInfo *
make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
{
    int            levels_needed;
    List       *initial_rels;
    ListCell   *jl;

    /*
     * Count the number of child joinlist nodes.  This is the depth of the
     * dynamic-programming algorithm we must employ to consider all ways of
     * joining the child nodes.
     */
    levels_needed = list_length(joinlist);

    if (levels_needed <= 0)
        return NULL;            /* nothing to do? */

    /*
     * Construct a list of rels corresponding to the child joinlist nodes.
     * This may contain both base rels and rels constructed according to
     * sub-joinlists.
     */
    initial_rels = NIL;
    foreach(jl, joinlist)
    {
        Node       *jlnode = (Node *) lfirst(jl);
        RelOptInfo *thisrel;

        if (IsA(jlnode, RangeTblRef))
        {
            int            varno = ((RangeTblRef *) jlnode)->rtindex;

            thisrel = find_base_rel(root, varno);
        }
        else if (IsA(jlnode, List))
        {
            /* Recurse to handle subproblem */
            thisrel = make_rel_from_joinlist(root, (List *) jlnode);
        }
        else
        {
            elog(ERROR, "unrecognized joinlist node type: %d",
                 (int) nodeTag(jlnode));
            thisrel = NULL;        /* keep compiler quiet */
        }

        initial_rels = lappend(initial_rels, thisrel);
    }

    if (levels_needed == 1)
    {
        /*
         * Single joinlist node, so we're done.
         */
        return (RelOptInfo *) linitial(initial_rels);
    }
    else
    {
        /*
         * Consider the different orders in which we could join the rels,
         * using a plugin, GEQO, or the regular join search code.
         *
         * We put the initial_rels list into a PlannerInfo field because
         * has_legal_joinclause() needs to look at it (ugly :-().
         */
        root->initial_rels = initial_rels;

        if (join_search_hook)
            return (*join_search_hook) (root, levels_needed, initial_rels);
        else if (enable_geqo && levels_needed >= geqo_threshold)
            return geqo(root, levels_needed, initial_rels);
        else
            return standard_join_search(root, levels_needed, initial_rels);
    }
}

对于我的简单查询,以上这个  (levels_needed == 1) 条件得到满足,所以就直接  return (RelOptInfo *) linitial(initial_rels);

按照这个视点,简化上述的程序:

static RelOptInfo *
make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
{
    int            levels_needed;
    List       *initial_rels;
    ListCell   *jl;

    /*
     * Count the number of child joinlist nodes.  This is the depth of the
     * dynamic-programming algorithm we must employ to consider all ways of
     * joining the child nodes.
     */
    levels_needed = list_length(joinlist);

    ...
    /*
     * Construct a list of rels corresponding to the child joinlist nodes.
     * This may contain both base rels and rels constructed according to
     * sub-joinlists.
     */
    initial_rels = NIL;
    foreach(jl, joinlist)
    {
        Node       *jlnode = (Node *) lfirst(jl);
        RelOptInfo *thisrel;

        if (IsA(jlnode, RangeTblRef))
        {
int   varno = ((RangeTblRef *) jlnode)->rtindex;

            thisrel = find_base_rel(root, varno);
        }
        else if (IsA(jlnode, List))
        {

            ...
        }
        ...

        initial_rels = lappend(initial_rels, thisrel);
    }

    if (levels_needed == 1)
    {
        /*
         * Single joinlist node, so we're done.
         */
        return (RelOptInfo *) linitial(initial_rels);
    }
    else
    {
       ...
    }
}

这句 

int            varno = ((RangeTblRef *) jlnode)->rtindex;

是拿到 RangeTable 的 index。

thisrel = find_base_rel(root, varno);

是利用刚才的 index 从 root 中把这个 rel 找出来。

 initial_rels = lappend(initial_rels, thisrel);

是挂出一条链条。

return (RelOptInfo *) linitial(initial_rels);

是把第一个RelOptInfo对象取出来。

可以说,虽然是 在 make_rel_from_joinlist 中取出 rel 对象,但是其 cheapest_total_path ,在此之前早就准备好了。

原文地址:https://www.cnblogs.com/gaojian/p/3121299.html