PostgreSQL在何处处理 sql查询之六十四

Path的定义：

/*
 * Type "Path" is used as-is for sequential-scan paths, as well as some other
 * simple plan types that we don't need any extra information in the path for.
 * For other path types it is the first component of a larger struct.
 *
 * "pathtype" is the NodeTag of the Plan node we could build from this Path.
 * It is partially redundant with the Path's NodeTag, but allows us to use
 * the same Path type for multiple Plan types when there is no need to
 * distinguish the Plan type during path processing.
 *
 * "param_info", if not NULL, links to a ParamPathInfo that identifies outer
 * relation(s) that provide parameter values to each scan of this path.
 * That means this path can only be joined to those rels by means of nestloop
 * joins with this path on the inside.    Also note that a parameterized path
 * is responsible for testing all "movable" joinclauses involving this rel
 * and the specified outer rel(s).
 *
 * "rows" is the same as parent->rows in simple paths, but in parameterized
 * paths and UniquePaths it can be less than parent->rows, reflecting the
 * fact that we've filtered by extra join conditions or removed duplicates.
 *
 * "pathkeys" is a List of PathKey nodes (see above), describing the sort
 * ordering of the path's output rows.
 */
typedef struct Path
{
    NodeTag        type;

    NodeTag        pathtype;        /* tag identifying scan/join method */

    RelOptInfo *parent;            /* the relation this path can build */
    ParamPathInfo *param_info;    /* parameterization info, or NULL if none */

    /* estimated size/costs for path (see costsize.c for more info) */
    double        rows;            /* estimated number of result tuples */
    Cost        startup_cost;    /* cost expended before fetching any tuples */
    Cost        total_cost;        /* total cost (assuming all tuples fetched) */

    List       *pathkeys;        /* sort ordering of path's output */
    /* pathkeys is a List of PathKey nodes; see above */
} Path;

hash join 的path计算是在这里：

/*
 * hash_inner_and_outer
 *      Create hashjoin join paths by explicitly hashing both the outer and
 *      inner keys of each available hash clause.
 *
 * 'joinrel' is the join relation
 * 'outerrel' is the outer join relation
 * 'innerrel' is the inner join relation
 * 'restrictlist' contains all of the RestrictInfo nodes for restriction
 *        clauses that apply to this join
 * 'jointype' is the type of join to do
 * 'sjinfo' is extra info about the join for selectivity estimation
 * 'semifactors' contains valid data if jointype is SEMI or ANTI
 * 'param_source_rels' are OK targets for parameterization of result paths
 */
static void
hash_inner_and_outer(PlannerInfo *root,
                     RelOptInfo *joinrel,
                     RelOptInfo *outerrel,
                     RelOptInfo *innerrel,
                     List *restrictlist,
                     JoinType jointype,
                     SpecialJoinInfo *sjinfo,
                     SemiAntiJoinFactors *semifactors,
                     Relids param_source_rels)
{
    bool        isouterjoin = IS_OUTER_JOIN(jointype);
    List       *hashclauses;
    ListCell   *l;

    /*
     * We need to build only one hashclauses list for any given pair of outer
     * and inner relations; all of the hashable clauses will be used as keys.
     *
     * Scan the join's restrictinfo list to find hashjoinable clauses that are
     * usable with this pair of sub-relations.
     */
    hashclauses = NIL;
    foreach(l, restrictlist)
    {
        RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);

        /*
         * If processing an outer join, only use its own join clauses for
         * hashing.  For inner joins we need not be so picky.
         */
        if (isouterjoin && restrictinfo->is_pushed_down)
            continue;

        if (!restrictinfo->can_join ||
            restrictinfo->hashjoinoperator == InvalidOid)
            continue;            /* not hashjoinable */

        /*
         * Check if clause has the form "outer op inner" or "inner op outer".
         */
        if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
            continue;            /* no good for these input relations */

        hashclauses = lappend(hashclauses, restrictinfo);
    }


    /* If we found any usable hashclauses, make paths */
    if (hashclauses)
    {
        /*
         * We consider both the cheapest-total-cost and cheapest-startup-cost
         * outer paths.  There's no need to consider any but the
         * cheapest-total-cost inner path, however.
         */
        Path       *cheapest_startup_outer = outerrel->cheapest_startup_path;
        Path       *cheapest_total_outer = outerrel->cheapest_total_path;
        Path       *cheapest_total_inner = innerrel->cheapest_total_path;

        /* Unique-ify if need be; we ignore parameterized possibilities */
        if (jointype == JOIN_UNIQUE_OUTER)
        {

            fprintf(stderr,"add_paths_to_joinrel--------1\n");

            cheapest_total_outer = (Path *)
                create_unique_path(root, outerrel,
                                   cheapest_total_outer, sjinfo);
            Assert(cheapest_total_outer);
            jointype = JOIN_INNER;
            try_hashjoin_path(root,
                              joinrel,
                              jointype,
                              sjinfo,
                              semifactors,
                              param_source_rels,
                              cheapest_total_outer,
                              cheapest_total_inner,
                              restrictlist,
                              hashclauses);
            /* no possibility of cheap startup here */
        }
        else if (jointype == JOIN_UNIQUE_INNER)
        {

            fprintf(stderr,"add_paths_to_joinrel--------1\n");

            cheapest_total_inner = (Path *)
                create_unique_path(root, innerrel,
                                   cheapest_total_inner, sjinfo);
            Assert(cheapest_total_inner);
            jointype = JOIN_INNER;
            try_hashjoin_path(root,
                              joinrel,
                              jointype,
                              sjinfo,
                              semifactors,
                              param_source_rels,
                              cheapest_total_outer,
                              cheapest_total_inner,
                              restrictlist,
                              hashclauses);
            if (cheapest_startup_outer != cheapest_total_outer)
                try_hashjoin_path(root,
                                  joinrel,
                                  jointype,
                                  sjinfo,
                                  semifactors,
                                  param_source_rels,
                                  cheapest_startup_outer,
                                  cheapest_total_inner,
                                  restrictlist,
                                  hashclauses);
        }
        else
        {

            fprintf(stderr,"add_paths_to_joinrel--------3\n");

            /*
             * For other jointypes, we consider the cheapest startup outer
             * together with the cheapest total inner, and then consider
             * pairings of cheapest-total paths including parameterized ones.
             * There is no use in generating parameterized paths on the basis
             * of possibly cheap startup cost, so this is sufficient.
             */
            ListCell   *lc1;
            ListCell   *lc2;

            try_hashjoin_path(root,
                              joinrel,
                              jointype,
                              sjinfo,
                              semifactors,
                              param_source_rels,
                              cheapest_startup_outer,
                              cheapest_total_inner,
                              restrictlist,
                              hashclauses);

            foreach(lc1, outerrel->cheapest_parameterized_paths)
            {
                Path       *outerpath = (Path *) lfirst(lc1);

                /*
                 * We cannot use an outer path that is parameterized by the
                 * inner rel.
                 */
                if (bms_overlap(PATH_REQ_OUTER(outerpath), innerrel->relids))
                    continue;

                foreach(lc2, innerrel->cheapest_parameterized_paths)
                {
                    Path       *innerpath = (Path *) lfirst(lc2);

                    /*
                     * We cannot use an inner path that is parameterized by
                     * the outer rel, either.
                     */
                    if (bms_overlap(PATH_REQ_OUTER(innerpath),
                                    outerrel->relids))
                        continue;

                    if (outerpath == cheapest_startup_outer &&
                        innerpath == cheapest_total_inner)
                        continue;        /* already tried it */

                    try_hashjoin_path(root,
                                      joinrel,
                                      jointype,
                                      sjinfo,
                                      semifactors,
                                      param_source_rels,
                                      outerpath,
                                      innerpath,
                                      restrictlist,
                                      hashclauses);
                }
            }
        }
    }
}

对我的查询，进行简化：

postgres=# select * from sales s inner join customers c on c.cust_id = s.cust_id;
 cust_id |   item   | cust_id | cust_name 
---------+----------+---------+-----------
       2 | camera   |       2 | John Doe
       3 | computer |       3 | Jane Doe
       3 | monitor  |       3 | Jane Doe
(3 rows)

postgres=#

简化后：

static void
hash_inner_and_outer(PlannerInfo *root,
                     RelOptInfo *joinrel,
                     RelOptInfo *outerrel,
                     RelOptInfo *innerrel,
                     List *restrictlist,
                     JoinType jointype,
                     SpecialJoinInfo *sjinfo,
                     SemiAntiJoinFactors *semifactors,
                     Relids param_source_rels)
{
    ...
    hashclauses = NIL;
    foreach(l, restrictlist)
    {
        ...
        hashclauses = lappend(hashclauses, restrictinfo);
    }


    /* If we found any usable hashclauses, make paths */
    if (hashclauses)
    {
        ...
        Path       *cheapest_startup_outer = outerrel->cheapest_startup_path;
        Path       *cheapest_total_outer = outerrel->cheapest_total_path;
        Path       *cheapest_total_inner = innerrel->cheapest_total_path;

        /* Unique-ify if need be; we ignore parameterized possibilities */
        if (jointype == JOIN_UNIQUE_OUTER)
        {
           ...
        }
        else if (jointype == JOIN_UNIQUE_INNER)
        {
           ...
        }
        else
        {
/*
             * For other jointypes, we consider the cheapest startup outer
             * together with the cheapest total inner, and then consider
             * pairings of cheapest-total paths including parameterized ones.
             * There is no use in generating parameterized paths on the basis
             * of possibly cheap startup cost, so this is sufficient.
             */
            ListCell   *lc1;
            ListCell   *lc2;

            try_hashjoin_path(root,
                              joinrel,
                              jointype,
                              sjinfo,
                              semifactors,
                              param_source_rels,
                              cheapest_startup_outer,
                              cheapest_total_inner,
                              restrictlist,
                              hashclauses);

           ...
        }
    }
}

而 try_hashjoin_path是如何作呢？

/*
 * try_hashjoin_path
 *      Consider a hash join path; if it appears useful, push it into
 *      the joinrel's pathlist via add_path().
 */
static void
try_hashjoin_path(PlannerInfo *root,
                  RelOptInfo *joinrel,
                  JoinType jointype,
                  SpecialJoinInfo *sjinfo,
                  SemiAntiJoinFactors *semifactors,
                  Relids param_source_rels,
                  Path *outer_path,
                  Path *inner_path,
                  List *restrict_clauses,
                  List *hashclauses)
{
    Relids        required_outer;
    JoinCostWorkspace workspace;

    /*
     * Check to see if proposed path is still parameterized, and reject if the
     * parameterization wouldn't be sensible.
     */
    required_outer = calc_non_nestloop_required_outer(outer_path,
                                                      inner_path);
    if (required_outer &&
        !bms_overlap(required_outer, param_source_rels))
    {
        /* Waste no memory when we reject a path here */
        bms_free(required_outer);
        return;
    }

    /*
     * See comments in try_nestloop_path().  Also note that hashjoin paths
     * never have any output pathkeys, per comments in create_hashjoin_path.
     */
    initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
                          outer_path, inner_path,
                          sjinfo, semifactors);

    if (add_path_precheck(joinrel,
                          workspace.startup_cost, workspace.total_cost,
                          NIL, required_outer))
    {
        add_path(joinrel, (Path *)
                 create_hashjoin_path(root,
                                      joinrel,
                                      jointype,
                                      &workspace,
                                      sjinfo,
                                      semifactors,
                                      outer_path,
                                      inner_path,
                                      restrict_clauses,
                                      required_outer,
                                      hashclauses));
    }
    else
    {
        /* Waste no memory when we reject a path here */
        bms_free(required_outer);
    }
}

接着看 create_hashjoin_path函数：

/*
 * create_hashjoin_path
 *      Creates a pathnode corresponding to a hash join between two relations.
 *
 * 'joinrel' is the join relation
 * 'jointype' is the type of join required
 * 'workspace' is the result from initial_cost_hashjoin
 * 'sjinfo' is extra info about the join for selectivity estimation
 * 'semifactors' contains valid data if jointype is SEMI or ANTI
 * 'outer_path' is the cheapest outer path
 * 'inner_path' is the cheapest inner path
 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
 * 'required_outer' is the set of required outer rels
 * 'hashclauses' are the RestrictInfo nodes to use as hash clauses
 *        (this should be a subset of the restrict_clauses list)
 */
HashPath *
create_hashjoin_path(PlannerInfo *root,
                     RelOptInfo *joinrel,
                     JoinType jointype,
                     JoinCostWorkspace *workspace,
                     SpecialJoinInfo *sjinfo,
                     SemiAntiJoinFactors *semifactors,
                     Path *outer_path,
                     Path *inner_path,
                     List *restrict_clauses,
                     Relids required_outer,
                     List *hashclauses)
{
    HashPath   *pathnode = makeNode(HashPath);

    pathnode->jpath.path.pathtype = T_HashJoin;
    pathnode->jpath.path.parent = joinrel;
    pathnode->jpath.path.param_info =
        get_joinrel_parampathinfo(root,
                                  joinrel,
                                  outer_path,
                                  inner_path,
                                  sjinfo,
                                  required_outer,
                                  &restrict_clauses);

    /*
     * A hashjoin never has pathkeys, since its output ordering is
     * unpredictable due to possible batching.    XXX If the inner relation is
     * small enough, we could instruct the executor that it must not batch,
     * and then we could assume that the output inherits the outer relation's
     * ordering, which might save a sort step.    However there is considerable
     * downside if our estimate of the inner relation size is badly off. For
     * the moment we don't risk it.  (Note also that if we wanted to take this
     * seriously, joinpath.c would have to consider many more paths for the
     * outer rel than it does now.)
     */
    pathnode->jpath.path.pathkeys = NIL;
    pathnode->jpath.jointype = jointype;
    pathnode->jpath.outerjoinpath = outer_path;
    pathnode->jpath.innerjoinpath = inner_path;
    pathnode->jpath.joinrestrictinfo = restrict_clauses;
    pathnode->path_hashclauses = hashclauses;
    /* final_cost_hashjoin will fill in pathnode->num_batches */

    final_cost_hashjoin(root, pathnode, workspace, sjinfo, semifactors);

    return pathnode;
}

再往下看，final_cost_hashjoin:

/*
 * final_cost_hashjoin
 *      Final estimate of the cost and result size of a hashjoin path.
 *
 * Note: the numbatches estimate is also saved into 'path' for use later
 *
 * 'path' is already filled in except for the rows and cost fields and
 *        num_batches
 * 'workspace' is the result from initial_cost_hashjoin
 * 'sjinfo' is extra info about the join for selectivity estimation
 * 'semifactors' contains valid data if path->jointype is SEMI or ANTI
 */
void
final_cost_hashjoin(PlannerInfo *root, HashPath *path,
                    JoinCostWorkspace *workspace,
                    SpecialJoinInfo *sjinfo,
                    SemiAntiJoinFactors *semifactors)
{
    Path       *outer_path = path->jpath.outerjoinpath;
    Path       *inner_path = path->jpath.innerjoinpath;
    double        outer_path_rows = outer_path->rows;
    double        inner_path_rows = inner_path->rows;
    List       *hashclauses = path->path_hashclauses;
    Cost        startup_cost = workspace->startup_cost;
    Cost        run_cost = workspace->run_cost;
    int            numbuckets = workspace->numbuckets;
    int            numbatches = workspace->numbatches;
    Cost        cpu_per_tuple;
    QualCost    hash_qual_cost;
    QualCost    qp_qual_cost;
    double        hashjointuples;
    double        virtualbuckets;
    Selectivity innerbucketsize;
    ListCell   *hcl;

    /* Mark the path with the correct row estimate */
    if (path->jpath.path.param_info)
        path->jpath.path.rows = path->jpath.path.param_info->ppi_rows;
    else
        path->jpath.path.rows = path->jpath.path.parent->rows;

    /*
     * We could include disable_cost in the preliminary estimate, but that
     * would amount to optimizing for the case where the join method is
     * disabled, which doesn't seem like the way to bet.
     */
    if (!enable_hashjoin)
        startup_cost += disable_cost;

    /* mark the path with estimated # of batches */
    path->num_batches = numbatches;

    /* and compute the number of "virtual" buckets in the whole join */
    virtualbuckets = (double) numbuckets *(double) numbatches;

    /*
     * Determine bucketsize fraction for inner relation.  We use the smallest
     * bucketsize estimated for any individual hashclause; this is undoubtedly
     * conservative.
     *
     * BUT: if inner relation has been unique-ified, we can assume it's good
     * for hashing.  This is important both because it's the right answer, and
     * because we avoid contaminating the cache with a value that's wrong for
     * non-unique-ified paths.
     */
    if (IsA(inner_path, UniquePath))
        innerbucketsize = 1.0 / virtualbuckets;
    else
    {
        innerbucketsize = 1.0;
        foreach(hcl, hashclauses)
        {
            RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
            Selectivity thisbucketsize;

            Assert(IsA(restrictinfo, RestrictInfo));

            /*
             * First we have to figure out which side of the hashjoin clause
             * is the inner side.
             *
             * Since we tend to visit the same clauses over and over when
             * planning a large query, we cache the bucketsize estimate in the
             * RestrictInfo node to avoid repeated lookups of statistics.
             */
            if (bms_is_subset(restrictinfo->right_relids,
                              inner_path->parent->relids))
            {
                /* righthand side is inner */
                thisbucketsize = restrictinfo->right_bucketsize;
                if (thisbucketsize < 0)
                {
                    /* not cached yet */
                    thisbucketsize =
                        estimate_hash_bucketsize(root,
                                           get_rightop(restrictinfo->clause),
                                                 virtualbuckets);
                    restrictinfo->right_bucketsize = thisbucketsize;
                }
            }
            else
            {
                Assert(bms_is_subset(restrictinfo->left_relids,
                                     inner_path->parent->relids));
                /* lefthand side is inner */
                thisbucketsize = restrictinfo->left_bucketsize;
                if (thisbucketsize < 0)
                {
                    /* not cached yet */
                    thisbucketsize =
                        estimate_hash_bucketsize(root,
                                            get_leftop(restrictinfo->clause),
                                                 virtualbuckets);
                    restrictinfo->left_bucketsize = thisbucketsize;
                }
            }

            if (innerbucketsize > thisbucketsize)
                innerbucketsize = thisbucketsize;
        }
    }

    /*
     * Compute cost of the hashquals and qpquals (other restriction clauses)
     * separately.
     */
    cost_qual_eval(&hash_qual_cost, hashclauses, root);
    cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
    qp_qual_cost.startup -= hash_qual_cost.startup;
    qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;

    /* CPU costs */

    if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI)
    {
        double        outer_matched_rows;
        Selectivity inner_scan_frac;

        /*
         * SEMI or ANTI join: executor will stop after first match.
         *
         * For an outer-rel row that has at least one match, we can expect the
         * bucket scan to stop after a fraction 1/(match_count+1) of the
         * bucket's rows, if the matches are evenly distributed.  Since they
         * probably aren't quite evenly distributed, we apply a fuzz factor of
         * 2.0 to that fraction.  (If we used a larger fuzz factor, we'd have
         * to clamp inner_scan_frac to at most 1.0; but since match_count is
         * at least 1, no such clamp is needed now.)
         */
        outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac);
        inner_scan_frac = 2.0 / (semifactors->match_count + 1.0);

        startup_cost += hash_qual_cost.startup;
        run_cost += hash_qual_cost.per_tuple * outer_matched_rows *
            clamp_row_est(inner_path_rows * innerbucketsize * inner_scan_frac) * 0.5;

        /*
         * For unmatched outer-rel rows, the picture is quite a lot different.
         * In the first place, there is no reason to assume that these rows
         * preferentially hit heavily-populated buckets; instead assume they
         * are uncorrelated with the inner distribution and so they see an
         * average bucket size of inner_path_rows / virtualbuckets.  In the
         * second place, it seems likely that they will have few if any exact
         * hash-code matches and so very few of the tuples in the bucket will
         * actually require eval of the hash quals.  We don't have any good
         * way to estimate how many will, but for the moment assume that the
         * effective cost per bucket entry is one-tenth what it is for
         * matchable tuples.
         */
        run_cost += hash_qual_cost.per_tuple *
            (outer_path_rows - outer_matched_rows) *
            clamp_row_est(inner_path_rows / virtualbuckets) * 0.05;

        /* Get # of tuples that will pass the basic join */
        if (path->jpath.jointype == JOIN_SEMI)
            hashjointuples = outer_matched_rows;
        else
            hashjointuples = outer_path_rows - outer_matched_rows;
    }
    else
    {
        /*
         * The number of tuple comparisons needed is the number of outer
         * tuples times the typical number of tuples in a hash bucket, which
         * is the inner relation size times its bucketsize fraction.  At each
         * one, we need to evaluate the hashjoin quals.  But actually,
         * charging the full qual eval cost at each tuple is pessimistic,
         * since we don't evaluate the quals unless the hash values match
         * exactly.  For lack of a better idea, halve the cost estimate to
         * allow for that.
         */
        startup_cost += hash_qual_cost.startup;
        run_cost += hash_qual_cost.per_tuple * outer_path_rows *
            clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;

        /*
         * Get approx # tuples passing the hashquals.  We use
         * approx_tuple_count here because we need an estimate done with
         * JOIN_INNER semantics.
         */
        hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
    }

    /*
     * For each tuple that gets through the hashjoin proper, we charge
     * cpu_tuple_cost plus the cost of evaluating additional restriction
     * clauses that are to be applied at the join.    (This is pessimistic since
     * not all of the quals may get evaluated at each tuple.)
     */
    startup_cost += qp_qual_cost.startup;
    cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
    run_cost += cpu_per_tuple * hashjointuples;

    path->jpath.path.startup_cost = startup_cost;
    path->jpath.path.total_cost = startup_cost + run_cost;
}