PostgreSQL在何处处理 sql查询之四十一

接前面,看被SeqNext所调用的heap_getnext:

/*
 * heap_getnext - advance the scan one step and hand back the current tuple
 *
 * Dispatches to heapgettup_pagemode() when scan->rs_pageatatime is set and
 * to heapgettup() otherwise, passing the scan's own key count and key array.
 *
 * Returns a pointer to scan->rs_ctup when the fetch left a tuple in it, or
 * NULL when rs_ctup.t_data is NULL afterwards (end of scan).  The returned
 * pointer refers to storage inside the scan descriptor itself.
 */
HeapTuple
heap_getnext(HeapScanDesc scan, ScanDirection direction)
{
    HeapTuple   current = &(scan->rs_ctup);

    /* Note: no locking manipulations needed */

    HEAPDEBUG_1;                /* trace: entering heap_getnext */

    if (scan->rs_pageatatime)
    {
        heapgettup_pagemode(scan, direction, scan->rs_nkeys, scan->rs_key);
    }
    else
    {
        heapgettup(scan, direction, scan->rs_nkeys, scan->rs_key);
    }

    if (current->t_data != NULL)
    {
        /*
         * The fetch produced a new current scan tuple: count it and return
         * the descriptor's own tuple slot.
         */
        HEAPDEBUG_3;            /* trace: returning a tuple */

        pgstat_count_heap_getnext(scan->rs_rd);

        return current;
    }

    /* nothing was fetched: report end of scan */
    HEAPDEBUG_2;                /* trace: returning EOS */
    return NULL;
}

我执行 SQL 语句 select id, val from tst04 where id>1 时,上述代码中的 (scan->rs_pageatatime) 为 true。

所以会执行: heapgettup_pagemode(scan, direction,scan->rs_nkeys, scan->rs_key);

接着分析 heapgettup_pagemode 函数:

/* ----------------
 *        heapgettup_pagemode - fetch next heap tuple in page-at-a-time mode
 *
 *        Same API as heapgettup, but used in page-at-a-time mode
 *
 * The internal logic is much the same as heapgettup's too, but there are some
 * differences: we do not take the buffer content lock (that only needs to
 * happen inside heapgetpage), and we iterate through just the tuples listed
 * in rs_vistuples[] rather than all tuples on the page.  Notice that
 * lineindex is 0-based, where the corresponding loop variable lineoff in
 * heapgettup is 1-based.
 *
 * Parameters:
 *        scan  - scan descriptor; its position fields (rs_inited, rs_cindex,
 *                rs_cbuf, rs_cblock, rs_syncscan) are read and updated here
 *        dir   - forward, backward, or "no movement" (refetch current tuple)
 *        nkeys - number of entries in key
 *        key   - scan keys a tuple must pass; NULL means every tuple listed
 *                in rs_vistuples[] qualifies
 *
 * There is no return value.  The result is delivered through scan->rs_ctup:
 * on success t_data, t_len and t_self describe the tuple and rs_cindex is its
 * index in rs_vistuples[]; when nothing (more) is found t_data is set to NULL.
 *
 * NOTE(review): this function reads rs_cbuf, rs_ntuples and rs_vistuples[]
 * right after calling heapgetpage(), so heapgetpage() presumably fills them
 * in for the requested page -- confirm against heapgetpage().
 * ----------------
 */
static void
heapgettup_pagemode(HeapScanDesc scan,
                    ScanDirection dir,
                    int nkeys,
                    ScanKey key)
{
    HeapTuple    tuple = &(scan->rs_ctup);   /* result slot inside the scan */
    bool        backward = ScanDirectionIsBackward(dir);
    BlockNumber page;           /* block currently being examined */
    bool        finished;       /* set once every block has been visited */
    Page        dp;             /* page image of scan->rs_cbuf */
    int            lines;       /* rs_ntuples for the current page */
    int            lineindex;   /* 0-based position in rs_vistuples[] */
    OffsetNumber lineoff;       /* item offset taken from rs_vistuples[] */
    int            linesleft;   /* entries still to examine on this page */
    ItemId        lpp;          /* item id for lineoff */

    /*
     * calculate next starting lineindex, given scan direction
     */
    if (ScanDirectionIsForward(dir))
    {
        if (!scan->rs_inited)
        {
            /*
             * return null immediately if relation is empty
             */
            if (scan->rs_nblocks == 0)
            {
                Assert(!BufferIsValid(scan->rs_cbuf));
                tuple->t_data = NULL;
                return;
            }
            page = scan->rs_startblock; /* first page */
            heapgetpage(scan, page);
            lineindex = 0;      /* begin with the first rs_vistuples[] entry */
            scan->rs_inited = true;
        }
        else
        {
            /* continue from previously returned page/tuple */
            page = scan->rs_cblock;        /* current page */
            lineindex = scan->rs_cindex + 1;
        }

        dp = (Page) BufferGetPage(scan->rs_cbuf);
        lines = scan->rs_ntuples;
        /* page and lineindex now reference the next visible tid */

        /*
         * Entries from lineindex to the end of rs_vistuples[].  This is 0
         * when the previous call returned the page's last entry, in which
         * case the loop below goes straight to the next page.
         */
        linesleft = lines - lineindex;
    }
    else if (backward)
    {
        if (!scan->rs_inited)
        {
            /*
             * return null immediately if relation is empty
             */
            if (scan->rs_nblocks == 0)
            {
                Assert(!BufferIsValid(scan->rs_cbuf));
                tuple->t_data = NULL;
                return;
            }

            /*
             * Disable reporting to syncscan logic in a backwards scan; it's
             * not very likely anyone else is doing the same thing at the same
             * time, and much more likely that we'll just bollix things for
             * forward scanners.
             */
            scan->rs_syncscan = false;
            /* start from last page of the scan */
            /* i.e. the block just before rs_startblock, wrapping at block 0 */
            if (scan->rs_startblock > 0)
                page = scan->rs_startblock - 1;
            else
                page = scan->rs_nblocks - 1;
            heapgetpage(scan, page);
        }
        else
        {
            /* continue from previously returned page/tuple */
            page = scan->rs_cblock;        /* current page */
        }

        dp = (Page) BufferGetPage(scan->rs_cbuf);
        lines = scan->rs_ntuples;

        /*
         * rs_inited is tested a second time because the starting index on
         * the first call depends on lines, which is only known at this
         * point; the flag is therefore not set until here.
         */
        if (!scan->rs_inited)
        {
            lineindex = lines - 1;
            scan->rs_inited = true;
        }
        else
        {
            lineindex = scan->rs_cindex - 1;
        }
        /* page and lineindex now reference the previous visible tid */

        /* entries from lineindex down to 0; 0 when lineindex is -1 */
        linesleft = lineindex + 1;
    }
    else
    {
        /*
         * ``no movement'' scan direction: refetch prior tuple
         */
        if (!scan->rs_inited)
        {
            Assert(!BufferIsValid(scan->rs_cbuf));
            tuple->t_data = NULL;
            return;
        }

        /* reload the page only if it is not the one the scan already holds */
        page = ItemPointerGetBlockNumber(&(tuple->t_self));
        if (page != scan->rs_cblock)
            heapgetpage(scan, page);

        /* Since the tuple was previously fetched, needn't lock page here */
        dp = (Page) BufferGetPage(scan->rs_cbuf);
        lineoff = ItemPointerGetOffsetNumber(&(tuple->t_self));
        lpp = PageGetItemId(dp, lineoff);
        Assert(ItemIdIsNormal(lpp));

        tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
        tuple->t_len = ItemIdGetLength(lpp);

        /* check that rs_cindex is in sync */
        Assert(scan->rs_cindex < scan->rs_ntuples);
        Assert(lineoff == scan->rs_vistuples[scan->rs_cindex]);

        return;
    }

    /*
     * advance the scan until we find a qualifying tuple or run out of stuff
     * to scan
     */
    for (;;)
    {
        while (linesleft > 0)
        {
            /* point the result slot at the candidate entry */
            lineoff = scan->rs_vistuples[lineindex];
            lpp = PageGetItemId(dp, lineoff);
            Assert(ItemIdIsNormal(lpp));

            tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp);
            tuple->t_len = ItemIdGetLength(lpp);
            ItemPointerSet(&(tuple->t_self), page, lineoff);

            /*
             * if current tuple qualifies, return it.
             */
            if (key != NULL)
            {
                bool        valid;

                /*
                 * valid is passed by name rather than by address, so
                 * HeapKeyTest is presumably a macro that assigns to it --
                 * confirm against its definition.
                 */
                HeapKeyTest(tuple, RelationGetDescr(scan->rs_rd),
                            nkeys, key, valid);
                if (valid)
                {
                    /* remember the position so the next call can resume */
                    scan->rs_cindex = lineindex;
                    return;
                }
            }
            else
            {
                /* no scan keys: the first candidate is the answer */
                scan->rs_cindex = lineindex;
                return;
            }

            /*
             * otherwise move to the next item on the page
             */
            --linesleft;
            if (backward)
                --lineindex;
            else
                ++lineindex;
        }

        /*
         * if we get here, it means we've exhausted the items on this page and
         * it's time to move to the next.
         */
        if (backward)
        {
            /*
             * Going backward, the scan is over once the page just exhausted
             * is rs_startblock, so test before stepping; block 0 wraps to
             * the last block.
             */
            finished = (page == scan->rs_startblock);
            if (page == 0)
                page = scan->rs_nblocks;
            page--;
        }
        else
        {
            /*
             * Going forward, step first (wrapping past the last block to
             * block 0); the scan is over when that lands on rs_startblock
             * again.
             */
            page++;
            if (page >= scan->rs_nblocks)
                page = 0;
            finished = (page == scan->rs_startblock);

            /*
             * Report our new scan position for synchronization purposes. We
             * don't do that when moving backwards, however. That would just
             * mess up any other forward-moving scanners.
             *
             * Note: we do this before checking for end of scan so that the
             * final state of the position hint is back at the start of the
             * rel.  That's not strictly necessary, but otherwise when you run
             * the same query multiple times the starting position would shift
             * a little bit backwards on every invocation, which is confusing.
             * We don't guarantee any specific ordering in general, though.
             */
            if (scan->rs_syncscan)
                ss_report_location(scan->rs_rd, page);
        }

        /*
         * return NULL if we've exhausted all the pages
         */
        if (finished)
        {
            /*
             * Release the current buffer, if any, and put the scan back in
             * its "not started" state (rs_inited = false) with no tuple.
             */
            if (BufferIsValid(scan->rs_cbuf))
                ReleaseBuffer(scan->rs_cbuf);
            scan->rs_cbuf = InvalidBuffer;
            scan->rs_cblock = InvalidBlockNumber;
            tuple->t_data = NULL;
            scan->rs_inited = false;
            return;
        }

        heapgetpage(scan, page);

        /* restart at the appropriate end of the newly loaded page */
        dp = (Page) BufferGetPage(scan->rs_cbuf);
        lines = scan->rs_ntuples;
        linesleft = lines;
        if (backward)
            lineindex = lines - 1;
        else
            lineindex = 0;
    }
}

进行简化:

/* ----------------
 *        heapgettup_pagemode - fetch next heap tuple in page-at-a-time mode
 *
 *        Same API as heapgettup, but used in page-at-a-time mode
 *
 * The internal logic is much the same as heapgettup's too, but there are some
 * differences: we do not take the buffer content lock (that only needs to
 * happen inside heapgetpage), and we iterate through just the tuples listed
 * in rs_vistuples[] rather than all tuples on the page.  Notice that
 * lineindex is 0-based, where the corresponding loop variable lineoff in
 * heapgettup is 1-based.
 * ----------------
 */
static void
heapgettup_pagemode(HeapScanDesc scan,
                    ScanDirection dir,
                    int nkeys,
                    ScanKey key)
{

    ...

    /*
     * calculate next starting lineindex, given scan direction
     */
    if (ScanDirectionIsForward(dir))
    {
        ...
    }
    else if (backward)
    {
        ...
    }
    else
    {
        ...
    }

    /*
     * advance the scan until we find a qualifying tuple or run out of stuff
     * to scan
     */
    for (;;)
    {
        ...
    }
}

再看其分支条件:我的查询满足 (ScanDirectionIsForward(dir)) 的条件。

暂时变成:

/* ----------------
 *        heapgettup_pagemode - fetch next heap tuple in page-at-a-time mode
 *
 *        Same API as heapgettup, but used in page-at-a-time mode
 *
 * The internal logic is much the same as heapgettup's too, but there are some
 * differences: we do not take the buffer content lock (that only needs to
 * happen inside heapgetpage), and we iterate through just the tuples listed
 * in rs_vistuples[] rather than all tuples on the page.  Notice that
 * lineindex is 0-based, where the corresponding loop variable lineoff in
 * heapgettup is 1-based.
 * ----------------
 */
static void
heapgettup_pagemode(HeapScanDesc scan,
                    ScanDirection dir,
                    int nkeys,
                    ScanKey key)
{

    ...

    /*
     * calculate next starting lineindex, given scan direction
     */
    if (ScanDirectionIsForward(dir))
    {
        ...
    }

    ...

    /*
     * advance the scan until we find a qualifying tuple or run out of stuff
     * to scan
     */
    for (;;)
    {
        ...
    }
}

再进一步分析:

    /*
     * Forward-scan branch of heapgettup_pagemode: pick the page and the
     * rs_vistuples[] index at which the search for the next tuple starts.
     */
    if (ScanDirectionIsForward(dir))
    {

        /* debug trace; the full listing of this function does not have it */
        fprintf(stderr,"ScanDirectionIsForward(dir) is true\n");

        if (!scan->rs_inited)
        {
            /*
             * return null immediately if relation is empty
             */
            if (scan->rs_nblocks == 0)
            {
                Assert(!BufferIsValid(scan->rs_cbuf));
                tuple->t_data = NULL;
                return;
            }
            /* first call: load the start block and begin at index 0 */
            page = scan->rs_startblock; /* first page */
            heapgetpage(scan, page);
            lineindex = 0;
            scan->rs_inited = true;
        }
        else
        {
            /* continue from previously returned page/tuple */
            page = scan->rs_cblock;        /* current page */
            lineindex = scan->rs_cindex + 1;
        }

        dp = (Page) BufferGetPage(scan->rs_cbuf);
        lines = scan->rs_ntuples;
        /* page and lineindex now reference the next visible tid */

        /* number of rs_vistuples[] entries from lineindex to the end */
        linesleft = lines - lineindex;
    }

当扫描尚未开始的时候,scan->rs_inited 为 false;第一次调用并读入起始页之后,scan->rs_inited 变成 true。

第一次扫描时做的是这个:

        if (!scan->rs_inited)
        {
                        ...
            page = scan->rs_startblock; /* first page */
            heapgetpage(scan, page);
            lineindex = 0;
            scan->rs_inited = true;
        }
        else
        {
                       ...
        }

        dp = (Page) BufferGetPage(scan->rs_cbuf);
        lines = scan->rs_ntuples;
        /* page and lineindex now reference the next visible tid */

        linesleft = lines - lineindex;

第一次以外的扫描做的是这个:

if (!scan->rs_inited)
        {
                     ...
        }
        else
        {
/* continue from previously returned page/tuple */
            page = scan->rs_cblock;        /* current page */
            lineindex = scan->rs_cindex + 1;
        }

        dp = (Page) BufferGetPage(scan->rs_cbuf);
        lines = scan->rs_ntuples;
        /* page and lineindex now reference the next visible tid */
        linesleft = lines - lineindex;

下面将进一步分析 rs_startblock 从何处开始被设置:

HeapScanDesc 的定义:

/*
 * HeapScanDesc / HeapScanDescData - state of one heap scan
 *
 * HeapScanDesc is a pointer to HeapScanDescData.  The struct groups the
 * parameters the scan was opened with, the state set up at initscan time,
 * the current position (tuple, block, buffer), and the per-page list of
 * visible tuples used in page-at-a-time mode and for bitmap scans.
 */

/* struct definition appears in relscan.h */
typedef struct HeapScanDescData *HeapScanDesc;

typedef struct HeapScanDescData
{
    /* scan parameters */
    Relation    rs_rd;            /* heap relation descriptor */
    Snapshot    rs_snapshot;    /* snapshot to see */
    int            rs_nkeys;        /* number of scan keys */
    ScanKey        rs_key;            /* array of scan key descriptors */
    bool        rs_bitmapscan;    /* true if this is really a bitmap scan */
    bool        rs_pageatatime; /* verify visibility page-at-a-time? */
    bool        rs_allow_strat; /* allow or disallow use of access strategy */
    bool        rs_allow_sync;    /* allow or disallow use of syncscan */

    /* state set up at initscan time */
    BlockNumber rs_nblocks;        /* number of blocks to scan */
    BlockNumber rs_startblock;    /* block # to start at */
    BufferAccessStrategy rs_strategy;    /* access strategy for reads */
    bool        rs_syncscan;    /* report location to syncscan logic? */

    /* scan current state */
    bool        rs_inited;        /* false = scan not init'd yet */
    HeapTupleData rs_ctup;        /* current tuple in scan, if any */
    BlockNumber rs_cblock;        /* current block # in scan, if any */
    Buffer        rs_cbuf;        /* current buffer in scan, if any */
    /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
    ItemPointerData rs_mctid;    /* marked scan position, if any */

    /* these fields only used in page-at-a-time mode and for bitmap scans */
    int            rs_cindex;        /* current tuple's index in vistuples */
    int            rs_mindex;        /* marked tuple's saved index */
    int            rs_ntuples;        /* number of visible tuples on page */
    OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];    /* their offsets */
}    HeapScanDescData;

 实际上,在 heap_getnext 函数中, scan->rs_startblock 就是0。

在 node->ss_currentScanDesc->rs_startblock 中,从参数node开始, rs_startblock 就是0。 

原文地址:https://www.cnblogs.com/gaojian/p/3113903.html