LCOV - code coverage report
Current view: top level - src/backend/access/gist - gistbuild.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 292 303 96.4 %
Date: 2017-09-29 15:12:54 Functions: 16 16 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * gistbuild.c
       4             :  *    build algorithm for GiST indexes implementation.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/gist/gistbuild.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <math.h>
      18             : 
      19             : #include "access/genam.h"
      20             : #include "access/gist_private.h"
      21             : #include "access/gistxlog.h"
      22             : #include "access/xloginsert.h"
      23             : #include "catalog/index.h"
      24             : #include "miscadmin.h"
      25             : #include "optimizer/cost.h"
      26             : #include "storage/bufmgr.h"
      27             : #include "storage/smgr.h"
      28             : #include "utils/memutils.h"
      29             : #include "utils/rel.h"
      30             : 
      31             : /* Step of index tuples for check whether to switch to buffering build mode */
      32             : #define BUFFERING_MODE_SWITCH_CHECK_STEP 256
      33             : 
      34             : /*
      35             :  * Number of tuples to process in the slow way before switching to buffering
      36             :  * mode, when buffering is explicitly turned on. Also, the number of tuples
      37             :  * to process between readjusting the buffer size parameter, while in
      38             :  * buffering mode.
      39             :  */
      40             : #define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET 4096
      41             : 
      42             : typedef enum
      43             : {
      44             :     GIST_BUFFERING_DISABLED,    /* in regular build mode and aren't going to
      45             :                                  * switch */
      46             :     GIST_BUFFERING_AUTO,        /* in regular build mode, but will switch to
      47             :                                  * buffering build mode if the index grows too
      48             :                                  * big */
      49             :     GIST_BUFFERING_STATS,       /* gathering statistics of index tuple size
      50             :                                  * before switching to the buffering build
      51             :                                  * mode */
      52             :     GIST_BUFFERING_ACTIVE       /* in buffering build mode */
      53             : } GistBufferingMode;
      54             : 
      55             : /* Working state for gistbuild and its callback */
      56             : typedef struct
      57             : {
      58             :     Relation    indexrel;
      59             :     GISTSTATE  *giststate;
      60             : 
      61             :     int64       indtuples;      /* number of tuples indexed */
      62             :     int64       indtuplesSize;  /* total size of all indexed tuples */
      63             : 
      64             :     Size        freespace;      /* amount of free space to leave on pages */
      65             : 
      66             :     /*
      67             :      * Extra data structures used during a buffering build. 'gfbb' contains
      68             :      * information related to managing the build buffers. 'parentMap' is a
      69             :      * lookup table of the parent of each internal page.
      70             :      */
      71             :     GISTBuildBuffers *gfbb;
      72             :     HTAB       *parentMap;
      73             : 
      74             :     GistBufferingMode bufferingMode;
      75             : } GISTBuildState;
      76             : 
      77             : /* prototypes for private functions */
      78             : static void gistInitBuffering(GISTBuildState *buildstate);
      79             : static int  calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep);
      80             : static void gistBuildCallback(Relation index,
      81             :                   HeapTuple htup,
      82             :                   Datum *values,
      83             :                   bool *isnull,
      84             :                   bool tupleIsAlive,
      85             :                   void *state);
      86             : static void gistBufferingBuildInsert(GISTBuildState *buildstate,
      87             :                          IndexTuple itup);
      88             : static bool gistProcessItup(GISTBuildState *buildstate, IndexTuple itup,
      89             :                 BlockNumber startblkno, int startlevel);
      90             : static BlockNumber gistbufferinginserttuples(GISTBuildState *buildstate,
      91             :                           Buffer buffer, int level,
      92             :                           IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
      93             :                           BlockNumber parentblk, OffsetNumber downlinkoffnum);
      94             : static Buffer gistBufferingFindCorrectParent(GISTBuildState *buildstate,
      95             :                                BlockNumber childblkno, int level,
      96             :                                BlockNumber *parentblk,
      97             :                                OffsetNumber *downlinkoffnum);
      98             : static void gistProcessEmptyingQueue(GISTBuildState *buildstate);
      99             : static void gistEmptyAllBuffers(GISTBuildState *buildstate);
     100             : static int  gistGetMaxLevel(Relation index);
     101             : 
     102             : static void gistInitParentMap(GISTBuildState *buildstate);
     103             : static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child,
     104             :                    BlockNumber parent);
     105             : static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parent);
     106             : static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child);
     107             : 
     108             : /*
     109             :  * Main entry point to GiST index build. Initially calls insert over and over,
     110             :  * but switches to more efficient buffering build algorithm after a certain
     111             :  * number of tuples (unless buffering mode is disabled).
     112             :  */
     113             : IndexBuildResult *
     114          27 : gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     115             : {
     116             :     IndexBuildResult *result;
     117             :     double      reltuples;
     118             :     GISTBuildState buildstate;
     119             :     Buffer      buffer;
     120             :     Page        page;
     121          27 :     MemoryContext oldcxt = CurrentMemoryContext;
     122             :     int         fillfactor;
     123             : 
     124          27 :     buildstate.indexrel = index;
     125          27 :     if (index->rd_options)
     126             :     {
     127             :         /* Get buffering mode from the options string */
     128           3 :         GiSTOptions *options = (GiSTOptions *) index->rd_options;
     129           3 :         char       *bufferingMode = (char *) options + options->bufferingModeOffset;
     130             : 
     131           3 :         if (strcmp(bufferingMode, "on") == 0)
     132           1 :             buildstate.bufferingMode = GIST_BUFFERING_STATS;
     133           2 :         else if (strcmp(bufferingMode, "off") == 0)
     134           1 :             buildstate.bufferingMode = GIST_BUFFERING_DISABLED;
     135             :         else
     136           1 :             buildstate.bufferingMode = GIST_BUFFERING_AUTO;
     137             : 
     138           3 :         fillfactor = options->fillfactor;
     139             :     }
     140             :     else
     141             :     {
     142             :         /*
     143             :          * By default, switch to buffering mode when the index grows too large
     144             :          * to fit in cache.
     145             :          */
     146          24 :         buildstate.bufferingMode = GIST_BUFFERING_AUTO;
     147          24 :         fillfactor = GIST_DEFAULT_FILLFACTOR;
     148             :     }
     149             :     /* Calculate target amount of free space to leave on pages */
     150          27 :     buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100;
     151             : 
     152             :     /*
     153             :      * We expect to be called exactly once for any index relation. If that's
     154             :      * not the case, big trouble's what we have.
     155             :      */
     156          27 :     if (RelationGetNumberOfBlocks(index) != 0)
     157           0 :         elog(ERROR, "index \"%s\" already contains data",
     158             :              RelationGetRelationName(index));
     159             : 
     160             :     /* no locking is needed */
     161          27 :     buildstate.giststate = initGISTstate(index);
     162             : 
     163             :     /*
     164             :      * Create a temporary memory context that is reset once for each tuple
     165             :      * processed.  (Note: we don't bother to make this a child of the
     166             :      * giststate's scanCxt, so we have to delete it separately at the end.)
     167             :      */
     168          27 :     buildstate.giststate->tempCxt = createTempGistContext();
     169             : 
     170             :     /* initialize the root page */
     171          27 :     buffer = gistNewBuffer(index);
     172          27 :     Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
     173          27 :     page = BufferGetPage(buffer);
     174             : 
     175          27 :     START_CRIT_SECTION();
     176             : 
     177          27 :     GISTInitBuffer(buffer, F_LEAF);
     178             : 
     179          27 :     MarkBufferDirty(buffer);
     180             : 
     181          27 :     if (RelationNeedsWAL(index))
     182             :     {
     183             :         XLogRecPtr  recptr;
     184             : 
     185          24 :         XLogBeginInsert();
     186          24 :         XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
     187             : 
     188          24 :         recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
     189          24 :         PageSetLSN(page, recptr);
     190             :     }
     191             :     else
     192           3 :         PageSetLSN(page, gistGetFakeLSN(heap));
     193             : 
     194          27 :     UnlockReleaseBuffer(buffer);
     195             : 
     196          27 :     END_CRIT_SECTION();
     197             : 
     198             :     /* build the index */
     199          27 :     buildstate.indtuples = 0;
     200          27 :     buildstate.indtuplesSize = 0;
     201             : 
     202             :     /*
     203             :      * Do the heap scan.
     204             :      */
     205          27 :     reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
     206             :                                    gistBuildCallback, (void *) &buildstate);
     207             : 
     208             :     /*
     209             :      * If buffering was used, flush out all the tuples that are still in the
     210             :      * buffers.
     211             :      */
     212          27 :     if (buildstate.bufferingMode == GIST_BUFFERING_ACTIVE)
     213             :     {
     214           1 :         elog(DEBUG1, "all tuples processed, emptying buffers");
     215           1 :         gistEmptyAllBuffers(&buildstate);
     216           1 :         gistFreeBuildBuffers(buildstate.gfbb);
     217             :     }
     218             : 
     219             :     /* okay, all heap tuples are indexed */
     220          27 :     MemoryContextSwitchTo(oldcxt);
     221          27 :     MemoryContextDelete(buildstate.giststate->tempCxt);
     222             : 
     223          27 :     freeGISTstate(buildstate.giststate);
     224             : 
     225             :     /*
     226             :      * Return statistics
     227             :      */
     228          27 :     result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
     229             : 
     230          27 :     result->heap_tuples = reltuples;
     231          27 :     result->index_tuples = (double) buildstate.indtuples;
     232             : 
     233          27 :     return result;
     234             : }
     235             : 
     236             : /*
     237             :  * Validator for "buffering" reloption on GiST indexes. Allows "on", "off"
     238             :  * and "auto" values.
     239             :  */
     240             : void
     241           4 : gistValidateBufferingOption(char *value)
     242             : {
     243           8 :     if (value == NULL ||
     244           7 :         (strcmp(value, "on") != 0 &&
     245           5 :          strcmp(value, "off") != 0 &&
     246           2 :          strcmp(value, "auto") != 0))
     247             :     {
     248           1 :         ereport(ERROR,
     249             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     250             :                  errmsg("invalid value for \"buffering\" option"),
     251             :                  errdetail("Valid values are \"on\", \"off\", and \"auto\".")));
     252             :     }
     253           3 : }
     254             : 
     255             : /*
     256             :  * Attempt to switch to buffering mode.
     257             :  *
     258             :  * If there is not enough memory for buffering build, sets bufferingMode
     259             :  * to GIST_BUFFERING_DISABLED, so that we don't bother to try the switch
     260             :  * anymore. Otherwise initializes the build buffers, and sets bufferingMode to
     261             :  * GIST_BUFFERING_ACTIVE.
     262             :  */
     263             : static void
     264           1 : gistInitBuffering(GISTBuildState *buildstate)
     265             : {
     266           1 :     Relation    index = buildstate->indexrel;
     267             :     int         pagesPerBuffer;
     268             :     Size        pageFreeSpace;
     269             :     Size        itupAvgSize,
     270             :                 itupMinSize;
     271             :     double      avgIndexTuplesPerPage,
     272             :                 maxIndexTuplesPerPage;
     273             :     int         i;
     274             :     int         levelStep;
     275             : 
     276             :     /* Calc space of index page which is available for index tuples */
     277           1 :     pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
     278             :         - sizeof(ItemIdData)
     279           1 :         - buildstate->freespace;
     280             : 
     281             :     /*
     282             :      * Calculate average size of already inserted index tuples using gathered
     283             :      * statistics.
     284             :      */
     285           2 :     itupAvgSize = (double) buildstate->indtuplesSize /
     286           1 :         (double) buildstate->indtuples;
     287             : 
     288             :     /*
     289             :      * Calculate minimal possible size of index tuple by index metadata.
     290             :      * Minimal possible size of varlena is VARHDRSZ.
     291             :      *
     292             :      * XXX: that's not actually true, as a short varlen can be just 2 bytes.
     293             :      * And we should take padding into account here.
     294             :      */
     295           1 :     itupMinSize = (Size) MAXALIGN(sizeof(IndexTupleData));
     296           2 :     for (i = 0; i < index->rd_att->natts; i++)
     297             :     {
     298           1 :         if (TupleDescAttr(index->rd_att, i)->attlen < 0)
     299           0 :             itupMinSize += VARHDRSZ;
     300             :         else
     301           1 :             itupMinSize += TupleDescAttr(index->rd_att, i)->attlen;
     302             :     }
     303             : 
     304             :     /* Calculate average and maximal number of index tuples which fit to page */
     305           1 :     avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;
     306           1 :     maxIndexTuplesPerPage = pageFreeSpace / itupMinSize;
     307             : 
     308             :     /*
     309             :      * We need to calculate two parameters for the buffering algorithm:
     310             :      * levelStep and pagesPerBuffer.
     311             :      *
     312             :      * levelStep determines the size of subtree that we operate on, while
     313             :      * emptying a buffer. A higher value is better, as you need fewer buffer
     314             :      * emptying steps to build the index. However, if you set it too high, the
     315             :      * subtree doesn't fit in cache anymore, and you quickly lose the benefit
     316             :      * of the buffers.
     317             :      *
     318             :      * In Arge et al's paper, levelStep is chosen as logB(M/4B), where B is
     319             :      * the number of tuples on page (ie. fanout), and M is the amount of
     320             :      * internal memory available. Curiously, they doesn't explain *why* that
     321             :      * setting is optimal. We calculate it by taking the highest levelStep so
     322             :      * that a subtree still fits in cache. For a small B, our way of
     323             :      * calculating levelStep is very close to Arge et al's formula. For a
     324             :      * large B, our formula gives a value that is 2x higher.
     325             :      *
     326             :      * The average size (in pages) of a subtree of depth n can be calculated
     327             :      * as a geometric series:
     328             :      *
     329             :      * B^0 + B^1 + B^2 + ... + B^n = (1 - B^(n + 1)) / (1 - B)
     330             :      *
     331             :      * where B is the average number of index tuples on page. The subtree is
     332             :      * cached in the shared buffer cache and the OS cache, so we choose
     333             :      * levelStep so that the subtree size is comfortably smaller than
     334             :      * effective_cache_size, with a safety factor of 4.
     335             :      *
     336             :      * The estimate on the average number of index tuples on page is based on
     337             :      * average tuple sizes observed before switching to buffered build, so the
     338             :      * real subtree size can be somewhat larger. Also, it would selfish to
     339             :      * gobble the whole cache for our index build. The safety factor of 4
     340             :      * should account for those effects.
     341             :      *
     342             :      * The other limiting factor for setting levelStep is that while
     343             :      * processing a subtree, we need to hold one page for each buffer at the
     344             :      * next lower buffered level. The max. number of buffers needed for that
     345             :      * is maxIndexTuplesPerPage^levelStep. This is very conservative, but
     346             :      * hopefully maintenance_work_mem is set high enough that you're
     347             :      * constrained by effective_cache_size rather than maintenance_work_mem.
     348             :      *
     349             :      * XXX: the buffer hash table consumes a fair amount of memory too per
     350             :      * buffer, but that is not currently taken into account. That scales on
     351             :      * the total number of buffers used, ie. the index size and on levelStep.
     352             :      * Note that a higher levelStep *reduces* the amount of memory needed for
     353             :      * the hash table.
     354             :      */
     355           1 :     levelStep = 1;
     356             :     for (;;)
     357             :     {
     358             :         double      subtreesize;
     359             :         double      maxlowestlevelpages;
     360             : 
     361             :         /* size of an average subtree at this levelStep (in pages). */
     362           2 :         subtreesize =
     363           2 :             (1 - pow(avgIndexTuplesPerPage, (double) (levelStep + 1))) /
     364             :             (1 - avgIndexTuplesPerPage);
     365             : 
     366             :         /* max number of pages at the lowest level of a subtree */
     367           2 :         maxlowestlevelpages = pow(maxIndexTuplesPerPage, (double) levelStep);
     368             : 
     369             :         /* subtree must fit in cache (with safety factor of 4) */
     370           2 :         if (subtreesize > effective_cache_size / 4)
     371           0 :             break;
     372             : 
     373             :         /* each node in the lowest level of a subtree has one page in memory */
     374           2 :         if (maxlowestlevelpages > ((double) maintenance_work_mem * 1024) / BLCKSZ)
     375           1 :             break;
     376             : 
     377             :         /* Good, we can handle this levelStep. See if we can go one higher. */
     378           1 :         levelStep++;
     379           1 :     }
     380             : 
     381             :     /*
     382             :      * We just reached an unacceptable value of levelStep in previous loop.
     383             :      * So, decrease levelStep to get last acceptable value.
     384             :      */
     385           1 :     levelStep--;
     386             : 
     387             :     /*
     388             :      * If there's not enough cache or maintenance_work_mem, fall back to plain
     389             :      * inserts.
     390             :      */
     391           1 :     if (levelStep <= 0)
     392             :     {
     393           0 :         elog(DEBUG1, "failed to switch to buffered GiST build");
     394           0 :         buildstate->bufferingMode = GIST_BUFFERING_DISABLED;
     395           1 :         return;
     396             :     }
     397             : 
     398             :     /*
     399             :      * The second parameter to set is pagesPerBuffer, which determines the
     400             :      * size of each buffer. We adjust pagesPerBuffer also during the build,
     401             :      * which is why this calculation is in a separate function.
     402             :      */
     403           1 :     pagesPerBuffer = calculatePagesPerBuffer(buildstate, levelStep);
     404             : 
     405             :     /* Initialize GISTBuildBuffers with these parameters */
     406           1 :     buildstate->gfbb = gistInitBuildBuffers(pagesPerBuffer, levelStep,
     407             :                                             gistGetMaxLevel(index));
     408             : 
     409           1 :     gistInitParentMap(buildstate);
     410             : 
     411           1 :     buildstate->bufferingMode = GIST_BUFFERING_ACTIVE;
     412             : 
     413           1 :     elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d",
     414             :          levelStep, pagesPerBuffer);
     415             : }
     416             : 
     417             : /*
     418             :  * Calculate pagesPerBuffer parameter for the buffering algorithm.
     419             :  *
     420             :  * Buffer size is chosen so that assuming that tuples are distributed
     421             :  * randomly, emptying half a buffer fills on average one page in every buffer
     422             :  * at the next lower level.
     423             :  */
     424             : static int
     425           4 : calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep)
     426             : {
     427             :     double      pagesPerBuffer;
     428             :     double      avgIndexTuplesPerPage;
     429             :     double      itupAvgSize;
     430             :     Size        pageFreeSpace;
     431             : 
     432             :     /* Calc space of index page which is available for index tuples */
     433           4 :     pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
     434             :         - sizeof(ItemIdData)
     435           4 :         - buildstate->freespace;
     436             : 
     437             :     /*
     438             :      * Calculate average size of already inserted index tuples using gathered
     439             :      * statistics.
     440             :      */
     441           8 :     itupAvgSize = (double) buildstate->indtuplesSize /
     442           4 :         (double) buildstate->indtuples;
     443             : 
     444           4 :     avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;
     445             : 
     446             :     /*
     447             :      * Recalculate required size of buffers.
     448             :      */
     449           4 :     pagesPerBuffer = 2 * pow(avgIndexTuplesPerPage, levelStep);
     450             : 
     451           4 :     return (int) rint(pagesPerBuffer);
     452             : }
     453             : 
     454             : /*
     455             :  * Per-tuple callback from IndexBuildHeapScan.
     456             :  */
     457             : static void
     458      101274 : gistBuildCallback(Relation index,
     459             :                   HeapTuple htup,
     460             :                   Datum *values,
     461             :                   bool *isnull,
     462             :                   bool tupleIsAlive,
     463             :                   void *state)
     464             : {
     465      101274 :     GISTBuildState *buildstate = (GISTBuildState *) state;
     466             :     IndexTuple  itup;
     467             :     MemoryContext oldCtx;
     468             : 
     469      101274 :     oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
     470             : 
     471             :     /* form an index tuple and point it at the heap tuple */
     472      101274 :     itup = gistFormTuple(buildstate->giststate, index, values, isnull, true);
     473      101274 :     itup->t_tid = htup->t_self;
     474             : 
     475      101274 :     if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE)
     476             :     {
     477             :         /* We have buffers, so use them. */
     478       15904 :         gistBufferingBuildInsert(buildstate, itup);
     479             :     }
     480             :     else
     481             :     {
     482             :         /*
     483             :          * There's no buffers (yet). Since we already have the index relation
     484             :          * locked, we call gistdoinsert directly.
     485             :          */
     486       85370 :         gistdoinsert(index, itup, buildstate->freespace,
     487             :                      buildstate->giststate);
     488             :     }
     489             : 
     490             :     /* Update tuple count and total size. */
     491      101274 :     buildstate->indtuples += 1;
     492      101274 :     buildstate->indtuplesSize += IndexTupleSize(itup);
     493             : 
     494      101274 :     MemoryContextSwitchTo(oldCtx);
     495      101274 :     MemoryContextReset(buildstate->giststate->tempCxt);
     496             : 
     497      117178 :     if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE &&
     498       15904 :         buildstate->indtuples % BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET == 0)
     499             :     {
     500             :         /* Adjust the target buffer size now */
     501           6 :         buildstate->gfbb->pagesPerBuffer =
     502           3 :             calculatePagesPerBuffer(buildstate, buildstate->gfbb->levelStep);
     503             :     }
     504             : 
     505             :     /*
     506             :      * In 'auto' mode, check if the index has grown too large to fit in cache,
     507             :      * and switch to buffering mode if it has.
     508             :      *
     509             :      * To avoid excessive calls to smgrnblocks(), only check this every
     510             :      * BUFFERING_MODE_SWITCH_CHECK_STEP index tuples
     511             :      */
     512      162548 :     if ((buildstate->bufferingMode == GIST_BUFFERING_AUTO &&
     513       61510 :          buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 &&
     514      101510 :          effective_cache_size < smgrnblocks(index->rd_smgr, MAIN_FORKNUM)) ||
     515      105370 :         (buildstate->bufferingMode == GIST_BUFFERING_STATS &&
     516        4096 :          buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET))
     517             :     {
     518             :         /*
     519             :          * Index doesn't fit in effective cache anymore. Try to switch to
     520             :          * buffering build mode.
     521             :          */
     522           1 :         gistInitBuffering(buildstate);
     523             :     }
     524      101274 : }
     525             : 
     526             : /*
     527             :  * Insert function for buffering index build.
     528             :  */
     529             : static void
     530       15904 : gistBufferingBuildInsert(GISTBuildState *buildstate, IndexTuple itup)
     531             : {
     532             :     /* Insert the tuple to buffers. */
     533       15904 :     gistProcessItup(buildstate, itup, 0, buildstate->gfbb->rootlevel);
     534             : 
     535             :     /* If we filled up (half of a) buffer, process buffer emptying. */
     536       15904 :     gistProcessEmptyingQueue(buildstate);
     537       15904 : }
     538             : 
     539             : /*
     540             :  * Process an index tuple. Runs the tuple down the tree until we reach a leaf
     541             :  * page or node buffer, and inserts the tuple there. Returns true if we have
     542             :  * to stop buffer emptying process (because one of child buffers can't take
     543             :  * index tuples anymore).
     544             :  */
     545             : static bool
     546       31625 : gistProcessItup(GISTBuildState *buildstate, IndexTuple itup,
     547             :                 BlockNumber startblkno, int startlevel)
     548             : {
     549       31625 :     GISTSTATE  *giststate = buildstate->giststate;
     550       31625 :     GISTBuildBuffers *gfbb = buildstate->gfbb;
     551       31625 :     Relation    indexrel = buildstate->indexrel;
     552             :     BlockNumber childblkno;
     553             :     Buffer      buffer;
     554       31625 :     bool        result = false;
     555             :     BlockNumber blkno;
     556             :     int         level;
     557       31625 :     OffsetNumber downlinkoffnum = InvalidOffsetNumber;
     558       31625 :     BlockNumber parentblkno = InvalidBlockNumber;
     559             : 
     560       31625 :     CHECK_FOR_INTERRUPTS();
     561             : 
     562             :     /*
     563             :      * Loop until we reach a leaf page (level == 0) or a level with buffers
     564             :      * (not including the level we start at, because we would otherwise make
     565             :      * no progress).
     566             :      */
     567       31625 :     blkno = startblkno;
     568       31625 :     level = startlevel;
     569             :     for (;;)
     570             :     {
     571             :         ItemId      iid;
     572             :         IndexTuple  idxtuple,
     573             :                     newtup;
     574             :         Page        page;
     575             :         OffsetNumber childoffnum;
     576             : 
     577             :         /* Have we reached a level with buffers? */
     578       63250 :         if (LEVEL_HAS_BUFFERS(level, gfbb) && level != startlevel)
     579       47346 :             break;
     580             : 
     581             :         /* Have we reached a leaf page? */
     582       47529 :         if (level == 0)
     583       15904 :             break;
     584             : 
     585             :         /*
     586             :          * Nope. Descend down to the next level then. Choose a child to
     587             :          * descend down to.
     588             :          */
     589             : 
     590       31625 :         buffer = ReadBuffer(indexrel, blkno);
     591       31625 :         LockBuffer(buffer, GIST_EXCLUSIVE);
     592             : 
     593       31625 :         page = (Page) BufferGetPage(buffer);
     594       31625 :         childoffnum = gistchoose(indexrel, page, itup, giststate);
     595       31625 :         iid = PageGetItemId(page, childoffnum);
     596       31625 :         idxtuple = (IndexTuple) PageGetItem(page, iid);
     597       31625 :         childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
     598             : 
     599       31625 :         if (level > 1)
     600       15721 :             gistMemorizeParent(buildstate, childblkno, blkno);
     601             : 
     602             :         /*
     603             :          * Check that the key representing the target child node is consistent
     604             :          * with the key we're inserting. Update it if it's not.
     605             :          */
     606       31625 :         newtup = gistgetadjusted(indexrel, idxtuple, itup, giststate);
     607       31625 :         if (newtup)
     608             :         {
     609       11706 :             blkno = gistbufferinginserttuples(buildstate,
     610             :                                               buffer,
     611             :                                               level,
     612             :                                               &newtup,
     613             :                                               1,
     614             :                                               childoffnum,
     615             :                                               InvalidBlockNumber,
     616             :                                               InvalidOffsetNumber);
     617             :             /* gistbufferinginserttuples() released the buffer */
     618             :         }
     619             :         else
     620       19919 :             UnlockReleaseBuffer(buffer);
     621             : 
     622             :         /* Descend to the child */
     623       31625 :         parentblkno = blkno;
     624       31625 :         blkno = childblkno;
     625       31625 :         downlinkoffnum = childoffnum;
     626       31625 :         Assert(level > 0);
     627       31625 :         level--;
     628       31625 :     }
     629             : 
     630       31625 :     if (LEVEL_HAS_BUFFERS(level, gfbb))
     631       15721 :     {
     632             :         /*
     633             :          * We've reached level with buffers. Place the index tuple to the
     634             :          * buffer, and add the buffer to the emptying queue if it overflows.
     635             :          */
     636             :         GISTNodeBuffer *childNodeBuffer;
     637             : 
     638             :         /* Find the buffer or create a new one */
     639       15721 :         childNodeBuffer = gistGetNodeBuffer(gfbb, giststate, blkno, level);
     640             : 
     641             :         /* Add index tuple to it */
     642       15721 :         gistPushItupToNodeBuffer(gfbb, childNodeBuffer, itup);
     643             : 
     644       15721 :         if (BUFFER_OVERFLOWED(childNodeBuffer, gfbb))
     645           0 :             result = true;
     646             :     }
     647             :     else
     648             :     {
     649             :         /*
     650             :          * We've reached a leaf page. Place the tuple here.
     651             :          */
     652       15904 :         Assert(level == 0);
     653       15904 :         buffer = ReadBuffer(indexrel, blkno);
     654       15904 :         LockBuffer(buffer, GIST_EXCLUSIVE);
     655       15904 :         gistbufferinginserttuples(buildstate, buffer, level,
     656             :                                   &itup, 1, InvalidOffsetNumber,
     657             :                                   parentblkno, downlinkoffnum);
     658             :         /* gistbufferinginserttuples() released the buffer */
     659             :     }
     660             : 
     661       31625 :     return result;
     662             : }
     663             : 
     664             : /*
     665             :  * Insert tuples to a given page.
     666             :  *
     667             :  * This is analogous with gistinserttuples() in the regular insertion code.
     668             :  *
     669             :  * Returns the block number of the page where the (first) new or updated tuple
     670             :  * was inserted. Usually that's the original page, but might be a sibling page
     671             :  * if the original page was split.
     672             :  *
     673             :  * Caller should hold a lock on 'buffer' on entry. This function will unlock
     674             :  * and unpin it.
     675             :  */
     676             : static BlockNumber
     677       27817 : gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
     678             :                           IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
     679             :                           BlockNumber parentblk, OffsetNumber downlinkoffnum)
     680             : {
     681       27817 :     GISTBuildBuffers *gfbb = buildstate->gfbb;
     682             :     List       *splitinfo;
     683             :     bool        is_split;
     684       27817 :     BlockNumber placed_to_blk = InvalidBlockNumber;
     685             : 
     686       27817 :     is_split = gistplacetopage(buildstate->indexrel,
     687             :                                buildstate->freespace,
     688             :                                buildstate->giststate,
     689             :                                buffer,
     690             :                                itup, ntup, oldoffnum, &placed_to_blk,
     691             :                                InvalidBuffer,
     692             :                                &splitinfo,
     693             :                                false);
     694             : 
     695             :     /*
     696             :      * If this is a root split, update the root path item kept in memory. This
     697             :      * ensures that all path stacks are always complete, including all parent
     698             :      * nodes up to the root. That simplifies the algorithm to re-find correct
     699             :      * parent.
     700             :      */
     701       27817 :     if (is_split && BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO)
     702             :     {
     703           1 :         Page        page = BufferGetPage(buffer);
     704             :         OffsetNumber off;
     705             :         OffsetNumber maxoff;
     706             : 
     707           1 :         Assert(level == gfbb->rootlevel);
     708           1 :         gfbb->rootlevel++;
     709             : 
     710           1 :         elog(DEBUG2, "splitting GiST root page, now %d levels deep", gfbb->rootlevel);
     711             : 
     712             :         /*
     713             :          * All the downlinks on the old root page are now on one of the child
     714             :          * pages. Visit all the new child pages to memorize the parents of the
     715             :          * grandchildren.
     716             :          */
     717           1 :         if (gfbb->rootlevel > 1)
     718             :         {
     719           1 :             maxoff = PageGetMaxOffsetNumber(page);
     720           3 :             for (off = FirstOffsetNumber; off <= maxoff; off++)
     721             :             {
     722           2 :                 ItemId      iid = PageGetItemId(page, off);
     723           2 :                 IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     724           2 :                 BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
     725           2 :                 Buffer      childbuf = ReadBuffer(buildstate->indexrel, childblkno);
     726             : 
     727           2 :                 LockBuffer(childbuf, GIST_SHARE);
     728           2 :                 gistMemorizeAllDownlinks(buildstate, childbuf);
     729           2 :                 UnlockReleaseBuffer(childbuf);
     730             : 
     731             :                 /*
     732             :                  * Also remember that the parent of the new child page is the
     733             :                  * root block.
     734             :                  */
     735           2 :                 gistMemorizeParent(buildstate, childblkno, GIST_ROOT_BLKNO);
     736             :             }
     737             :         }
     738             :     }
     739             : 
     740       27817 :     if (splitinfo)
     741             :     {
     742             :         /*
     743             :          * Insert the downlinks to the parent. This is analogous with
     744             :          * gistfinishsplit() in the regular insertion code, but the locking is
     745             :          * simpler, and we have to maintain the buffers on internal nodes and
     746             :          * the parent map.
     747             :          */
     748             :         IndexTuple *downlinks;
     749             :         int         ndownlinks,
     750             :                     i;
     751             :         Buffer      parentBuffer;
     752             :         ListCell   *lc;
     753             : 
     754             :         /* Parent may have changed since we memorized this path. */
     755         207 :         parentBuffer =
     756         207 :             gistBufferingFindCorrectParent(buildstate,
     757             :                                            BufferGetBlockNumber(buffer),
     758             :                                            level,
     759             :                                            &parentblk,
     760             :                                            &downlinkoffnum);
     761             : 
     762             :         /*
     763             :          * If there's a buffer associated with this page, that needs to be
     764             :          * split too. gistRelocateBuildBuffersOnSplit() will also adjust the
     765             :          * downlinks in 'splitinfo', to make sure they're consistent not only
     766             :          * with the tuples already on the pages, but also the tuples in the
     767             :          * buffers that will eventually be inserted to them.
     768             :          */
     769         207 :         gistRelocateBuildBuffersOnSplit(gfbb,
     770             :                                         buildstate->giststate,
     771             :                                         buildstate->indexrel,
     772             :                                         level,
     773             :                                         buffer, splitinfo);
     774             : 
     775             :         /* Create an array of all the downlink tuples */
     776         207 :         ndownlinks = list_length(splitinfo);
     777         207 :         downlinks = (IndexTuple *) palloc(sizeof(IndexTuple) * ndownlinks);
     778         207 :         i = 0;
     779         621 :         foreach(lc, splitinfo)
     780             :         {
     781         414 :             GISTPageSplitInfo *splitinfo = lfirst(lc);
     782             : 
     783             :             /*
     784             :              * Remember the parent of each new child page in our parent map.
     785             :              * This assumes that the downlinks fit on the parent page. If the
     786             :              * parent page is split, too, when we recurse up to insert the
     787             :              * downlinks, the recursive gistbufferinginserttuples() call will
     788             :              * update the map again.
     789             :              */
     790         414 :             if (level > 0)
     791           6 :                 gistMemorizeParent(buildstate,
     792             :                                    BufferGetBlockNumber(splitinfo->buf),
     793             :                                    BufferGetBlockNumber(parentBuffer));
     794             : 
     795             :             /*
     796             :              * Also update the parent map for all the downlinks that got moved
     797             :              * to a different page. (actually this also loops through the
     798             :              * downlinks that stayed on the original page, but it does no
     799             :              * harm).
     800             :              */
     801         414 :             if (level > 1)
     802           0 :                 gistMemorizeAllDownlinks(buildstate, splitinfo->buf);
     803             : 
     804             :             /*
     805             :              * Since there's no concurrent access, we can release the lower
     806             :              * level buffers immediately. This includes the original page.
     807             :              */
     808         414 :             UnlockReleaseBuffer(splitinfo->buf);
     809         414 :             downlinks[i++] = splitinfo->downlink;
     810             :         }
     811             : 
     812             :         /* Insert them into parent. */
     813         207 :         gistbufferinginserttuples(buildstate, parentBuffer, level + 1,
     814             :                                   downlinks, ndownlinks, downlinkoffnum,
     815             :                                   InvalidBlockNumber, InvalidOffsetNumber);
     816             : 
     817         207 :         list_free_deep(splitinfo);  /* we don't need this anymore */
     818             :     }
     819             :     else
     820       27610 :         UnlockReleaseBuffer(buffer);
     821             : 
     822       27817 :     return placed_to_blk;
     823             : }
     824             : 
     825             : /*
     826             :  * Find the downlink pointing to a child page.
     827             :  *
     828             :  * 'childblkno' indicates the child page to find the parent for. 'level' is
     829             :  * the level of the child. On entry, *parentblkno and *downlinkoffnum can
     830             :  * point to a location where the downlink used to be - we will check that
     831             :  * location first, and save some cycles if it hasn't moved. The function
     832             :  * returns a buffer containing the downlink, exclusively-locked, and
     833             :  * *parentblkno and *downlinkoffnum are set to the real location of the
     834             :  * downlink.
     835             :  *
     836             :  * If the child page is a leaf (level == 0), the caller must supply a correct
     837             :  * parentblkno. Otherwise we use the parent map hash table to find the parent
     838             :  * block.
     839             :  *
     840             :  * This function serves the same purpose as gistFindCorrectParent() during
     841             :  * normal index inserts, but this is simpler because we don't need to deal
     842             :  * with concurrent inserts.
     843             :  */
     844             : static Buffer
     845         207 : gistBufferingFindCorrectParent(GISTBuildState *buildstate,
     846             :                                BlockNumber childblkno, int level,
     847             :                                BlockNumber *parentblkno,
     848             :                                OffsetNumber *downlinkoffnum)
     849             : {
     850             :     BlockNumber parent;
     851             :     Buffer      buffer;
     852             :     Page        page;
     853             :     OffsetNumber maxoff;
     854             :     OffsetNumber off;
     855             : 
     856         207 :     if (level > 0)
     857           3 :         parent = gistGetParent(buildstate, childblkno);
     858             :     else
     859             :     {
     860             :         /*
     861             :          * For a leaf page, the caller must supply a correct parent block
     862             :          * number.
     863             :          */
     864         204 :         if (*parentblkno == InvalidBlockNumber)
     865           0 :             elog(ERROR, "no parent buffer provided of child %d", childblkno);
     866         204 :         parent = *parentblkno;
     867             :     }
     868             : 
     869         207 :     buffer = ReadBuffer(buildstate->indexrel, parent);
     870         207 :     page = BufferGetPage(buffer);
     871         207 :     LockBuffer(buffer, GIST_EXCLUSIVE);
     872         207 :     gistcheckpage(buildstate->indexrel, buffer);
     873         207 :     maxoff = PageGetMaxOffsetNumber(page);
     874             : 
     875             :     /* Check if it was not moved */
     876         411 :     if (parent == *parentblkno && *parentblkno != InvalidBlockNumber &&
     877         408 :         *downlinkoffnum != InvalidOffsetNumber && *downlinkoffnum <= maxoff)
     878             :     {
     879         204 :         ItemId      iid = PageGetItemId(page, *downlinkoffnum);
     880         204 :         IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     881             : 
     882         204 :         if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
     883             :         {
     884             :             /* Still there */
     885         204 :             return buffer;
     886             :         }
     887             :     }
     888             : 
     889             :     /*
     890             :      * Downlink was not at the offset where it used to be. Scan the page to
     891             :      * find it. During normal gist insertions, it might've moved to another
     892             :      * page, to the right, but during a buffering build, we keep track of the
     893             :      * parent of each page in the lookup table so we should always know what
     894             :      * page it's on.
     895             :      */
     896           8 :     for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
     897             :     {
     898           8 :         ItemId      iid = PageGetItemId(page, off);
     899           8 :         IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     900             : 
     901           8 :         if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
     902             :         {
     903             :             /* yes!!, found it */
     904           3 :             *downlinkoffnum = off;
     905           3 :             return buffer;
     906             :         }
     907             :     }
     908             : 
     909           0 :     elog(ERROR, "failed to re-find parent for block %u", childblkno);
     910             :     return InvalidBuffer;       /* keep compiler quiet */
     911             : }
     912             : 
     913             : /*
     914             :  * Process buffers emptying stack. Emptying of one buffer can cause emptying
     915             :  * of other buffers. This function iterates until this cascading emptying
     916             :  * process finished, e.g. until buffers emptying stack is empty.
     917             :  */
     918             : static void
     919       15909 : gistProcessEmptyingQueue(GISTBuildState *buildstate)
     920             : {
     921       15909 :     GISTBuildBuffers *gfbb = buildstate->gfbb;
     922             : 
     923             :     /* Iterate while we have elements in buffers emptying stack. */
     924       31823 :     while (gfbb->bufferEmptyingQueue != NIL)
     925             :     {
     926             :         GISTNodeBuffer *emptyingNodeBuffer;
     927             : 
     928             :         /* Get node buffer from emptying stack. */
     929           5 :         emptyingNodeBuffer = (GISTNodeBuffer *) linitial(gfbb->bufferEmptyingQueue);
     930           5 :         gfbb->bufferEmptyingQueue = list_delete_first(gfbb->bufferEmptyingQueue);
     931           5 :         emptyingNodeBuffer->queuedForEmptying = false;
     932             : 
     933             :         /*
     934             :          * We are going to load last pages of buffers where emptying will be
     935             :          * to. So let's unload any previously loaded buffers.
     936             :          */
     937           5 :         gistUnloadNodeBuffers(gfbb);
     938             : 
     939             :         /*
     940             :          * Pop tuples from the buffer and run them down to the buffers at
     941             :          * lower level, or leaf pages. We continue until one of the lower
     942             :          * level buffers fills up, or this buffer runs empty.
     943             :          *
     944             :          * In Arge et al's paper, the buffer emptying is stopped after
     945             :          * processing 1/2 node buffer worth of tuples, to avoid overfilling
     946             :          * any of the lower level buffers. However, it's more efficient to
     947             :          * keep going until one of the lower level buffers actually fills up,
     948             :          * so that's what we do. This doesn't need to be exact, if a buffer
     949             :          * overfills by a few tuples, there's no harm done.
     950             :          */
     951             :         while (true)
     952             :         {
     953             :             IndexTuple  itup;
     954             : 
     955             :             /* Get next index tuple from the buffer */
     956       15726 :             if (!gistPopItupFromNodeBuffer(gfbb, emptyingNodeBuffer, &itup))
     957          10 :                 break;
     958             : 
     959             :             /*
     960             :              * Run it down to the underlying node buffer or leaf page.
     961             :              *
     962             :              * Note: it's possible that the buffer we're emptying splits as a
     963             :              * result of this call. If that happens, our emptyingNodeBuffer
     964             :              * points to the left half of the split. After split, it's very
     965             :              * likely that the new left buffer is no longer over the half-full
     966             :              * threshold, but we might as well keep flushing tuples from it
     967             :              * until we fill a lower-level buffer.
     968             :              */
     969       15721 :             if (gistProcessItup(buildstate, itup, emptyingNodeBuffer->nodeBlocknum, emptyingNodeBuffer->level))
     970             :             {
     971             :                 /*
     972             :                  * A lower level buffer filled up. Stop emptying this buffer,
     973             :                  * to avoid overflowing the lower level buffer.
     974             :                  */
     975           0 :                 break;
     976             :             }
     977             : 
     978             :             /* Free all the memory allocated during index tuple processing */
     979       15721 :             MemoryContextReset(buildstate->giststate->tempCxt);
     980       15721 :         }
     981             :     }
     982       15909 : }
     983             : 
     984             : /*
     985             :  * Empty all node buffers, from top to bottom. This is done at the end of
     986             :  * index build to flush all remaining tuples to the index.
     987             :  *
     988             :  * Note: This destroys the buffersOnLevels lists, so the buffers should not
     989             :  * be inserted to after this call.
     990             :  */
     991             : static void
     992           1 : gistEmptyAllBuffers(GISTBuildState *buildstate)
     993             : {
     994           1 :     GISTBuildBuffers *gfbb = buildstate->gfbb;
     995             :     MemoryContext oldCtx;
     996             :     int         i;
     997             : 
     998           1 :     oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
     999             : 
    1000             :     /*
    1001             :      * Iterate through the levels from top to bottom.
    1002             :      */
    1003           3 :     for (i = gfbb->buffersOnLevelsLen - 1; i >= 0; i--)
    1004             :     {
    1005             :         /*
    1006             :          * Empty all buffers on this level. Note that new buffers can pop up
    1007             :          * in the list during the processing, as a result of page splits, so a
    1008             :          * simple walk through the list won't work. We remove buffers from the
    1009             :          * list when we see them empty; a buffer can't become non-empty once
    1010             :          * it's been fully emptied.
    1011             :          */
    1012          14 :         while (gfbb->buffersOnLevels[i] != NIL)
    1013             :         {
    1014             :             GISTNodeBuffer *nodeBuffer;
    1015             : 
    1016          10 :             nodeBuffer = (GISTNodeBuffer *) linitial(gfbb->buffersOnLevels[i]);
    1017             : 
    1018          10 :             if (nodeBuffer->blocksCount != 0)
    1019             :             {
    1020             :                 /*
    1021             :                  * Add this buffer to the emptying queue, and proceed to empty
    1022             :                  * the queue.
    1023             :                  */
    1024           5 :                 if (!nodeBuffer->queuedForEmptying)
    1025             :                 {
    1026           5 :                     MemoryContextSwitchTo(gfbb->context);
    1027           5 :                     nodeBuffer->queuedForEmptying = true;
    1028           5 :                     gfbb->bufferEmptyingQueue =
    1029           5 :                         lcons(nodeBuffer, gfbb->bufferEmptyingQueue);
    1030           5 :                     MemoryContextSwitchTo(buildstate->giststate->tempCxt);
    1031             :                 }
    1032           5 :                 gistProcessEmptyingQueue(buildstate);
    1033             :             }
    1034             :             else
    1035          10 :                 gfbb->buffersOnLevels[i] =
    1036           5 :                     list_delete_first(gfbb->buffersOnLevels[i]);
    1037             :         }
    1038           2 :         elog(DEBUG2, "emptied all buffers at level %d", i);
    1039             :     }
    1040           1 :     MemoryContextSwitchTo(oldCtx);
    1041           1 : }
    1042             : 
    1043             : /*
    1044             :  * Get the depth of the GiST index.
    1045             :  */
    1046             : static int
    1047           1 : gistGetMaxLevel(Relation index)
    1048             : {
    1049             :     int         maxLevel;
    1050             :     BlockNumber blkno;
    1051             : 
    1052             :     /*
    1053             :      * Traverse down the tree, starting from the root, until we hit the leaf
    1054             :      * level.
    1055             :      */
    1056           1 :     maxLevel = 0;
    1057           1 :     blkno = GIST_ROOT_BLKNO;
    1058             :     while (true)
    1059             :     {
    1060             :         Buffer      buffer;
    1061             :         Page        page;
    1062             :         IndexTuple  itup;
    1063             : 
    1064           2 :         buffer = ReadBuffer(index, blkno);
    1065             : 
    1066             :         /*
    1067             :          * There's no concurrent access during index build, so locking is just
    1068             :          * pro forma.
    1069             :          */
    1070           2 :         LockBuffer(buffer, GIST_SHARE);
    1071           2 :         page = (Page) BufferGetPage(buffer);
    1072             : 
    1073           2 :         if (GistPageIsLeaf(page))
    1074             :         {
    1075             :             /* We hit the bottom, so we're done. */
    1076           1 :             UnlockReleaseBuffer(buffer);
    1077           1 :             break;
    1078             :         }
    1079             : 
    1080             :         /*
    1081             :          * Pick the first downlink on the page, and follow it. It doesn't
    1082             :          * matter which downlink we choose, the tree has the same depth
    1083             :          * everywhere, so we just pick the first one.
    1084             :          */
    1085           1 :         itup = (IndexTuple) PageGetItem(page,
    1086             :                                         PageGetItemId(page, FirstOffsetNumber));
    1087           1 :         blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
    1088           1 :         UnlockReleaseBuffer(buffer);
    1089             : 
    1090             :         /*
    1091             :          * We're going down on the tree. It means that there is yet one more
    1092             :          * level in the tree.
    1093             :          */
    1094           1 :         maxLevel++;
    1095           1 :     }
    1096           1 :     return maxLevel;
    1097             : }
    1098             : 
    1099             : 
    1100             : /*
    1101             :  * Routines for managing the parent map.
    1102             :  *
    1103             :  * Whenever a page is split, we need to insert the downlinks into the parent.
    1104             :  * We need to somehow find the parent page to do that. In normal insertions,
    1105             :  * we keep a stack of nodes visited when we descend the tree. However, in
    1106             :  * buffering build, we can start descending the tree from any internal node,
    1107             :  * when we empty a buffer by cascading tuples to its children. So we don't
    1108             :  * have a full stack up to the root available at that time.
    1109             :  *
    1110             :  * So instead, we maintain a hash table to track the parent of every internal
    1111             :  * page. We don't need to track the parents of leaf nodes, however. Whenever
    1112             :  * we insert to a leaf, we've just descended down from its parent, so we know
    1113             :  * its immediate parent already. This helps a lot to limit the memory used
    1114             :  * by this hash table.
    1115             :  *
    1116             :  * Whenever an internal node is split, the parent map needs to be updated.
    1117             :  * the parent of the new child page needs to be recorded, and also the
    1118             :  * entries for all page whose downlinks are moved to a new page at the split
    1119             :  * needs to be updated.
    1120             :  *
    1121             :  * We also update the parent map whenever we descend the tree. That might seem
    1122             :  * unnecessary, because we maintain the map whenever a downlink is moved or
    1123             :  * created, but it is needed because we switch to buffering mode after
    1124             :  * creating a tree with regular index inserts. Any pages created before
    1125             :  * switching to buffering mode will not be present in the parent map initially,
    1126             :  * but will be added there the first time we visit them.
    1127             :  */
    1128             : 
    1129             : typedef struct
    1130             : {
    1131             :     BlockNumber childblkno;     /* hash key */
    1132             :     BlockNumber parentblkno;
    1133             : } ParentMapEntry;
    1134             : 
    1135             : static void
    1136           1 : gistInitParentMap(GISTBuildState *buildstate)
    1137             : {
    1138             :     HASHCTL     hashCtl;
    1139             : 
    1140           1 :     hashCtl.keysize = sizeof(BlockNumber);
    1141           1 :     hashCtl.entrysize = sizeof(ParentMapEntry);
    1142           1 :     hashCtl.hcxt = CurrentMemoryContext;
    1143           1 :     buildstate->parentMap = hash_create("gistbuild parent map",
    1144             :                                         1024,
    1145             :                                         &hashCtl,
    1146             :                                         HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
    1147           1 : }
    1148             : 
    1149             : static void
    1150       15822 : gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child, BlockNumber parent)
    1151             : {
    1152             :     ParentMapEntry *entry;
    1153             :     bool        found;
    1154             : 
    1155       15822 :     entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
    1156             :                                            (const void *) &child,
    1157             :                                            HASH_ENTER,
    1158             :                                            &found);
    1159       15822 :     entry->parentblkno = parent;
    1160       15822 : }
    1161             : 
    1162             : /*
    1163             :  * Scan all downlinks on a page, and memorize their parent.
    1164             :  */
    1165             : static void
    1166           2 : gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parentbuf)
    1167             : {
    1168             :     OffsetNumber maxoff;
    1169             :     OffsetNumber off;
    1170           2 :     BlockNumber parentblkno = BufferGetBlockNumber(parentbuf);
    1171           2 :     Page        page = BufferGetPage(parentbuf);
    1172             : 
    1173           2 :     Assert(!GistPageIsLeaf(page));
    1174             : 
    1175           2 :     maxoff = PageGetMaxOffsetNumber(page);
    1176          95 :     for (off = FirstOffsetNumber; off <= maxoff; off++)
    1177             :     {
    1178          93 :         ItemId      iid = PageGetItemId(page, off);
    1179          93 :         IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
    1180          93 :         BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
    1181             : 
    1182          93 :         gistMemorizeParent(buildstate, childblkno, parentblkno);
    1183             :     }
    1184           2 : }
    1185             : 
    1186             : static BlockNumber
    1187           3 : gistGetParent(GISTBuildState *buildstate, BlockNumber child)
    1188             : {
    1189             :     ParentMapEntry *entry;
    1190             :     bool        found;
    1191             : 
    1192             :     /* Find node buffer in hash table */
    1193           3 :     entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
    1194             :                                            (const void *) &child,
    1195             :                                            HASH_FIND,
    1196             :                                            &found);
    1197           3 :     if (!found)
    1198           0 :         elog(ERROR, "could not find parent of block %d in lookup table", child);
    1199             : 
    1200           3 :     return entry->parentblkno;
    1201             : }

Generated by: LCOV version 1.11