LCOV - code coverage report
Current view: top level - src/backend/access/gin - gindatapage.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 536 677 79.2 %
Date: 2017-09-29 15:12:54 Functions: 26 29 89.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * gindatapage.c
       4             :  *    routines for handling GIN posting tree pages.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/gin/gindatapage.c
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/gin_private.h"
      18             : #include "access/ginxlog.h"
      19             : #include "access/xloginsert.h"
      20             : #include "lib/ilist.h"
      21             : #include "miscadmin.h"
      22             : #include "utils/rel.h"
      23             : 
      24             : /*
      25             :  * Min, Max and Target size of posting lists stored on leaf pages, in bytes.
      26             :  *
      27             :  * The code can deal with any size, but random access is more efficient when
      28             :  * a number of smaller lists are stored, rather than one big list. If a
      29             :  * posting list would become larger than Max size as a result of insertions,
      30             :  * it is split into two. If a posting list would be smaller than minimum
      31             :  * size, it is merged with the next posting list.
      32             :  */
      33             : #define GinPostingListSegmentMaxSize 384
      34             : #define GinPostingListSegmentTargetSize 256
      35             : #define GinPostingListSegmentMinSize 128
      36             : 
      37             : /*
      38             :  * At least this many items fit in a GinPostingListSegmentMaxSize-bytes
      39             :  * long segment. This is used when estimating how much space is required
      40             :  * for N items, at minimum.
      41             :  */
      42             : #define MinTuplesPerSegment ((GinPostingListSegmentMaxSize - 2) / 6)
      43             : 
      44             : /*
      45             :  * A working struct for manipulating a posting tree leaf page.
      46             :  */
      47             : typedef struct
      48             : {
      49             :     dlist_head  segments;       /* a list of leafSegmentInfos */
      50             : 
      51             :     /*
      52             :      * The following fields represent how the segments are split across pages,
      53             :      * if a page split is required. Filled in by leafRepackItems.
      54             :      */
      55             :     dlist_node *lastleft;       /* last segment on left page */
      56             :     int         lsize;          /* total size on left page */
      57             :     int         rsize;          /* total size on right page */
      58             : 
      59             :     bool        oldformat;      /* page is in pre-9.4 format on disk */
      60             : 
      61             :     /*
      62             :      * If we need WAL data representing the reconstructed leaf page, it's
      63             :      * stored here by computeLeafRecompressWALData.
      64             :      */
      65             :     char       *walinfo;        /* buffer start */
      66             :     int         walinfolen;     /* and length */
      67             : } disassembledLeaf;
      68             : 
      69             : typedef struct
      70             : {
      71             :     dlist_node  node;           /* linked list pointers */
      72             : 
      73             :     /*-------------
      74             :      * 'action' indicates the status of this in-memory segment, compared to
      75             :      * what's on disk. It is one of the GIN_SEGMENT_* action codes:
      76             :      *
      77             :      * UNMODIFIED   no changes
      78             :      * DELETE       the segment is to be removed. 'seg' and 'items' are
      79             :      *              ignored
      80             :      * INSERT       this is a completely new segment
      81             :      * REPLACE      this replaces an existing segment with new content
      82             :      * ADDITEMS     like REPLACE, but no items have been removed, and we track
      83             :      *              in detail what items have been added to this segment, in
      84             :      *              'modifieditems'
      85             :      *-------------
      86             :      */
      87             :     char        action;
      88             : 
      89             :     ItemPointerData *modifieditems;
      90             :     uint16      nmodifieditems;
      91             : 
      92             :     /*
      93             :      * The following fields represent the items in this segment. If 'items' is
      94             :      * not NULL, it contains a palloc'd array of the itemsin this segment. If
      95             :      * 'seg' is not NULL, it contains the items in an already-compressed
      96             :      * format. It can point to an on-disk page (!modified), or a palloc'd
      97             :      * segment in memory. If both are set, they must represent the same items.
      98             :      */
      99             :     GinPostingList *seg;
     100             :     ItemPointer items;
     101             :     int         nitems;         /* # of items in 'items', if items != NULL */
     102             : } leafSegmentInfo;
     103             : 
     104             : static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
     105             : static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
     106             :                       GinBtreeStack *stack,
     107             :                       void *insertdata, BlockNumber updateblkno,
     108             :                       Page *newlpage, Page *newrpage);
     109             : 
     110             : static disassembledLeaf *disassembleLeaf(Page page);
     111             : static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
     112             : static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
     113             :                int nNewItems);
     114             : 
     115             : static void computeLeafRecompressWALData(disassembledLeaf *leaf);
     116             : static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
     117             : static void dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
     118             :                          ItemPointerData lbound, ItemPointerData rbound,
     119             :                          Page lpage, Page rpage);
     120             : 
     121             : /*
     122             :  * Read TIDs from leaf data page to single uncompressed array. The TIDs are
     123             :  * returned in ascending order.
     124             :  *
     125             :  * advancePast is a hint, indicating that the caller is only interested in
     126             :  * TIDs > advancePast. To return all items, use ItemPointerSetMin.
     127             :  *
     128             :  * Note: This function can still return items smaller than advancePast that
     129             :  * are in the same posting list as the items of interest, so the caller must
     130             :  * still check all the returned items. But passing it allows this function to
     131             :  * skip whole posting lists.
     132             :  */
     133             : ItemPointer
     134           6 : GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast)
     135             : {
     136             :     ItemPointer result;
     137             : 
     138           6 :     if (GinPageIsCompressed(page))
     139             :     {
     140           6 :         GinPostingList *seg = GinDataLeafPageGetPostingList(page);
     141           6 :         Size        len = GinDataLeafPageGetPostingListSize(page);
     142           6 :         Pointer     endptr = ((Pointer) seg) + len;
     143             :         GinPostingList *next;
     144             : 
     145             :         /* Skip to the segment containing advancePast+1 */
     146           6 :         if (ItemPointerIsValid(&advancePast))
     147             :         {
     148           4 :             next = GinNextPostingListSegment(seg);
     149          12 :             while ((Pointer) next < endptr &&
     150           4 :                    ginCompareItemPointers(&next->first, &advancePast) <= 0)
     151             :             {
     152           0 :                 seg = next;
     153           0 :                 next = GinNextPostingListSegment(seg);
     154             :             }
     155           4 :             len = endptr - (Pointer) seg;
     156             :         }
     157             : 
     158           6 :         if (len > 0)
     159           6 :             result = ginPostingListDecodeAllSegments(seg, len, nitems);
     160             :         else
     161             :         {
     162           0 :             result = NULL;
     163           0 :             *nitems = 0;
     164             :         }
     165             :     }
     166             :     else
     167             :     {
     168           0 :         ItemPointer tmp = dataLeafPageGetUncompressed(page, nitems);
     169             : 
     170           0 :         result = palloc((*nitems) * sizeof(ItemPointerData));
     171           0 :         memcpy(result, tmp, (*nitems) * sizeof(ItemPointerData));
     172             :     }
     173             : 
     174           6 :     return result;
     175             : }
     176             : 
     177             : /*
     178             :  * Places all TIDs from leaf data page to bitmap.
     179             :  */
     180             : int
     181           0 : GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm)
     182             : {
     183             :     ItemPointer uncompressed;
     184             :     int         nitems;
     185             : 
     186           0 :     if (GinPageIsCompressed(page))
     187             :     {
     188           0 :         GinPostingList *segment = GinDataLeafPageGetPostingList(page);
     189           0 :         Size        len = GinDataLeafPageGetPostingListSize(page);
     190             : 
     191           0 :         nitems = ginPostingListDecodeAllSegmentsToTbm(segment, len, tbm);
     192             :     }
     193             :     else
     194             :     {
     195           0 :         uncompressed = dataLeafPageGetUncompressed(page, &nitems);
     196             : 
     197           0 :         if (nitems > 0)
     198           0 :             tbm_add_tuples(tbm, uncompressed, nitems, false);
     199             :     }
     200             : 
     201           0 :     return nitems;
     202             : }
     203             : 
     204             : /*
     205             :  * Get pointer to the uncompressed array of items on a pre-9.4 format
     206             :  * uncompressed leaf page. The number of items in the array is returned in
     207             :  * *nitems.
     208             :  */
     209             : static ItemPointer
     210           0 : dataLeafPageGetUncompressed(Page page, int *nitems)
     211             : {
     212             :     ItemPointer items;
     213             : 
     214           0 :     Assert(!GinPageIsCompressed(page));
     215             : 
     216             :     /*
     217             :      * In the old pre-9.4 page format, the whole page content is used for
     218             :      * uncompressed items, and the number of items is stored in 'maxoff'
     219             :      */
     220           0 :     items = (ItemPointer) GinDataPageGetData(page);
     221           0 :     *nitems = GinPageGetOpaque(page)->maxoff;
     222             : 
     223           0 :     return items;
     224             : }
     225             : 
     226             : /*
     227             :  * Check if we should follow the right link to find the item we're searching
     228             :  * for.
     229             :  *
     230             :  * Compares inserting item pointer with the right bound of the current page.
     231             :  */
     232             : static bool
     233        3005 : dataIsMoveRight(GinBtree btree, Page page)
     234             : {
     235        3005 :     ItemPointer iptr = GinDataPageGetRightBound(page);
     236             : 
     237        3005 :     if (GinPageRightMost(page))
     238        3005 :         return FALSE;
     239             : 
     240           0 :     return (ginCompareItemPointers(&btree->itemptr, iptr) > 0) ? TRUE : FALSE;
     241             : }
     242             : 
     243             : /*
     244             :  * Find correct PostingItem in non-leaf page. It is assumed that this is
     245             :  * the correct page, and the searched value SHOULD be on the page.
     246             :  */
     247             : static BlockNumber
     248        3007 : dataLocateItem(GinBtree btree, GinBtreeStack *stack)
     249             : {
     250             :     OffsetNumber low,
     251             :                 high,
     252             :                 maxoff;
     253        3007 :     PostingItem *pitem = NULL;
     254             :     int         result;
     255        3007 :     Page        page = BufferGetPage(stack->buffer);
     256             : 
     257        3007 :     Assert(!GinPageIsLeaf(page));
     258        3007 :     Assert(GinPageIsData(page));
     259             : 
     260        3007 :     if (btree->fullScan)
     261             :     {
     262           2 :         stack->off = FirstOffsetNumber;
     263           2 :         stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
     264           2 :         return btree->getLeftMostChild(btree, page);
     265             :     }
     266             : 
     267        3005 :     low = FirstOffsetNumber;
     268        3005 :     maxoff = high = GinPageGetOpaque(page)->maxoff;
     269        3005 :     Assert(high >= low);
     270             : 
     271        3005 :     high++;
     272             : 
     273       12020 :     while (high > low)
     274             :     {
     275        6010 :         OffsetNumber mid = low + ((high - low) / 2);
     276             : 
     277        6010 :         pitem = GinDataPageGetPostingItem(page, mid);
     278             : 
     279        6010 :         if (mid == maxoff)
     280             :         {
     281             :             /*
     282             :              * Right infinity, page already correctly chosen with a help of
     283             :              * dataIsMoveRight
     284             :              */
     285        3005 :             result = -1;
     286             :         }
     287             :         else
     288             :         {
     289        3005 :             pitem = GinDataPageGetPostingItem(page, mid);
     290        3005 :             result = ginCompareItemPointers(&btree->itemptr, &(pitem->key));
     291             :         }
     292             : 
     293        6010 :         if (result == 0)
     294             :         {
     295           0 :             stack->off = mid;
     296           0 :             return PostingItemGetBlockNumber(pitem);
     297             :         }
     298        6010 :         else if (result > 0)
     299        3005 :             low = mid + 1;
     300             :         else
     301        3005 :             high = mid;
     302             :     }
     303             : 
     304        3005 :     Assert(high >= FirstOffsetNumber && high <= maxoff);
     305             : 
     306        3005 :     stack->off = high;
     307        3005 :     pitem = GinDataPageGetPostingItem(page, high);
     308        3005 :     return PostingItemGetBlockNumber(pitem);
     309             : }
     310             : 
     311             : /*
     312             :  * Find link to blkno on non-leaf page, returns offset of PostingItem
     313             :  */
     314             : static OffsetNumber
     315           3 : dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
     316             : {
     317             :     OffsetNumber i,
     318           3 :                 maxoff = GinPageGetOpaque(page)->maxoff;
     319             :     PostingItem *pitem;
     320             : 
     321           3 :     Assert(!GinPageIsLeaf(page));
     322           3 :     Assert(GinPageIsData(page));
     323             : 
     324             :     /* if page isn't changed, we return storedOff */
     325           3 :     if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
     326             :     {
     327           3 :         pitem = GinDataPageGetPostingItem(page, storedOff);
     328           3 :         if (PostingItemGetBlockNumber(pitem) == blkno)
     329           3 :             return storedOff;
     330             : 
     331             :         /*
     332             :          * we hope, that needed pointer goes to right. It's true if there
     333             :          * wasn't a deletion
     334             :          */
     335           0 :         for (i = storedOff + 1; i <= maxoff; i++)
     336             :         {
     337           0 :             pitem = GinDataPageGetPostingItem(page, i);
     338           0 :             if (PostingItemGetBlockNumber(pitem) == blkno)
     339           0 :                 return i;
     340             :         }
     341             : 
     342           0 :         maxoff = storedOff - 1;
     343             :     }
     344             : 
     345             :     /* last chance */
     346           0 :     for (i = FirstOffsetNumber; i <= maxoff; i++)
     347             :     {
     348           0 :         pitem = GinDataPageGetPostingItem(page, i);
     349           0 :         if (PostingItemGetBlockNumber(pitem) == blkno)
     350           0 :             return i;
     351             :     }
     352             : 
     353           0 :     return InvalidOffsetNumber;
     354             : }
     355             : 
     356             : /*
     357             :  * Return blkno of leftmost child
     358             :  */
     359             : static BlockNumber
     360           2 : dataGetLeftMostPage(GinBtree btree, Page page)
     361             : {
     362             :     PostingItem *pitem;
     363             : 
     364           2 :     Assert(!GinPageIsLeaf(page));
     365           2 :     Assert(GinPageIsData(page));
     366           2 :     Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
     367             : 
     368           2 :     pitem = GinDataPageGetPostingItem(page, FirstOffsetNumber);
     369           2 :     return PostingItemGetBlockNumber(pitem);
     370             : }
     371             : 
     372             : /*
     373             :  * Add PostingItem to a non-leaf page.
     374             :  */
     375             : void
     376           9 : GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
     377             : {
     378           9 :     OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
     379             :     char       *ptr;
     380             : 
     381           9 :     Assert(PostingItemGetBlockNumber(data) != InvalidBlockNumber);
     382           9 :     Assert(!GinPageIsLeaf(page));
     383             : 
     384           9 :     if (offset == InvalidOffsetNumber)
     385             :     {
     386           6 :         ptr = (char *) GinDataPageGetPostingItem(page, maxoff + 1);
     387             :     }
     388             :     else
     389             :     {
     390           3 :         ptr = (char *) GinDataPageGetPostingItem(page, offset);
     391           3 :         if (offset != maxoff + 1)
     392           3 :             memmove(ptr + sizeof(PostingItem),
     393             :                     ptr,
     394           3 :                     (maxoff - offset + 1) * sizeof(PostingItem));
     395             :     }
     396           9 :     memcpy(ptr, data, sizeof(PostingItem));
     397             : 
     398           9 :     maxoff++;
     399           9 :     GinPageGetOpaque(page)->maxoff = maxoff;
     400             : 
     401             :     /*
     402             :      * Also set pd_lower to the end of the posting items, to follow the
     403             :      * "standard" page layout, so that we can squeeze out the unused space
     404             :      * from full-page images.
     405             :      */
     406           9 :     GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
     407           9 : }
     408             : 
     409             : /*
     410             :  * Delete posting item from non-leaf page
     411             :  */
     412             : void
     413           2 : GinPageDeletePostingItem(Page page, OffsetNumber offset)
     414             : {
     415           2 :     OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
     416             : 
     417           2 :     Assert(!GinPageIsLeaf(page));
     418           2 :     Assert(offset >= FirstOffsetNumber && offset <= maxoff);
     419             : 
     420           2 :     if (offset != maxoff)
     421           4 :         memmove(GinDataPageGetPostingItem(page, offset),
     422           2 :                 GinDataPageGetPostingItem(page, offset + 1),
     423           2 :                 sizeof(PostingItem) * (maxoff - offset));
     424             : 
     425           2 :     maxoff--;
     426           2 :     GinPageGetOpaque(page)->maxoff = maxoff;
     427             : 
     428           2 :     GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
     429           2 : }
     430             : 
     431             : /*
     432             :  * Prepare to insert data on a leaf data page.
     433             :  *
     434             :  * If it will fit, return GPTP_INSERT after doing whatever setup is needed
     435             :  * before we enter the insertion critical section.  *ptp_workspace can be
     436             :  * set to pass information along to the execPlaceToPage function.
     437             :  *
     438             :  * If it won't fit, perform a page split and return two temporary page
     439             :  * images into *newlpage and *newrpage, with result GPTP_SPLIT.
     440             :  *
     441             :  * In neither case should the given page buffer be modified here.
     442             :  */
     443             : static GinPlaceToPageRC
     444        3337 : dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
     445             :                          void *insertdata,
     446             :                          void **ptp_workspace,
     447             :                          Page *newlpage, Page *newrpage)
     448             : {
     449        3337 :     GinBtreeDataLeafInsertData *items = insertdata;
     450        3337 :     ItemPointer newItems = &items->items[items->curitem];
     451        3337 :     int         maxitems = items->nitem - items->curitem;
     452        3337 :     Page        page = BufferGetPage(buf);
     453             :     int         i;
     454             :     ItemPointerData rbound;
     455             :     ItemPointerData lbound;
     456             :     bool        needsplit;
     457             :     bool        append;
     458             :     int         segsize;
     459             :     Size        freespace;
     460             :     disassembledLeaf *leaf;
     461             :     leafSegmentInfo *lastleftinfo;
     462             :     ItemPointerData maxOldItem;
     463             :     ItemPointerData remaining;
     464             : 
     465        3337 :     rbound = *GinDataPageGetRightBound(page);
     466             : 
     467             :     /*
     468             :      * Count how many of the new items belong to this page.
     469             :      */
     470        3337 :     if (!GinPageRightMost(page))
     471             :     {
     472           0 :         for (i = 0; i < maxitems; i++)
     473             :         {
     474           0 :             if (ginCompareItemPointers(&newItems[i], &rbound) > 0)
     475             :             {
     476             :                 /*
     477             :                  * This needs to go to some other location in the tree. (The
     478             :                  * caller should've chosen the insert location so that at
     479             :                  * least the first item goes here.)
     480             :                  */
     481           0 :                 Assert(i > 0);
     482           0 :                 break;
     483             :             }
     484             :         }
     485           0 :         maxitems = i;
     486             :     }
     487             : 
     488             :     /* Disassemble the data on the page */
     489        3337 :     leaf = disassembleLeaf(page);
     490             : 
     491             :     /*
     492             :      * Are we appending to the end of the page? IOW, are all the new items
     493             :      * larger than any of the existing items.
     494             :      */
     495        3337 :     if (!dlist_is_empty(&leaf->segments))
     496             :     {
     497        3337 :         lastleftinfo = dlist_container(leafSegmentInfo, node,
     498             :                                        dlist_tail_node(&leaf->segments));
     499        3337 :         if (!lastleftinfo->items)
     500        3337 :             lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
     501             :                                                        &lastleftinfo->nitems);
     502        3337 :         maxOldItem = lastleftinfo->items[lastleftinfo->nitems - 1];
     503        3337 :         if (ginCompareItemPointers(&newItems[0], &maxOldItem) >= 0)
     504        3337 :             append = true;
     505             :         else
     506           0 :             append = false;
     507             :     }
     508             :     else
     509             :     {
     510           0 :         ItemPointerSetMin(&maxOldItem);
     511           0 :         append = true;
     512             :     }
     513             : 
     514             :     /*
     515             :      * If we're appending to the end of the page, we will append as many items
     516             :      * as we can fit (after splitting), and stop when the pages becomes full.
     517             :      * Otherwise we have to limit the number of new items to insert, because
     518             :      * once we start packing we can't just stop when we run out of space,
     519             :      * because we must make sure that all the old items still fit.
     520             :      */
     521        3337 :     if (GinPageIsCompressed(page))
     522        3337 :         freespace = GinDataLeafPageGetFreeSpace(page);
     523             :     else
     524           0 :         freespace = 0;
     525        3337 :     if (append)
     526             :     {
     527             :         /*
     528             :          * Even when appending, trying to append more items than will fit is
     529             :          * not completely free, because we will merge the new items and old
     530             :          * items into an array below. In the best case, every new item fits in
     531             :          * a single byte, and we can use all the free space on the old page as
     532             :          * well as the new page. For simplicity, ignore segment overhead etc.
     533             :          */
     534        3337 :         maxitems = Min(maxitems, freespace + GinDataPageMaxDataSize);
     535             :     }
     536             :     else
     537             :     {
     538             :         /*
     539             :          * Calculate a conservative estimate of how many new items we can fit
     540             :          * on the two pages after splitting.
     541             :          *
     542             :          * We can use any remaining free space on the old page to store full
     543             :          * segments, as well as the new page. Each full-sized segment can hold
     544             :          * at least MinTuplesPerSegment items
     545             :          */
     546             :         int         nnewsegments;
     547             : 
     548           0 :         nnewsegments = freespace / GinPostingListSegmentMaxSize;
     549           0 :         nnewsegments += GinDataPageMaxDataSize / GinPostingListSegmentMaxSize;
     550           0 :         maxitems = Min(maxitems, nnewsegments * MinTuplesPerSegment);
     551             :     }
     552             : 
     553             :     /* Add the new items to the segment list */
     554        3337 :     if (!addItemsToLeaf(leaf, newItems, maxitems))
     555             :     {
     556             :         /* all items were duplicates, we have nothing to do */
     557           0 :         items->curitem += maxitems;
     558             : 
     559           0 :         return GPTP_NO_WORK;
     560             :     }
     561             : 
     562             :     /*
     563             :      * Pack the items back to compressed segments, ready for writing to disk.
     564             :      */
     565        3337 :     needsplit = leafRepackItems(leaf, &remaining);
     566             : 
     567             :     /*
     568             :      * Did all the new items fit?
     569             :      *
     570             :      * If we're appending, it's OK if they didn't. But as a sanity check,
     571             :      * verify that all the old items fit.
     572             :      */
     573        3337 :     if (ItemPointerIsValid(&remaining))
     574             :     {
     575           2 :         if (!append || ItemPointerCompare(&maxOldItem, &remaining) >= 0)
     576           0 :             elog(ERROR, "could not split GIN page; all old items didn't fit");
     577             : 
     578             :         /* Count how many of the new items did fit. */
     579       15318 :         for (i = 0; i < maxitems; i++)
     580             :         {
     581       15318 :             if (ginCompareItemPointers(&newItems[i], &remaining) >= 0)
     582           2 :                 break;
     583             :         }
     584           2 :         if (i == 0)
     585           0 :             elog(ERROR, "could not split GIN page; no new items fit");
     586           2 :         maxitems = i;
     587             :     }
     588             : 
     589        3337 :     if (!needsplit)
     590             :     {
     591             :         /*
     592             :          * Great, all the items fit on a single page.  If needed, prepare data
     593             :          * for a WAL record describing the changes we'll make.
     594             :          */
     595        3331 :         if (RelationNeedsWAL(btree->index))
     596        3331 :             computeLeafRecompressWALData(leaf);
     597             : 
     598             :         /*
     599             :          * We're ready to enter the critical section, but
     600             :          * dataExecPlaceToPageLeaf will need access to the "leaf" data.
     601             :          */
     602        3331 :         *ptp_workspace = leaf;
     603             : 
     604        3331 :         if (append)
     605        3331 :             elog(DEBUG2, "appended %d new items to block %u; %d bytes (%d to go)",
     606             :                  maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize,
     607             :                  items->nitem - items->curitem - maxitems);
     608             :         else
     609           0 :             elog(DEBUG2, "inserted %d new items to block %u; %d bytes (%d to go)",
     610             :                  maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize,
     611             :                  items->nitem - items->curitem - maxitems);
     612             :     }
     613             :     else
     614             :     {
     615             :         /*
     616             :          * Have to split.
     617             :          *
     618             :          * leafRepackItems already divided the segments between the left and
     619             :          * the right page. It filled the left page as full as possible, and
     620             :          * put the rest to the right page. When building a new index, that's
     621             :          * good, because the table is scanned from beginning to end and there
     622             :          * won't be any more insertions to the left page during the build.
     623             :          * This packs the index as tight as possible. But otherwise, split
     624             :          * 50/50, by moving segments from the left page to the right page
     625             :          * until they're balanced.
     626             :          *
     627             :          * As a further heuristic, when appending items to the end of the
     628             :          * page, try to make the left page 75% full, on the assumption that
     629             :          * subsequent insertions will probably also go to the end. This packs
     630             :          * the index somewhat tighter when appending to a table, which is very
     631             :          * common.
     632             :          */
     633           6 :         if (!btree->isBuild)
     634             :         {
     635          27 :             while (dlist_has_prev(&leaf->segments, leaf->lastleft))
     636             :             {
     637          22 :                 lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
     638             : 
     639             :                 /* ignore deleted segments */
     640          22 :                 if (lastleftinfo->action != GIN_SEGMENT_DELETE)
     641             :                 {
     642          22 :                     segsize = SizeOfGinPostingList(lastleftinfo->seg);
     643             : 
     644             :                     /*
     645             :                      * Note that we check that the right page doesn't become
     646             :                      * more full than the left page even when appending. It's
     647             :                      * possible that we added enough items to make both pages
     648             :                      * more than 75% full.
     649             :                      */
     650          22 :                     if ((leaf->lsize - segsize) - (leaf->rsize + segsize) < 0)
     651           4 :                         break;
     652          18 :                     if (append)
     653             :                     {
     654          18 :                         if ((leaf->lsize - segsize) < (BLCKSZ * 3) / 4)
     655           1 :                             break;
     656             :                     }
     657             : 
     658          17 :                     leaf->lsize -= segsize;
     659          17 :                     leaf->rsize += segsize;
     660             :                 }
     661          17 :                 leaf->lastleft = dlist_prev_node(&leaf->segments, leaf->lastleft);
     662             :             }
     663             :         }
     664           6 :         Assert(leaf->lsize <= GinDataPageMaxDataSize);
     665           6 :         Assert(leaf->rsize <= GinDataPageMaxDataSize);
     666             : 
     667             :         /*
     668             :          * Fetch the max item in the left page's last segment; it becomes the
     669             :          * right bound of the page.
     670             :          */
     671           6 :         lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
     672           6 :         if (!lastleftinfo->items)
     673           6 :             lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
     674             :                                                        &lastleftinfo->nitems);
     675           6 :         lbound = lastleftinfo->items[lastleftinfo->nitems - 1];
     676             : 
     677             :         /*
     678             :          * Now allocate a couple of temporary page images, and fill them.
     679             :          */
     680           6 :         *newlpage = palloc(BLCKSZ);
     681           6 :         *newrpage = palloc(BLCKSZ);
     682             : 
     683           6 :         dataPlaceToPageLeafSplit(leaf, lbound, rbound,
     684             :                                  *newlpage, *newrpage);
     685             : 
     686           6 :         Assert(GinPageRightMost(page) ||
     687             :                ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
     688             :                                       GinDataPageGetRightBound(*newrpage)) < 0);
     689             : 
     690           6 :         if (append)
     691           6 :             elog(DEBUG2, "appended %d items to block %u; split %d/%d (%d to go)",
     692             :                  maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize,
     693             :                  items->nitem - items->curitem - maxitems);
     694             :         else
     695           0 :             elog(DEBUG2, "inserted %d items to block %u; split %d/%d (%d to go)",
     696             :                  maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize,
     697             :                  items->nitem - items->curitem - maxitems);
     698             :     }
     699             : 
     700        3337 :     items->curitem += maxitems;
     701             : 
     702        3337 :     return needsplit ? GPTP_SPLIT : GPTP_INSERT;
     703             : }
     704             : 
     705             : /*
     706             :  * Perform data insertion after beginPlaceToPage has decided it will fit.
     707             :  *
     708             :  * This is invoked within a critical section, and XLOG record creation (if
     709             :  * needed) is already started.  The target buffer is registered in slot 0.
     710             :  */
     711             : static void
     712        3331 : dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
     713             :                         void *insertdata, void *ptp_workspace)
     714             : {
     715        3331 :     disassembledLeaf *leaf = (disassembledLeaf *) ptp_workspace;
     716             : 
     717             :     /* Apply changes to page */
     718        3331 :     dataPlaceToPageLeafRecompress(buf, leaf);
     719             : 
     720             :     /* If needed, register WAL data built by computeLeafRecompressWALData */
     721        3331 :     if (RelationNeedsWAL(btree->index))
     722             :     {
     723        3331 :         XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
     724             :     }
     725        3331 : }
     726             : 
     727             : /*
     728             :  * Vacuum a posting tree leaf page.
     729             :  */
     730             : void
     731           8 : ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
     732             : {
     733           8 :     Page        page = BufferGetPage(buffer);
     734             :     disassembledLeaf *leaf;
     735           8 :     bool        removedsomething = false;
     736             :     dlist_iter  iter;
     737             : 
     738           8 :     leaf = disassembleLeaf(page);
     739             : 
     740             :     /* Vacuum each segment. */
     741         179 :     dlist_foreach(iter, &leaf->segments)
     742             :     {
     743         171 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
     744             :         int         oldsegsize;
     745             :         ItemPointer cleaned;
     746             :         int         ncleaned;
     747             : 
     748         171 :         if (!seginfo->items)
     749         171 :             seginfo->items = ginPostingListDecode(seginfo->seg,
     750             :                                                   &seginfo->nitems);
     751         171 :         if (seginfo->seg)
     752         171 :             oldsegsize = SizeOfGinPostingList(seginfo->seg);
     753             :         else
     754           0 :             oldsegsize = GinDataPageMaxDataSize;
     755             : 
     756         171 :         cleaned = ginVacuumItemPointers(gvs,
     757             :                                         seginfo->items,
     758             :                                         seginfo->nitems,
     759             :                                         &ncleaned);
     760         171 :         pfree(seginfo->items);
     761         171 :         seginfo->items = NULL;
     762         171 :         seginfo->nitems = 0;
     763         171 :         if (cleaned)
     764             :         {
     765         145 :             if (ncleaned > 0)
     766             :             {
     767             :                 int         npacked;
     768             : 
     769           5 :                 seginfo->seg = ginCompressPostingList(cleaned,
     770             :                                                       ncleaned,
     771             :                                                       oldsegsize,
     772             :                                                       &npacked);
     773             :                 /* Removing an item never increases the size of the segment */
     774           5 :                 if (npacked != ncleaned)
     775           0 :                     elog(ERROR, "could not fit vacuumed posting list");
     776           5 :                 seginfo->action = GIN_SEGMENT_REPLACE;
     777             :             }
     778             :             else
     779             :             {
     780         140 :                 seginfo->seg = NULL;
     781         140 :                 seginfo->items = NULL;
     782         140 :                 seginfo->action = GIN_SEGMENT_DELETE;
     783             :             }
     784         145 :             seginfo->nitems = ncleaned;
     785             : 
     786         145 :             removedsomething = true;
     787             :         }
     788             :     }
     789             : 
     790             :     /*
     791             :      * If we removed any items, reconstruct the page from the pieces.
     792             :      *
     793             :      * We don't try to re-encode the segments here, even though some of them
     794             :      * might be really small now that we've removed some items from them. It
     795             :      * seems like a waste of effort, as there isn't really any benefit from
     796             :      * larger segments per se; larger segments only help to pack more items in
     797             :      * the same space. We might as well delay doing that until the next
     798             :      * insertion, which will need to re-encode at least part of the page
     799             :      * anyway.
     800             :      *
     801             :      * Also note if the page was in uncompressed, pre-9.4 format before, it is
     802             :      * now represented as one huge segment that contains all the items. It
     803             :      * might make sense to split that, to speed up random access, but we don't
     804             :      * bother. You'll have to REINDEX anyway if you want the full gain of the
     805             :      * new tighter index format.
     806             :      */
     807           8 :     if (removedsomething)
     808             :     {
     809             :         bool        modified;
     810             : 
     811             :         /*
     812             :          * Make sure we have a palloc'd copy of all segments, after the first
     813             :          * segment that is modified. (dataPlaceToPageLeafRecompress requires
     814             :          * this).
     815             :          */
     816           8 :         modified = false;
     817         179 :         dlist_foreach(iter, &leaf->segments)
     818             :         {
     819         171 :             leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
     820             :                                                        iter.cur);
     821             : 
     822         171 :             if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
     823         145 :                 modified = true;
     824         171 :             if (modified && seginfo->action != GIN_SEGMENT_DELETE)
     825             :             {
     826          29 :                 int         segsize = SizeOfGinPostingList(seginfo->seg);
     827          29 :                 GinPostingList *tmp = (GinPostingList *) palloc(segsize);
     828             : 
     829          29 :                 memcpy(tmp, seginfo->seg, segsize);
     830          29 :                 seginfo->seg = tmp;
     831             :             }
     832             :         }
     833             : 
     834           8 :         if (RelationNeedsWAL(indexrel))
     835           8 :             computeLeafRecompressWALData(leaf);
     836             : 
     837             :         /* Apply changes to page */
     838           8 :         START_CRIT_SECTION();
     839             : 
     840           8 :         dataPlaceToPageLeafRecompress(buffer, leaf);
     841             : 
     842           8 :         MarkBufferDirty(buffer);
     843             : 
     844           8 :         if (RelationNeedsWAL(indexrel))
     845             :         {
     846             :             XLogRecPtr  recptr;
     847             : 
     848           8 :             XLogBeginInsert();
     849           8 :             XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
     850           8 :             XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
     851           8 :             recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
     852           8 :             PageSetLSN(page, recptr);
     853             :         }
     854             : 
     855           8 :         END_CRIT_SECTION();
     856             :     }
     857           8 : }
     858             : 
     859             : /*
     860             :  * Construct a ginxlogRecompressDataLeaf record representing the changes
     861             :  * in *leaf.  (Because this requires a palloc, we have to do it before
     862             :  * we enter the critical section that actually updates the page.)
     863             :  */
     864             : static void
     865        3339 : computeLeafRecompressWALData(disassembledLeaf *leaf)
     866             : {
     867        3339 :     int         nmodified = 0;
     868             :     char       *walbufbegin;
     869             :     char       *walbufend;
     870             :     dlist_iter  iter;
     871             :     int         segno;
     872             :     ginxlogRecompressDataLeaf *recompress_xlog;
     873             : 
     874             :     /* Count the modified segments */
     875       63452 :     dlist_foreach(iter, &leaf->segments)
     876             :     {
     877       60113 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
     878             :                                                    iter.cur);
     879             : 
     880       60113 :         if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
     881        3479 :             nmodified++;
     882             :     }
     883             : 
     884        3339 :     walbufbegin =
     885        3339 :         palloc(sizeof(ginxlogRecompressDataLeaf) +
     886             :                BLCKSZ +         /* max size needed to hold the segment data */
     887        3339 :                nmodified * 2    /* (segno + action) per action */
     888             :         );
     889        3339 :     walbufend = walbufbegin;
     890             : 
     891        3339 :     recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
     892        3339 :     walbufend += sizeof(ginxlogRecompressDataLeaf);
     893             : 
     894        3339 :     recompress_xlog->nactions = nmodified;
     895             : 
     896        3339 :     segno = 0;
     897       63452 :     dlist_foreach(iter, &leaf->segments)
     898             :     {
     899       60113 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
     900             :                                                    iter.cur);
     901       60113 :         int         segsize = 0;
     902             :         int         datalen;
     903       60113 :         uint8       action = seginfo->action;
     904             : 
     905       60113 :         if (action == GIN_SEGMENT_UNMODIFIED)
     906             :         {
     907       56634 :             segno++;
     908       56634 :             continue;
     909             :         }
     910             : 
     911        3479 :         if (action != GIN_SEGMENT_DELETE)
     912        3339 :             segsize = SizeOfGinPostingList(seginfo->seg);
     913             : 
     914             :         /*
     915             :          * If storing the uncompressed list of added item pointers would take
     916             :          * more space than storing the compressed segment as is, do that
     917             :          * instead.
     918             :          */
     919        6794 :         if (action == GIN_SEGMENT_ADDITEMS &&
     920        3315 :             seginfo->nmodifieditems * sizeof(ItemPointerData) > segsize)
     921             :         {
     922           0 :             action = GIN_SEGMENT_REPLACE;
     923             :         }
     924             : 
     925        3479 :         *((uint8 *) (walbufend++)) = segno;
     926        3479 :         *(walbufend++) = action;
     927             : 
     928        3479 :         switch (action)
     929             :         {
     930             :             case GIN_SEGMENT_DELETE:
     931         140 :                 datalen = 0;
     932         140 :                 break;
     933             : 
     934             :             case GIN_SEGMENT_ADDITEMS:
     935        3315 :                 datalen = seginfo->nmodifieditems * sizeof(ItemPointerData);
     936        3315 :                 memcpy(walbufend, &seginfo->nmodifieditems, sizeof(uint16));
     937        3315 :                 memcpy(walbufend + sizeof(uint16), seginfo->modifieditems, datalen);
     938        3315 :                 datalen += sizeof(uint16);
     939        3315 :                 break;
     940             : 
     941             :             case GIN_SEGMENT_INSERT:
     942             :             case GIN_SEGMENT_REPLACE:
     943          24 :                 datalen = SHORTALIGN(segsize);
     944          24 :                 memcpy(walbufend, seginfo->seg, segsize);
     945          24 :                 break;
     946             : 
     947             :             default:
     948           0 :                 elog(ERROR, "unexpected GIN leaf action %d", action);
     949             :         }
     950        3479 :         walbufend += datalen;
     951             : 
     952        3479 :         if (action != GIN_SEGMENT_INSERT)
     953        3460 :             segno++;
     954             :     }
     955             : 
     956             :     /* Pass back the constructed info via *leaf */
     957        3339 :     leaf->walinfo = walbufbegin;
     958        3339 :     leaf->walinfolen = walbufend - walbufbegin;
     959        3339 : }
     960             : 
     961             : /*
     962             :  * Assemble a disassembled posting tree leaf page back to a buffer.
     963             :  *
     964             :  * This just updates the target buffer; WAL stuff is caller's responsibility.
     965             :  *
     966             :  * NOTE: The segment pointers must not point directly to the same buffer,
     967             :  * except for segments that have not been modified and whose preceding
     968             :  * segments have not been modified either.
     969             :  */
     970             : static void
     971        3339 : dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
     972             : {
     973        3339 :     Page        page = BufferGetPage(buf);
     974             :     char       *ptr;
     975             :     int         newsize;
     976        3339 :     bool        modified = false;
     977             :     dlist_iter  iter;
     978             :     int         segsize;
     979             : 
     980             :     /*
     981             :      * If the page was in pre-9.4 format before, convert the header, and force
     982             :      * all segments to be copied to the page whether they were modified or
     983             :      * not.
     984             :      */
     985        3339 :     if (!GinPageIsCompressed(page))
     986             :     {
     987           0 :         Assert(leaf->oldformat);
     988           0 :         GinPageSetCompressed(page);
     989           0 :         GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
     990           0 :         modified = true;
     991             :     }
     992             : 
     993        3339 :     ptr = (char *) GinDataLeafPageGetPostingList(page);
     994        3339 :     newsize = 0;
     995       63452 :     dlist_foreach(iter, &leaf->segments)
     996             :     {
     997       60113 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
     998             : 
     999       60113 :         if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
    1000        3479 :             modified = true;
    1001             : 
    1002       60113 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1003             :         {
    1004       59973 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1005             : 
    1006       59973 :             if (modified)
    1007        3363 :                 memcpy(ptr, seginfo->seg, segsize);
    1008             : 
    1009       59973 :             ptr += segsize;
    1010       59973 :             newsize += segsize;
    1011             :         }
    1012             :     }
    1013             : 
    1014        3339 :     Assert(newsize <= GinDataPageMaxDataSize);
    1015        3339 :     GinDataPageSetDataSize(page, newsize);
    1016        3339 : }
    1017             : 
    1018             : /*
    1019             :  * Like dataPlaceToPageLeafRecompress, but writes the disassembled leaf
    1020             :  * segments to two pages instead of one.
    1021             :  *
    1022             :  * This is different from the non-split cases in that this does not modify
    1023             :  * the original page directly, but writes to temporary in-memory copies of
    1024             :  * the new left and right pages.
    1025             :  */
    1026             : static void
    1027           6 : dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
    1028             :                          ItemPointerData lbound, ItemPointerData rbound,
    1029             :                          Page lpage, Page rpage)
    1030             : {
    1031             :     char       *ptr;
    1032             :     int         segsize;
    1033             :     int         lsize;
    1034             :     int         rsize;
    1035             :     dlist_node *node;
    1036             :     dlist_node *firstright;
    1037             :     leafSegmentInfo *seginfo;
    1038             : 
    1039             :     /* Initialize temporary pages to hold the new left and right pages */
    1040           6 :     GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
    1041           6 :     GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
    1042             : 
    1043             :     /*
    1044             :      * Copy the segments that go to the left page.
    1045             :      *
    1046             :      * XXX: We should skip copying the unmodified part of the left page, like
    1047             :      * we do when recompressing.
    1048             :      */
    1049           6 :     lsize = 0;
    1050           6 :     ptr = (char *) GinDataLeafPageGetPostingList(lpage);
    1051           6 :     firstright = dlist_next_node(&leaf->segments, leaf->lastleft);
    1052         151 :     for (node = dlist_head_node(&leaf->segments);
    1053             :          node != firstright;
    1054         139 :          node = dlist_next_node(&leaf->segments, node))
    1055             :     {
    1056         139 :         seginfo = dlist_container(leafSegmentInfo, node, node);
    1057             : 
    1058         139 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1059             :         {
    1060         139 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1061         139 :             memcpy(ptr, seginfo->seg, segsize);
    1062         139 :             ptr += segsize;
    1063         139 :             lsize += segsize;
    1064             :         }
    1065             :     }
    1066           6 :     Assert(lsize == leaf->lsize);
    1067           6 :     GinDataPageSetDataSize(lpage, lsize);
    1068           6 :     *GinDataPageGetRightBound(lpage) = lbound;
    1069             : 
    1070             :     /* Copy the segments that go to the right page */
    1071           6 :     ptr = (char *) GinDataLeafPageGetPostingList(rpage);
    1072           6 :     rsize = 0;
    1073           6 :     for (node = firstright;
    1074             :          ;
    1075         123 :          node = dlist_next_node(&leaf->segments, node))
    1076             :     {
    1077         129 :         seginfo = dlist_container(leafSegmentInfo, node, node);
    1078             : 
    1079         129 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1080             :         {
    1081         129 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1082         129 :             memcpy(ptr, seginfo->seg, segsize);
    1083         129 :             ptr += segsize;
    1084         129 :             rsize += segsize;
    1085             :         }
    1086             : 
    1087         129 :         if (!dlist_has_next(&leaf->segments, node))
    1088           6 :             break;
    1089         123 :     }
    1090           6 :     Assert(rsize == leaf->rsize);
    1091           6 :     GinDataPageSetDataSize(rpage, rsize);
    1092           6 :     *GinDataPageGetRightBound(rpage) = rbound;
    1093           6 : }
    1094             : 
    1095             : /*
    1096             :  * Prepare to insert data on an internal data page.
    1097             :  *
    1098             :  * If it will fit, return GPTP_INSERT after doing whatever setup is needed
    1099             :  * before we enter the insertion critical section.  *ptp_workspace can be
    1100             :  * set to pass information along to the execPlaceToPage function.
    1101             :  *
    1102             :  * If it won't fit, perform a page split and return two temporary page
    1103             :  * images into *newlpage and *newrpage, with result GPTP_SPLIT.
    1104             :  *
    1105             :  * In neither case should the given page buffer be modified here.
    1106             :  *
    1107             :  * Note: on insertion to an internal node, in addition to inserting the given
    1108             :  * item, the downlink of the existing item at stack->off will be updated to
    1109             :  * point to updateblkno.
    1110             :  */
    1111             : static GinPlaceToPageRC
    1112           3 : dataBeginPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1113             :                              void *insertdata, BlockNumber updateblkno,
    1114             :                              void **ptp_workspace,
    1115             :                              Page *newlpage, Page *newrpage)
    1116             : {
    1117           3 :     Page        page = BufferGetPage(buf);
    1118             : 
    1119             :     /* If it doesn't fit, deal with split case */
    1120           3 :     if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
    1121             :     {
    1122           0 :         dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
    1123             :                               newlpage, newrpage);
    1124           0 :         return GPTP_SPLIT;
    1125             :     }
    1126             : 
    1127             :     /* Else, we're ready to proceed with insertion */
    1128           3 :     return GPTP_INSERT;
    1129             : }
    1130             : 
    1131             : /*
    1132             :  * Perform data insertion after beginPlaceToPage has decided it will fit.
    1133             :  *
    1134             :  * This is invoked within a critical section, and XLOG record creation (if
    1135             :  * needed) is already started.  The target buffer is registered in slot 0.
    1136             :  */
    1137             : static void
    1138           3 : dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1139             :                             void *insertdata, BlockNumber updateblkno,
    1140             :                             void *ptp_workspace)
    1141             : {
    1142           3 :     Page        page = BufferGetPage(buf);
    1143           3 :     OffsetNumber off = stack->off;
    1144             :     PostingItem *pitem;
    1145             : 
    1146             :     /* Update existing downlink to point to next page (on internal page) */
    1147           3 :     pitem = GinDataPageGetPostingItem(page, off);
    1148           3 :     PostingItemSetBlockNumber(pitem, updateblkno);
    1149             : 
    1150             :     /* Add new item */
    1151           3 :     pitem = (PostingItem *) insertdata;
    1152           3 :     GinDataPageAddPostingItem(page, pitem, off);
    1153             : 
    1154           3 :     if (RelationNeedsWAL(btree->index))
    1155             :     {
    1156             :         /*
    1157             :          * This must be static, because it has to survive until XLogInsert,
    1158             :          * and we can't palloc here.  Ugly, but the XLogInsert infrastructure
    1159             :          * isn't reentrant anyway.
    1160             :          */
    1161             :         static ginxlogInsertDataInternal data;
    1162             : 
    1163           3 :         data.offset = off;
    1164           3 :         data.newitem = *pitem;
    1165             : 
    1166           3 :         XLogRegisterBufData(0, (char *) &data,
    1167             :                             sizeof(ginxlogInsertDataInternal));
    1168             :     }
    1169           3 : }
    1170             : 
    1171             : /*
    1172             :  * Prepare to insert data on a posting-tree data page.
    1173             :  *
    1174             :  * If it will fit, return GPTP_INSERT after doing whatever setup is needed
    1175             :  * before we enter the insertion critical section.  *ptp_workspace can be
    1176             :  * set to pass information along to the execPlaceToPage function.
    1177             :  *
    1178             :  * If it won't fit, perform a page split and return two temporary page
    1179             :  * images into *newlpage and *newrpage, with result GPTP_SPLIT.
    1180             :  *
    1181             :  * In neither case should the given page buffer be modified here.
    1182             :  *
    1183             :  * Note: on insertion to an internal node, in addition to inserting the given
    1184             :  * item, the downlink of the existing item at stack->off will be updated to
    1185             :  * point to updateblkno.
    1186             :  *
    1187             :  * Calls relevant function for internal or leaf page because they are handled
    1188             :  * very differently.
    1189             :  */
    1190             : static GinPlaceToPageRC
    1191        3340 : dataBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1192             :                      void *insertdata, BlockNumber updateblkno,
    1193             :                      void **ptp_workspace,
    1194             :                      Page *newlpage, Page *newrpage)
    1195             : {
    1196        3340 :     Page        page = BufferGetPage(buf);
    1197             : 
    1198        3340 :     Assert(GinPageIsData(page));
    1199             : 
    1200        3340 :     if (GinPageIsLeaf(page))
    1201        3337 :         return dataBeginPlaceToPageLeaf(btree, buf, stack, insertdata,
    1202             :                                         ptp_workspace,
    1203             :                                         newlpage, newrpage);
    1204             :     else
    1205           3 :         return dataBeginPlaceToPageInternal(btree, buf, stack,
    1206             :                                             insertdata, updateblkno,
    1207             :                                             ptp_workspace,
    1208             :                                             newlpage, newrpage);
    1209             : }
    1210             : 
    1211             : /*
    1212             :  * Perform data insertion after beginPlaceToPage has decided it will fit.
    1213             :  *
    1214             :  * This is invoked within a critical section, and XLOG record creation (if
    1215             :  * needed) is already started.  The target buffer is registered in slot 0.
    1216             :  *
    1217             :  * Calls relevant function for internal or leaf page because they are handled
    1218             :  * very differently.
    1219             :  */
    1220             : static void
    1221        3334 : dataExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
    1222             :                     void *insertdata, BlockNumber updateblkno,
    1223             :                     void *ptp_workspace)
    1224             : {
    1225        3334 :     Page        page = BufferGetPage(buf);
    1226             : 
    1227        3334 :     if (GinPageIsLeaf(page))
    1228        3331 :         dataExecPlaceToPageLeaf(btree, buf, stack, insertdata,
    1229             :                                 ptp_workspace);
    1230             :     else
    1231           3 :         dataExecPlaceToPageInternal(btree, buf, stack, insertdata,
    1232             :                                     updateblkno, ptp_workspace);
    1233        3334 : }
    1234             : 
    1235             : /*
    1236             :  * Split internal page and insert new data.
    1237             :  *
    1238             :  * Returns new temp pages to *newlpage and *newrpage.
    1239             :  * The original buffer is left untouched.
    1240             :  */
    1241             : static void
    1242           0 : dataSplitPageInternal(GinBtree btree, Buffer origbuf,
    1243             :                       GinBtreeStack *stack,
    1244             :                       void *insertdata, BlockNumber updateblkno,
    1245             :                       Page *newlpage, Page *newrpage)
    1246             : {
    1247           0 :     Page        oldpage = BufferGetPage(origbuf);
    1248           0 :     OffsetNumber off = stack->off;
    1249           0 :     int         nitems = GinPageGetOpaque(oldpage)->maxoff;
    1250             :     int         nleftitems;
    1251             :     int         nrightitems;
    1252           0 :     Size        pageSize = PageGetPageSize(oldpage);
    1253           0 :     ItemPointerData oldbound = *GinDataPageGetRightBound(oldpage);
    1254             :     ItemPointer bound;
    1255             :     Page        lpage;
    1256             :     Page        rpage;
    1257             :     OffsetNumber separator;
    1258             :     PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
    1259             : 
    1260           0 :     lpage = PageGetTempPage(oldpage);
    1261           0 :     rpage = PageGetTempPage(oldpage);
    1262           0 :     GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
    1263           0 :     GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
    1264             : 
    1265             :     /*
    1266             :      * First construct a new list of PostingItems, which includes all the old
    1267             :      * items, and the new item.
    1268             :      */
    1269           0 :     memcpy(allitems, GinDataPageGetPostingItem(oldpage, FirstOffsetNumber),
    1270             :            (off - 1) * sizeof(PostingItem));
    1271             : 
    1272           0 :     allitems[off - 1] = *((PostingItem *) insertdata);
    1273           0 :     memcpy(&allitems[off], GinDataPageGetPostingItem(oldpage, off),
    1274           0 :            (nitems - (off - 1)) * sizeof(PostingItem));
    1275           0 :     nitems++;
    1276             : 
    1277             :     /* Update existing downlink to point to next page */
    1278           0 :     PostingItemSetBlockNumber(&allitems[off], updateblkno);
    1279             : 
    1280             :     /*
    1281             :      * When creating a new index, fit as many tuples as possible on the left
    1282             :      * page, on the assumption that the table is scanned from beginning to
    1283             :      * end. This packs the index as tight as possible.
    1284             :      */
    1285           0 :     if (btree->isBuild && GinPageRightMost(oldpage))
    1286           0 :         separator = GinNonLeafDataPageGetFreeSpace(rpage) / sizeof(PostingItem);
    1287             :     else
    1288           0 :         separator = nitems / 2;
    1289           0 :     nleftitems = separator;
    1290           0 :     nrightitems = nitems - separator;
    1291             : 
    1292           0 :     memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
    1293             :            allitems,
    1294             :            nleftitems * sizeof(PostingItem));
    1295           0 :     GinPageGetOpaque(lpage)->maxoff = nleftitems;
    1296           0 :     memcpy(GinDataPageGetPostingItem(rpage, FirstOffsetNumber),
    1297           0 :            &allitems[separator],
    1298             :            nrightitems * sizeof(PostingItem));
    1299           0 :     GinPageGetOpaque(rpage)->maxoff = nrightitems;
    1300             : 
    1301             :     /*
    1302             :      * Also set pd_lower for both pages, like GinDataPageAddPostingItem does.
    1303             :      */
    1304           0 :     GinDataPageSetDataSize(lpage, nleftitems * sizeof(PostingItem));
    1305           0 :     GinDataPageSetDataSize(rpage, nrightitems * sizeof(PostingItem));
    1306             : 
    1307             :     /* set up right bound for left page */
    1308           0 :     bound = GinDataPageGetRightBound(lpage);
    1309           0 :     *bound = GinDataPageGetPostingItem(lpage, nleftitems)->key;
    1310             : 
    1311             :     /* set up right bound for right page */
    1312           0 :     *GinDataPageGetRightBound(rpage) = oldbound;
    1313             : 
    1314             :     /* return temp pages to caller */
    1315           0 :     *newlpage = lpage;
    1316           0 :     *newrpage = rpage;
    1317           0 : }
    1318             : 
    1319             : /*
    1320             :  * Construct insertion payload for inserting the downlink for given buffer.
    1321             :  */
    1322             : static void *
    1323           3 : dataPrepareDownlink(GinBtree btree, Buffer lbuf)
    1324             : {
    1325           3 :     PostingItem *pitem = palloc(sizeof(PostingItem));
    1326           3 :     Page        lpage = BufferGetPage(lbuf);
    1327             : 
    1328           3 :     PostingItemSetBlockNumber(pitem, BufferGetBlockNumber(lbuf));
    1329           3 :     pitem->key = *GinDataPageGetRightBound(lpage);
    1330             : 
    1331           3 :     return pitem;
    1332             : }
    1333             : 
    1334             : /*
    1335             :  * Fills new root by right bound values from child.
    1336             :  * Also called from ginxlog, should not use btree
    1337             :  */
    1338             : void
    1339           3 : ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage)
    1340             : {
    1341             :     PostingItem li,
    1342             :                 ri;
    1343             : 
    1344           3 :     li.key = *GinDataPageGetRightBound(lpage);
    1345           3 :     PostingItemSetBlockNumber(&li, lblkno);
    1346           3 :     GinDataPageAddPostingItem(root, &li, InvalidOffsetNumber);
    1347             : 
    1348           3 :     ri.key = *GinDataPageGetRightBound(rpage);
    1349           3 :     PostingItemSetBlockNumber(&ri, rblkno);
    1350           3 :     GinDataPageAddPostingItem(root, &ri, InvalidOffsetNumber);
    1351           3 : }
    1352             : 
    1353             : 
    1354             : /*** Functions to work with disassembled leaf pages ***/
    1355             : 
    1356             : /*
    1357             :  * Disassemble page into a disassembledLeaf struct.
    1358             :  */
    1359             : static disassembledLeaf *
    1360        3345 : disassembleLeaf(Page page)
    1361             : {
    1362             :     disassembledLeaf *leaf;
    1363             :     GinPostingList *seg;
    1364             :     Pointer     segbegin;
    1365             :     Pointer     segend;
    1366             : 
    1367        3345 :     leaf = palloc0(sizeof(disassembledLeaf));
    1368        3345 :     dlist_init(&leaf->segments);
    1369             : 
    1370        3345 :     if (GinPageIsCompressed(page))
    1371             :     {
    1372             :         /*
    1373             :          * Create a leafSegment entry for each segment.
    1374             :          */
    1375        3345 :         seg = GinDataLeafPageGetPostingList(page);
    1376        3345 :         segbegin = (Pointer) seg;
    1377        3345 :         segend = segbegin + GinDataLeafPageGetPostingListSize(page);
    1378       66941 :         while ((Pointer) seg < segend)
    1379             :         {
    1380       60251 :             leafSegmentInfo *seginfo = palloc(sizeof(leafSegmentInfo));
    1381             : 
    1382       60251 :             seginfo->action = GIN_SEGMENT_UNMODIFIED;
    1383       60251 :             seginfo->seg = seg;
    1384       60251 :             seginfo->items = NULL;
    1385       60251 :             seginfo->nitems = 0;
    1386       60251 :             dlist_push_tail(&leaf->segments, &seginfo->node);
    1387             : 
    1388       60251 :             seg = GinNextPostingListSegment(seg);
    1389             :         }
    1390        3345 :         leaf->oldformat = false;
    1391             :     }
    1392             :     else
    1393             :     {
    1394             :         /*
    1395             :          * A pre-9.4 format uncompressed page is represented by a single
    1396             :          * segment, with an array of items.
    1397             :          */
    1398             :         ItemPointer uncompressed;
    1399             :         int         nuncompressed;
    1400             :         leafSegmentInfo *seginfo;
    1401             : 
    1402           0 :         uncompressed = dataLeafPageGetUncompressed(page, &nuncompressed);
    1403             : 
    1404           0 :         seginfo = palloc(sizeof(leafSegmentInfo));
    1405             : 
    1406           0 :         seginfo->action = GIN_SEGMENT_REPLACE;
    1407           0 :         seginfo->seg = NULL;
    1408           0 :         seginfo->items = palloc(nuncompressed * sizeof(ItemPointerData));
    1409           0 :         memcpy(seginfo->items, uncompressed, nuncompressed * sizeof(ItemPointerData));
    1410           0 :         seginfo->nitems = nuncompressed;
    1411             : 
    1412           0 :         dlist_push_tail(&leaf->segments, &seginfo->node);
    1413             : 
    1414           0 :         leaf->oldformat = true;
    1415             :     }
    1416             : 
    1417        3345 :     return leaf;
    1418             : }
    1419             : 
    1420             : /*
    1421             :  * Distribute newItems to the segments.
    1422             :  *
    1423             :  * Any segments that acquire new items are decoded, and the new items are
    1424             :  * merged with the old items.
    1425             :  *
    1426             :  * Returns true if any new items were added. False means they were all
    1427             :  * duplicates of existing items on the page.
    1428             :  */
    1429             : static bool
    1430        3337 : addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems)
    1431             : {
    1432             :     dlist_iter  iter;
    1433        3337 :     ItemPointer nextnew = newItems;
    1434        3337 :     int         newleft = nNewItems;
    1435        3337 :     bool        modified = false;
    1436             :     leafSegmentInfo *newseg;
    1437             : 
    1438             :     /*
    1439             :      * If the page is completely empty, just construct one new segment to hold
    1440             :      * all the new items.
    1441             :      */
    1442        3337 :     if (dlist_is_empty(&leaf->segments))
    1443             :     {
    1444           0 :         newseg = palloc(sizeof(leafSegmentInfo));
    1445           0 :         newseg->seg = NULL;
    1446           0 :         newseg->items = newItems;
    1447           0 :         newseg->nitems = nNewItems;
    1448           0 :         newseg->action = GIN_SEGMENT_INSERT;
    1449           0 :         dlist_push_tail(&leaf->segments, &newseg->node);
    1450           0 :         return true;
    1451             :     }
    1452             : 
    1453       60080 :     dlist_foreach(iter, &leaf->segments)
    1454             :     {
    1455       60080 :         leafSegmentInfo *cur = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node, iter.cur);
    1456             :         int         nthis;
    1457             :         ItemPointer tmpitems;
    1458             :         int         ntmpitems;
    1459             : 
    1460             :         /*
    1461             :          * How many of the new items fall into this segment?
    1462             :          */
    1463       60080 :         if (!dlist_has_next(&leaf->segments, iter.cur))
    1464        3337 :             nthis = newleft;
    1465             :         else
    1466             :         {
    1467             :             leafSegmentInfo *next;
    1468             :             ItemPointerData next_first;
    1469             : 
    1470       56743 :             next = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node,
    1471             :                                                        dlist_next_node(&leaf->segments, iter.cur));
    1472       56743 :             if (next->items)
    1473        3337 :                 next_first = next->items[0];
    1474             :             else
    1475             :             {
    1476       53406 :                 Assert(next->seg != NULL);
    1477       53406 :                 next_first = next->seg->first;
    1478             :             }
    1479             : 
    1480       56743 :             nthis = 0;
    1481      113486 :             while (nthis < newleft && ginCompareItemPointers(&nextnew[nthis], &next_first) < 0)
    1482           0 :                 nthis++;
    1483             :         }
    1484       60080 :         if (nthis == 0)
    1485       56743 :             continue;
    1486             : 
    1487             :         /* Merge the new items with the existing items. */
    1488        3337 :         if (!cur->items)
    1489           0 :             cur->items = ginPostingListDecode(cur->seg, &cur->nitems);
    1490             : 
    1491             :         /*
    1492             :          * Fast path for the important special case that we're appending to
    1493             :          * the end of the page: don't let the last segment on the page grow
    1494             :          * larger than the target, create a new segment before that happens.
    1495             :          */
    1496        6674 :         if (!dlist_has_next(&leaf->segments, iter.cur) &&
    1497        6674 :             ginCompareItemPointers(&cur->items[cur->nitems - 1], &nextnew[0]) < 0 &&
    1498        6674 :             cur->seg != NULL &&
    1499        3337 :             SizeOfGinPostingList(cur->seg) >= GinPostingListSegmentTargetSize)
    1500             :         {
    1501          21 :             newseg = palloc(sizeof(leafSegmentInfo));
    1502          21 :             newseg->seg = NULL;
    1503          21 :             newseg->items = nextnew;
    1504          21 :             newseg->nitems = nthis;
    1505          21 :             newseg->action = GIN_SEGMENT_INSERT;
    1506          21 :             dlist_push_tail(&leaf->segments, &newseg->node);
    1507          21 :             modified = true;
    1508        3358 :             break;
    1509             :         }
    1510             : 
    1511        3316 :         tmpitems = ginMergeItemPointers(cur->items, cur->nitems,
    1512             :                                         nextnew, nthis,
    1513             :                                         &ntmpitems);
    1514        3316 :         if (ntmpitems != cur->nitems)
    1515             :         {
    1516             :             /*
    1517             :              * If there are no duplicates, track the added items so that we
    1518             :              * can emit a compact ADDITEMS WAL record later on. (it doesn't
    1519             :              * seem worth re-checking which items were duplicates, if there
    1520             :              * were any)
    1521             :              */
    1522        6632 :             if (ntmpitems == nthis + cur->nitems &&
    1523        3316 :                 cur->action == GIN_SEGMENT_UNMODIFIED)
    1524             :             {
    1525        3316 :                 cur->action = GIN_SEGMENT_ADDITEMS;
    1526        3316 :                 cur->modifieditems = nextnew;
    1527        3316 :                 cur->nmodifieditems = nthis;
    1528             :             }
    1529             :             else
    1530           0 :                 cur->action = GIN_SEGMENT_REPLACE;
    1531             : 
    1532        3316 :             cur->items = tmpitems;
    1533        3316 :             cur->nitems = ntmpitems;
    1534        3316 :             cur->seg = NULL;
    1535        3316 :             modified = true;
    1536             :         }
    1537             : 
    1538        3316 :         nextnew += nthis;
    1539        3316 :         newleft -= nthis;
    1540        3316 :         if (newleft == 0)
    1541        3316 :             break;
    1542             :     }
    1543             : 
    1544        3337 :     return modified;
    1545             : }
    1546             : 
    1547             : /*
    1548             :  * Recompresses all segments that have been modified.
    1549             :  *
    1550             :  * If not all the items fit on two pages (ie. after split), we store as
    1551             :  * many items as fit, and set *remaining to the first item that didn't fit.
    1552             :  * If all items fit, *remaining is set to invalid.
    1553             :  *
    1554             :  * Returns true if the page has to be split.
    1555             :  */
    1556             : static bool
    1557        3337 : leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
    1558             : {
    1559        3337 :     int         pgused = 0;
    1560        3337 :     bool        needsplit = false;
    1561             :     dlist_iter  iter;
    1562             :     int         segsize;
    1563             :     leafSegmentInfo *nextseg;
    1564             :     int         npacked;
    1565             :     bool        modified;
    1566             :     dlist_node *cur_node;
    1567             :     dlist_node *next_node;
    1568             : 
    1569        3337 :     ItemPointerSetInvalid(remaining);
    1570             : 
    1571             :     /*
    1572             :      * cannot use dlist_foreach_modify here because we insert adjacent items
    1573             :      * while iterating.
    1574             :      */
    1575       66884 :     for (cur_node = dlist_head_node(&leaf->segments);
    1576             :          cur_node != NULL;
    1577       60210 :          cur_node = next_node)
    1578             :     {
    1579       60212 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
    1580             :                                                    cur_node);
    1581             : 
    1582       60212 :         if (dlist_has_next(&leaf->segments, cur_node))
    1583       56764 :             next_node = dlist_next_node(&leaf->segments, cur_node);
    1584             :         else
    1585        3448 :             next_node = NULL;
    1586             : 
    1587             :         /* Compress the posting list, if necessary */
    1588       60212 :         if (seginfo->action != GIN_SEGMENT_DELETE)
    1589             :         {
    1590       60212 :             if (seginfo->seg == NULL)
    1591             :             {
    1592        3448 :                 if (seginfo->nitems > GinPostingListSegmentMaxSize)
    1593         113 :                     npacked = 0;    /* no chance that it would fit. */
    1594             :                 else
    1595             :                 {
    1596        3335 :                     seginfo->seg = ginCompressPostingList(seginfo->items,
    1597             :                                                           seginfo->nitems,
    1598             :                                                           GinPostingListSegmentMaxSize,
    1599             :                                                           &npacked);
    1600             :                 }
    1601        3448 :                 if (npacked != seginfo->nitems)
    1602             :                 {
    1603             :                     /*
    1604             :                      * Too large. Compress again to the target size, and
    1605             :                      * create a new segment to represent the remaining items.
    1606             :                      * The new segment is inserted after this one, so it will
    1607             :                      * be processed in the next iteration of this loop.
    1608             :                      */
    1609         113 :                     if (seginfo->seg)
    1610           0 :                         pfree(seginfo->seg);
    1611         113 :                     seginfo->seg = ginCompressPostingList(seginfo->items,
    1612             :                                                           seginfo->nitems,
    1613             :                                                           GinPostingListSegmentTargetSize,
    1614             :                                                           &npacked);
    1615         113 :                     if (seginfo->action != GIN_SEGMENT_INSERT)
    1616           0 :                         seginfo->action = GIN_SEGMENT_REPLACE;
    1617             : 
    1618         113 :                     nextseg = palloc(sizeof(leafSegmentInfo));
    1619         113 :                     nextseg->action = GIN_SEGMENT_INSERT;
    1620         113 :                     nextseg->seg = NULL;
    1621         113 :                     nextseg->items = &seginfo->items[npacked];
    1622         113 :                     nextseg->nitems = seginfo->nitems - npacked;
    1623         113 :                     next_node = &nextseg->node;
    1624         113 :                     dlist_insert_after(cur_node, next_node);
    1625             :                 }
    1626             :             }
    1627             : 
    1628             :             /*
    1629             :              * If the segment is very small, merge it with the next segment.
    1630             :              */
    1631       60212 :             if (SizeOfGinPostingList(seginfo->seg) < GinPostingListSegmentMinSize && next_node)
    1632             :             {
    1633             :                 int         nmerged;
    1634             : 
    1635           0 :                 nextseg = dlist_container(leafSegmentInfo, node, next_node);
    1636             : 
    1637           0 :                 if (seginfo->items == NULL)
    1638           0 :                     seginfo->items = ginPostingListDecode(seginfo->seg,
    1639             :                                                           &seginfo->nitems);
    1640           0 :                 if (nextseg->items == NULL)
    1641           0 :                     nextseg->items = ginPostingListDecode(nextseg->seg,
    1642             :                                                           &nextseg->nitems);
    1643           0 :                 nextseg->items =
    1644           0 :                     ginMergeItemPointers(seginfo->items, seginfo->nitems,
    1645           0 :                                          nextseg->items, nextseg->nitems,
    1646             :                                          &nmerged);
    1647           0 :                 Assert(nmerged == seginfo->nitems + nextseg->nitems);
    1648           0 :                 nextseg->nitems = nmerged;
    1649           0 :                 nextseg->seg = NULL;
    1650             : 
    1651           0 :                 nextseg->action = GIN_SEGMENT_REPLACE;
    1652           0 :                 nextseg->modifieditems = NULL;
    1653           0 :                 nextseg->nmodifieditems = 0;
    1654             : 
    1655           0 :                 if (seginfo->action == GIN_SEGMENT_INSERT)
    1656             :                 {
    1657           0 :                     dlist_delete(cur_node);
    1658           0 :                     continue;
    1659             :                 }
    1660             :                 else
    1661             :                 {
    1662           0 :                     seginfo->action = GIN_SEGMENT_DELETE;
    1663           0 :                     seginfo->seg = NULL;
    1664             :                 }
    1665             :             }
    1666             : 
    1667       60212 :             seginfo->items = NULL;
    1668       60212 :             seginfo->nitems = 0;
    1669             :         }
    1670             : 
    1671       60212 :         if (seginfo->action == GIN_SEGMENT_DELETE)
    1672           0 :             continue;
    1673             : 
    1674             :         /*
    1675             :          * OK, we now have a compressed version of this segment ready for
    1676             :          * copying to the page. Did we exceed the size that fits on one page?
    1677             :          */
    1678       60212 :         segsize = SizeOfGinPostingList(seginfo->seg);
    1679       60212 :         if (pgused + segsize > GinDataPageMaxDataSize)
    1680             :         {
    1681           8 :             if (!needsplit)
    1682             :             {
    1683             :                 /* switch to right page */
    1684           6 :                 Assert(pgused > 0);
    1685           6 :                 leaf->lastleft = dlist_prev_node(&leaf->segments, cur_node);
    1686           6 :                 needsplit = true;
    1687           6 :                 leaf->lsize = pgused;
    1688           6 :                 pgused = 0;
    1689             :             }
    1690             :             else
    1691             :             {
    1692             :                 /*
    1693             :                  * Filled both pages. The last segment we constructed did not
    1694             :                  * fit.
    1695             :                  */
    1696           2 :                 *remaining = seginfo->seg->first;
    1697             : 
    1698             :                 /*
    1699             :                  * remove all segments that did not fit from the list.
    1700             :                  */
    1701           6 :                 while (dlist_has_next(&leaf->segments, cur_node))
    1702           2 :                     dlist_delete(dlist_next_node(&leaf->segments, cur_node));
    1703           2 :                 dlist_delete(cur_node);
    1704           2 :                 break;
    1705             :             }
    1706             :         }
    1707             : 
    1708       60210 :         pgused += segsize;
    1709             :     }
    1710             : 
    1711        3337 :     if (!needsplit)
    1712             :     {
    1713        3331 :         leaf->lsize = pgused;
    1714        3331 :         leaf->rsize = 0;
    1715             :     }
    1716             :     else
    1717           6 :         leaf->rsize = pgused;
    1718             : 
    1719        3337 :     Assert(leaf->lsize <= GinDataPageMaxDataSize);
    1720        3337 :     Assert(leaf->rsize <= GinDataPageMaxDataSize);
    1721             : 
    1722             :     /*
    1723             :      * Make a palloc'd copy of every segment after the first modified one,
    1724             :      * because as we start copying items to the original page, we might
    1725             :      * overwrite an existing segment.
    1726             :      */
    1727        3337 :     modified = false;
    1728       63547 :     dlist_foreach(iter, &leaf->segments)
    1729             :     {
    1730       60210 :         leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
    1731             :                                                    iter.cur);
    1732             : 
    1733       60210 :         if (!modified && seginfo->action != GIN_SEGMENT_UNMODIFIED)
    1734             :         {
    1735        3337 :             modified = true;
    1736             :         }
    1737       56873 :         else if (modified && seginfo->action == GIN_SEGMENT_UNMODIFIED)
    1738             :         {
    1739             :             GinPostingList *tmp;
    1740             : 
    1741           0 :             segsize = SizeOfGinPostingList(seginfo->seg);
    1742           0 :             tmp = palloc(segsize);
    1743           0 :             memcpy(tmp, seginfo->seg, segsize);
    1744           0 :             seginfo->seg = tmp;
    1745             :         }
    1746             :     }
    1747             : 
    1748        3337 :     return needsplit;
    1749             : }
    1750             : 
    1751             : 
    1752             : /*** Functions that are exported to the rest of the GIN code ***/
    1753             : 
    1754             : /*
    1755             :  * Creates new posting tree containing the given TIDs. Returns the page
    1756             :  * number of the root of the new posting tree.
    1757             :  *
    1758             :  * items[] must be in sorted order with no duplicates.
    1759             :  */
    1760             : BlockNumber
    1761           4 : createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
    1762             :                   GinStatsData *buildStats)
    1763             : {
    1764             :     BlockNumber blkno;
    1765             :     Buffer      buffer;
    1766             :     Page        tmppage;
    1767             :     Page        page;
    1768             :     Pointer     ptr;
    1769             :     int         nrootitems;
    1770             :     int         rootsize;
    1771             : 
    1772             :     /* Construct the new root page in memory first. */
    1773           4 :     tmppage = (Page) palloc(BLCKSZ);
    1774           4 :     GinInitPage(tmppage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
    1775           4 :     GinPageGetOpaque(tmppage)->rightlink = InvalidBlockNumber;
    1776             : 
    1777             :     /*
    1778             :      * Write as many of the items to the root page as fit. In segments of max
    1779             :      * GinPostingListSegmentMaxSize bytes each.
    1780             :      */
    1781           4 :     nrootitems = 0;
    1782           4 :     rootsize = 0;
    1783           4 :     ptr = (Pointer) GinDataLeafPageGetPostingList(tmppage);
    1784          79 :     while (nrootitems < nitems)
    1785             :     {
    1786             :         GinPostingList *segment;
    1787             :         int         npacked;
    1788             :         int         segsize;
    1789             : 
    1790          74 :         segment = ginCompressPostingList(&items[nrootitems],
    1791          74 :                                          nitems - nrootitems,
    1792             :                                          GinPostingListSegmentMaxSize,
    1793             :                                          &npacked);
    1794          74 :         segsize = SizeOfGinPostingList(segment);
    1795          74 :         if (rootsize + segsize > GinDataPageMaxDataSize)
    1796           3 :             break;
    1797             : 
    1798          71 :         memcpy(ptr, segment, segsize);
    1799          71 :         ptr += segsize;
    1800          71 :         rootsize += segsize;
    1801          71 :         nrootitems += npacked;
    1802          71 :         pfree(segment);
    1803             :     }
    1804           4 :     GinDataPageSetDataSize(tmppage, rootsize);
    1805             : 
    1806             :     /*
    1807             :      * All set. Get a new physical page, and copy the in-memory page to it.
    1808             :      */
    1809           4 :     buffer = GinNewBuffer(index);
    1810           4 :     page = BufferGetPage(buffer);
    1811           4 :     blkno = BufferGetBlockNumber(buffer);
    1812             : 
    1813           4 :     START_CRIT_SECTION();
    1814             : 
    1815           4 :     PageRestoreTempPage(tmppage, page);
    1816           4 :     MarkBufferDirty(buffer);
    1817             : 
    1818           4 :     if (RelationNeedsWAL(index))
    1819             :     {
    1820             :         XLogRecPtr  recptr;
    1821             :         ginxlogCreatePostingTree data;
    1822             : 
    1823           4 :         data.size = rootsize;
    1824             : 
    1825           4 :         XLogBeginInsert();
    1826           4 :         XLogRegisterData((char *) &data, sizeof(ginxlogCreatePostingTree));
    1827             : 
    1828           4 :         XLogRegisterData((char *) GinDataLeafPageGetPostingList(page),
    1829             :                          rootsize);
    1830           4 :         XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
    1831             : 
    1832           4 :         recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
    1833           4 :         PageSetLSN(page, recptr);
    1834             :     }
    1835             : 
    1836           4 :     UnlockReleaseBuffer(buffer);
    1837             : 
    1838           4 :     END_CRIT_SECTION();
    1839             : 
    1840             :     /* During index build, count the newly-added data page */
    1841           4 :     if (buildStats)
    1842           1 :         buildStats->nDataPages++;
    1843             : 
    1844           4 :     elog(DEBUG2, "created GIN posting tree with %d items", nrootitems);
    1845             : 
    1846             :     /*
    1847             :      * Add any remaining TIDs to the newly-created posting tree.
    1848             :      */
    1849           4 :     if (nitems > nrootitems)
    1850             :     {
    1851           6 :         ginInsertItemPointers(index, blkno,
    1852           3 :                               items + nrootitems,
    1853             :                               nitems - nrootitems,
    1854             :                               buildStats);
    1855             :     }
    1856             : 
    1857           4 :     return blkno;
    1858             : }
    1859             : 
    1860             : void
    1861        3337 : ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno)
    1862             : {
    1863        3337 :     memset(btree, 0, sizeof(GinBtreeData));
    1864             : 
    1865        3337 :     btree->index = index;
    1866        3337 :     btree->rootBlkno = rootBlkno;
    1867             : 
    1868        3337 :     btree->findChildPage = dataLocateItem;
    1869        3337 :     btree->getLeftMostChild = dataGetLeftMostPage;
    1870        3337 :     btree->isMoveRight = dataIsMoveRight;
    1871        3337 :     btree->findItem = NULL;
    1872        3337 :     btree->findChildPtr = dataFindChildPtr;
    1873        3337 :     btree->beginPlaceToPage = dataBeginPlaceToPage;
    1874        3337 :     btree->execPlaceToPage = dataExecPlaceToPage;
    1875        3337 :     btree->fillRoot = ginDataFillRoot;
    1876        3337 :     btree->prepareDownlink = dataPrepareDownlink;
    1877             : 
    1878        3337 :     btree->isData = TRUE;
    1879        3337 :     btree->fullScan = FALSE;
    1880        3337 :     btree->isBuild = FALSE;
    1881        3337 : }
    1882             : 
    1883             : /*
    1884             :  * Inserts array of item pointers, may execute several tree scan (very rare)
    1885             :  */
    1886             : void
    1887        3335 : ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
    1888             :                       ItemPointerData *items, uint32 nitem,
    1889             :                       GinStatsData *buildStats)
    1890             : {
    1891             :     GinBtreeData btree;
    1892             :     GinBtreeDataLeafInsertData insertdata;
    1893             :     GinBtreeStack *stack;
    1894             : 
    1895        3335 :     ginPrepareDataScan(&btree, index, rootBlkno);
    1896        3335 :     btree.isBuild = (buildStats != NULL);
    1897        3335 :     insertdata.items = items;
    1898        3335 :     insertdata.nitem = nitem;
    1899        3335 :     insertdata.curitem = 0;
    1900             : 
    1901       10007 :     while (insertdata.curitem < insertdata.nitem)
    1902             :     {
    1903             :         /* search for the leaf page where the first item should go to */
    1904        3337 :         btree.itemptr = insertdata.items[insertdata.curitem];
    1905        3337 :         stack = ginFindLeafPage(&btree, false, NULL);
    1906             : 
    1907        3337 :         ginInsertValue(&btree, stack, &insertdata, buildStats);
    1908             :     }
    1909        3335 : }
    1910             : 
    1911             : /*
    1912             :  * Starts a new scan on a posting tree.
    1913             :  */
    1914             : GinBtreeStack *
    1915           2 : ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno,
    1916             :                         Snapshot snapshot)
    1917             : {
    1918             :     GinBtreeStack *stack;
    1919             : 
    1920           2 :     ginPrepareDataScan(btree, index, rootBlkno);
    1921             : 
    1922           2 :     btree->fullScan = TRUE;
    1923             : 
    1924           2 :     stack = ginFindLeafPage(btree, TRUE, snapshot);
    1925             : 
    1926           2 :     return stack;
    1927             : }

Generated by: LCOV version 1.11