Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gindatapage.c
4 : * routines for handling GIN posting tree pages.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/gindatapage.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/ginxlog.h"
19 : #include "access/xloginsert.h"
20 : #include "lib/ilist.h"
21 : #include "miscadmin.h"
22 : #include "utils/rel.h"
23 :
24 : /*
25 : * Min, Max and Target size of posting lists stored on leaf pages, in bytes.
26 : *
27 : * The code can deal with any size, but random access is more efficient when
28 : * a number of smaller lists are stored, rather than one big list. If a
29 : * posting list would become larger than Max size as a result of insertions,
30 : * it is split into two. If a posting list would be smaller than minimum
31 : * size, it is merged with the next posting list.
32 : */
33 : #define GinPostingListSegmentMaxSize 384
34 : #define GinPostingListSegmentTargetSize 256
35 : #define GinPostingListSegmentMinSize 128
36 :
37 : /*
38 : * At least this many items fit in a GinPostingListSegmentMaxSize-bytes
39 : * long segment. This is used when estimating how much space is required
40 : * for N items, at minimum.
41 : */
42 : #define MinTuplesPerSegment ((GinPostingListSegmentMaxSize - 2) / 6)
43 :
44 : /*
45 : * A working struct for manipulating a posting tree leaf page.
46 : */
47 : typedef struct
48 : {
49 : dlist_head segments; /* a list of leafSegmentInfos */
50 :
51 : /*
52 : * The following fields represent how the segments are split across pages,
53 : * if a page split is required. Filled in by leafRepackItems.
54 : */
55 : dlist_node *lastleft; /* last segment on left page */
56 : int lsize; /* total size on left page */
57 : int rsize; /* total size on right page */
58 :
59 : bool oldformat; /* page is in pre-9.4 format on disk */
60 :
61 : /*
62 : * If we need WAL data representing the reconstructed leaf page, it's
63 : * stored here by computeLeafRecompressWALData.
64 : */
65 : char *walinfo; /* buffer start */
66 : int walinfolen; /* and length */
67 : } disassembledLeaf;
68 :
69 : typedef struct
70 : {
71 : dlist_node node; /* linked list pointers */
72 :
73 : /*-------------
74 : * 'action' indicates the status of this in-memory segment, compared to
75 : * what's on disk. It is one of the GIN_SEGMENT_* action codes:
76 : *
77 : * UNMODIFIED no changes
78 : * DELETE the segment is to be removed. 'seg' and 'items' are
79 : * ignored
80 : * INSERT this is a completely new segment
81 : * REPLACE this replaces an existing segment with new content
82 : * ADDITEMS like REPLACE, but no items have been removed, and we track
83 : * in detail what items have been added to this segment, in
84 : * 'modifieditems'
85 : *-------------
86 : */
87 : char action;
88 :
89 : ItemPointerData *modifieditems;
90 : uint16 nmodifieditems;
91 :
92 : /*
93 : * The following fields represent the items in this segment. If 'items' is
94 : * not NULL, it contains a palloc'd array of the itemsin this segment. If
95 : * 'seg' is not NULL, it contains the items in an already-compressed
96 : * format. It can point to an on-disk page (!modified), or a palloc'd
97 : * segment in memory. If both are set, they must represent the same items.
98 : */
99 : GinPostingList *seg;
100 : ItemPointer items;
101 : int nitems; /* # of items in 'items', if items != NULL */
102 : } leafSegmentInfo;
103 :
104 : static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
105 : static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
106 : GinBtreeStack *stack,
107 : void *insertdata, BlockNumber updateblkno,
108 : Page *newlpage, Page *newrpage);
109 :
110 : static disassembledLeaf *disassembleLeaf(Page page);
111 : static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
112 : static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
113 : int nNewItems);
114 :
115 : static void computeLeafRecompressWALData(disassembledLeaf *leaf);
116 : static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
117 : static void dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
118 : ItemPointerData lbound, ItemPointerData rbound,
119 : Page lpage, Page rpage);
120 :
121 : /*
122 : * Read TIDs from leaf data page to single uncompressed array. The TIDs are
123 : * returned in ascending order.
124 : *
125 : * advancePast is a hint, indicating that the caller is only interested in
126 : * TIDs > advancePast. To return all items, use ItemPointerSetMin.
127 : *
128 : * Note: This function can still return items smaller than advancePast that
129 : * are in the same posting list as the items of interest, so the caller must
130 : * still check all the returned items. But passing it allows this function to
131 : * skip whole posting lists.
132 : */
133 : ItemPointer
134 6 : GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast)
135 : {
136 : ItemPointer result;
137 :
138 6 : if (GinPageIsCompressed(page))
139 : {
140 6 : GinPostingList *seg = GinDataLeafPageGetPostingList(page);
141 6 : Size len = GinDataLeafPageGetPostingListSize(page);
142 6 : Pointer endptr = ((Pointer) seg) + len;
143 : GinPostingList *next;
144 :
145 : /* Skip to the segment containing advancePast+1 */
146 6 : if (ItemPointerIsValid(&advancePast))
147 : {
148 4 : next = GinNextPostingListSegment(seg);
149 12 : while ((Pointer) next < endptr &&
150 4 : ginCompareItemPointers(&next->first, &advancePast) <= 0)
151 : {
152 0 : seg = next;
153 0 : next = GinNextPostingListSegment(seg);
154 : }
155 4 : len = endptr - (Pointer) seg;
156 : }
157 :
158 6 : if (len > 0)
159 6 : result = ginPostingListDecodeAllSegments(seg, len, nitems);
160 : else
161 : {
162 0 : result = NULL;
163 0 : *nitems = 0;
164 : }
165 : }
166 : else
167 : {
168 0 : ItemPointer tmp = dataLeafPageGetUncompressed(page, nitems);
169 :
170 0 : result = palloc((*nitems) * sizeof(ItemPointerData));
171 0 : memcpy(result, tmp, (*nitems) * sizeof(ItemPointerData));
172 : }
173 :
174 6 : return result;
175 : }
176 :
177 : /*
178 : * Places all TIDs from leaf data page to bitmap.
179 : */
180 : int
181 0 : GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm)
182 : {
183 : ItemPointer uncompressed;
184 : int nitems;
185 :
186 0 : if (GinPageIsCompressed(page))
187 : {
188 0 : GinPostingList *segment = GinDataLeafPageGetPostingList(page);
189 0 : Size len = GinDataLeafPageGetPostingListSize(page);
190 :
191 0 : nitems = ginPostingListDecodeAllSegmentsToTbm(segment, len, tbm);
192 : }
193 : else
194 : {
195 0 : uncompressed = dataLeafPageGetUncompressed(page, &nitems);
196 :
197 0 : if (nitems > 0)
198 0 : tbm_add_tuples(tbm, uncompressed, nitems, false);
199 : }
200 :
201 0 : return nitems;
202 : }
203 :
204 : /*
205 : * Get pointer to the uncompressed array of items on a pre-9.4 format
206 : * uncompressed leaf page. The number of items in the array is returned in
207 : * *nitems.
208 : */
209 : static ItemPointer
210 0 : dataLeafPageGetUncompressed(Page page, int *nitems)
211 : {
212 : ItemPointer items;
213 :
214 0 : Assert(!GinPageIsCompressed(page));
215 :
216 : /*
217 : * In the old pre-9.4 page format, the whole page content is used for
218 : * uncompressed items, and the number of items is stored in 'maxoff'
219 : */
220 0 : items = (ItemPointer) GinDataPageGetData(page);
221 0 : *nitems = GinPageGetOpaque(page)->maxoff;
222 :
223 0 : return items;
224 : }
225 :
226 : /*
227 : * Check if we should follow the right link to find the item we're searching
228 : * for.
229 : *
230 : * Compares inserting item pointer with the right bound of the current page.
231 : */
232 : static bool
233 3005 : dataIsMoveRight(GinBtree btree, Page page)
234 : {
235 3005 : ItemPointer iptr = GinDataPageGetRightBound(page);
236 :
237 3005 : if (GinPageRightMost(page))
238 3005 : return FALSE;
239 :
240 0 : return (ginCompareItemPointers(&btree->itemptr, iptr) > 0) ? TRUE : FALSE;
241 : }
242 :
243 : /*
244 : * Find correct PostingItem in non-leaf page. It is assumed that this is
245 : * the correct page, and the searched value SHOULD be on the page.
246 : */
247 : static BlockNumber
248 3007 : dataLocateItem(GinBtree btree, GinBtreeStack *stack)
249 : {
250 : OffsetNumber low,
251 : high,
252 : maxoff;
253 3007 : PostingItem *pitem = NULL;
254 : int result;
255 3007 : Page page = BufferGetPage(stack->buffer);
256 :
257 3007 : Assert(!GinPageIsLeaf(page));
258 3007 : Assert(GinPageIsData(page));
259 :
260 3007 : if (btree->fullScan)
261 : {
262 2 : stack->off = FirstOffsetNumber;
263 2 : stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
264 2 : return btree->getLeftMostChild(btree, page);
265 : }
266 :
267 3005 : low = FirstOffsetNumber;
268 3005 : maxoff = high = GinPageGetOpaque(page)->maxoff;
269 3005 : Assert(high >= low);
270 :
271 3005 : high++;
272 :
273 12020 : while (high > low)
274 : {
275 6010 : OffsetNumber mid = low + ((high - low) / 2);
276 :
277 6010 : pitem = GinDataPageGetPostingItem(page, mid);
278 :
279 6010 : if (mid == maxoff)
280 : {
281 : /*
282 : * Right infinity, page already correctly chosen with a help of
283 : * dataIsMoveRight
284 : */
285 3005 : result = -1;
286 : }
287 : else
288 : {
289 3005 : pitem = GinDataPageGetPostingItem(page, mid);
290 3005 : result = ginCompareItemPointers(&btree->itemptr, &(pitem->key));
291 : }
292 :
293 6010 : if (result == 0)
294 : {
295 0 : stack->off = mid;
296 0 : return PostingItemGetBlockNumber(pitem);
297 : }
298 6010 : else if (result > 0)
299 3005 : low = mid + 1;
300 : else
301 3005 : high = mid;
302 : }
303 :
304 3005 : Assert(high >= FirstOffsetNumber && high <= maxoff);
305 :
306 3005 : stack->off = high;
307 3005 : pitem = GinDataPageGetPostingItem(page, high);
308 3005 : return PostingItemGetBlockNumber(pitem);
309 : }
310 :
311 : /*
312 : * Find link to blkno on non-leaf page, returns offset of PostingItem
313 : */
314 : static OffsetNumber
315 3 : dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
316 : {
317 : OffsetNumber i,
318 3 : maxoff = GinPageGetOpaque(page)->maxoff;
319 : PostingItem *pitem;
320 :
321 3 : Assert(!GinPageIsLeaf(page));
322 3 : Assert(GinPageIsData(page));
323 :
324 : /* if page isn't changed, we return storedOff */
325 3 : if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
326 : {
327 3 : pitem = GinDataPageGetPostingItem(page, storedOff);
328 3 : if (PostingItemGetBlockNumber(pitem) == blkno)
329 3 : return storedOff;
330 :
331 : /*
332 : * we hope, that needed pointer goes to right. It's true if there
333 : * wasn't a deletion
334 : */
335 0 : for (i = storedOff + 1; i <= maxoff; i++)
336 : {
337 0 : pitem = GinDataPageGetPostingItem(page, i);
338 0 : if (PostingItemGetBlockNumber(pitem) == blkno)
339 0 : return i;
340 : }
341 :
342 0 : maxoff = storedOff - 1;
343 : }
344 :
345 : /* last chance */
346 0 : for (i = FirstOffsetNumber; i <= maxoff; i++)
347 : {
348 0 : pitem = GinDataPageGetPostingItem(page, i);
349 0 : if (PostingItemGetBlockNumber(pitem) == blkno)
350 0 : return i;
351 : }
352 :
353 0 : return InvalidOffsetNumber;
354 : }
355 :
356 : /*
357 : * Return blkno of leftmost child
358 : */
359 : static BlockNumber
360 2 : dataGetLeftMostPage(GinBtree btree, Page page)
361 : {
362 : PostingItem *pitem;
363 :
364 2 : Assert(!GinPageIsLeaf(page));
365 2 : Assert(GinPageIsData(page));
366 2 : Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
367 :
368 2 : pitem = GinDataPageGetPostingItem(page, FirstOffsetNumber);
369 2 : return PostingItemGetBlockNumber(pitem);
370 : }
371 :
372 : /*
373 : * Add PostingItem to a non-leaf page.
374 : */
375 : void
376 9 : GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
377 : {
378 9 : OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
379 : char *ptr;
380 :
381 9 : Assert(PostingItemGetBlockNumber(data) != InvalidBlockNumber);
382 9 : Assert(!GinPageIsLeaf(page));
383 :
384 9 : if (offset == InvalidOffsetNumber)
385 : {
386 6 : ptr = (char *) GinDataPageGetPostingItem(page, maxoff + 1);
387 : }
388 : else
389 : {
390 3 : ptr = (char *) GinDataPageGetPostingItem(page, offset);
391 3 : if (offset != maxoff + 1)
392 3 : memmove(ptr + sizeof(PostingItem),
393 : ptr,
394 3 : (maxoff - offset + 1) * sizeof(PostingItem));
395 : }
396 9 : memcpy(ptr, data, sizeof(PostingItem));
397 :
398 9 : maxoff++;
399 9 : GinPageGetOpaque(page)->maxoff = maxoff;
400 :
401 : /*
402 : * Also set pd_lower to the end of the posting items, to follow the
403 : * "standard" page layout, so that we can squeeze out the unused space
404 : * from full-page images.
405 : */
406 9 : GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
407 9 : }
408 :
409 : /*
410 : * Delete posting item from non-leaf page
411 : */
412 : void
413 2 : GinPageDeletePostingItem(Page page, OffsetNumber offset)
414 : {
415 2 : OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
416 :
417 2 : Assert(!GinPageIsLeaf(page));
418 2 : Assert(offset >= FirstOffsetNumber && offset <= maxoff);
419 :
420 2 : if (offset != maxoff)
421 4 : memmove(GinDataPageGetPostingItem(page, offset),
422 2 : GinDataPageGetPostingItem(page, offset + 1),
423 2 : sizeof(PostingItem) * (maxoff - offset));
424 :
425 2 : maxoff--;
426 2 : GinPageGetOpaque(page)->maxoff = maxoff;
427 :
428 2 : GinDataPageSetDataSize(page, maxoff * sizeof(PostingItem));
429 2 : }
430 :
431 : /*
432 : * Prepare to insert data on a leaf data page.
433 : *
434 : * If it will fit, return GPTP_INSERT after doing whatever setup is needed
435 : * before we enter the insertion critical section. *ptp_workspace can be
436 : * set to pass information along to the execPlaceToPage function.
437 : *
438 : * If it won't fit, perform a page split and return two temporary page
439 : * images into *newlpage and *newrpage, with result GPTP_SPLIT.
440 : *
441 : * In neither case should the given page buffer be modified here.
442 : */
443 : static GinPlaceToPageRC
444 3337 : dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
445 : void *insertdata,
446 : void **ptp_workspace,
447 : Page *newlpage, Page *newrpage)
448 : {
449 3337 : GinBtreeDataLeafInsertData *items = insertdata;
450 3337 : ItemPointer newItems = &items->items[items->curitem];
451 3337 : int maxitems = items->nitem - items->curitem;
452 3337 : Page page = BufferGetPage(buf);
453 : int i;
454 : ItemPointerData rbound;
455 : ItemPointerData lbound;
456 : bool needsplit;
457 : bool append;
458 : int segsize;
459 : Size freespace;
460 : disassembledLeaf *leaf;
461 : leafSegmentInfo *lastleftinfo;
462 : ItemPointerData maxOldItem;
463 : ItemPointerData remaining;
464 :
465 3337 : rbound = *GinDataPageGetRightBound(page);
466 :
467 : /*
468 : * Count how many of the new items belong to this page.
469 : */
470 3337 : if (!GinPageRightMost(page))
471 : {
472 0 : for (i = 0; i < maxitems; i++)
473 : {
474 0 : if (ginCompareItemPointers(&newItems[i], &rbound) > 0)
475 : {
476 : /*
477 : * This needs to go to some other location in the tree. (The
478 : * caller should've chosen the insert location so that at
479 : * least the first item goes here.)
480 : */
481 0 : Assert(i > 0);
482 0 : break;
483 : }
484 : }
485 0 : maxitems = i;
486 : }
487 :
488 : /* Disassemble the data on the page */
489 3337 : leaf = disassembleLeaf(page);
490 :
491 : /*
492 : * Are we appending to the end of the page? IOW, are all the new items
493 : * larger than any of the existing items.
494 : */
495 3337 : if (!dlist_is_empty(&leaf->segments))
496 : {
497 3337 : lastleftinfo = dlist_container(leafSegmentInfo, node,
498 : dlist_tail_node(&leaf->segments));
499 3337 : if (!lastleftinfo->items)
500 3337 : lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
501 : &lastleftinfo->nitems);
502 3337 : maxOldItem = lastleftinfo->items[lastleftinfo->nitems - 1];
503 3337 : if (ginCompareItemPointers(&newItems[0], &maxOldItem) >= 0)
504 3337 : append = true;
505 : else
506 0 : append = false;
507 : }
508 : else
509 : {
510 0 : ItemPointerSetMin(&maxOldItem);
511 0 : append = true;
512 : }
513 :
514 : /*
515 : * If we're appending to the end of the page, we will append as many items
516 : * as we can fit (after splitting), and stop when the pages becomes full.
517 : * Otherwise we have to limit the number of new items to insert, because
518 : * once we start packing we can't just stop when we run out of space,
519 : * because we must make sure that all the old items still fit.
520 : */
521 3337 : if (GinPageIsCompressed(page))
522 3337 : freespace = GinDataLeafPageGetFreeSpace(page);
523 : else
524 0 : freespace = 0;
525 3337 : if (append)
526 : {
527 : /*
528 : * Even when appending, trying to append more items than will fit is
529 : * not completely free, because we will merge the new items and old
530 : * items into an array below. In the best case, every new item fits in
531 : * a single byte, and we can use all the free space on the old page as
532 : * well as the new page. For simplicity, ignore segment overhead etc.
533 : */
534 3337 : maxitems = Min(maxitems, freespace + GinDataPageMaxDataSize);
535 : }
536 : else
537 : {
538 : /*
539 : * Calculate a conservative estimate of how many new items we can fit
540 : * on the two pages after splitting.
541 : *
542 : * We can use any remaining free space on the old page to store full
543 : * segments, as well as the new page. Each full-sized segment can hold
544 : * at least MinTuplesPerSegment items
545 : */
546 : int nnewsegments;
547 :
548 0 : nnewsegments = freespace / GinPostingListSegmentMaxSize;
549 0 : nnewsegments += GinDataPageMaxDataSize / GinPostingListSegmentMaxSize;
550 0 : maxitems = Min(maxitems, nnewsegments * MinTuplesPerSegment);
551 : }
552 :
553 : /* Add the new items to the segment list */
554 3337 : if (!addItemsToLeaf(leaf, newItems, maxitems))
555 : {
556 : /* all items were duplicates, we have nothing to do */
557 0 : items->curitem += maxitems;
558 :
559 0 : return GPTP_NO_WORK;
560 : }
561 :
562 : /*
563 : * Pack the items back to compressed segments, ready for writing to disk.
564 : */
565 3337 : needsplit = leafRepackItems(leaf, &remaining);
566 :
567 : /*
568 : * Did all the new items fit?
569 : *
570 : * If we're appending, it's OK if they didn't. But as a sanity check,
571 : * verify that all the old items fit.
572 : */
573 3337 : if (ItemPointerIsValid(&remaining))
574 : {
575 2 : if (!append || ItemPointerCompare(&maxOldItem, &remaining) >= 0)
576 0 : elog(ERROR, "could not split GIN page; all old items didn't fit");
577 :
578 : /* Count how many of the new items did fit. */
579 15318 : for (i = 0; i < maxitems; i++)
580 : {
581 15318 : if (ginCompareItemPointers(&newItems[i], &remaining) >= 0)
582 2 : break;
583 : }
584 2 : if (i == 0)
585 0 : elog(ERROR, "could not split GIN page; no new items fit");
586 2 : maxitems = i;
587 : }
588 :
589 3337 : if (!needsplit)
590 : {
591 : /*
592 : * Great, all the items fit on a single page. If needed, prepare data
593 : * for a WAL record describing the changes we'll make.
594 : */
595 3331 : if (RelationNeedsWAL(btree->index))
596 3331 : computeLeafRecompressWALData(leaf);
597 :
598 : /*
599 : * We're ready to enter the critical section, but
600 : * dataExecPlaceToPageLeaf will need access to the "leaf" data.
601 : */
602 3331 : *ptp_workspace = leaf;
603 :
604 3331 : if (append)
605 3331 : elog(DEBUG2, "appended %d new items to block %u; %d bytes (%d to go)",
606 : maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize,
607 : items->nitem - items->curitem - maxitems);
608 : else
609 0 : elog(DEBUG2, "inserted %d new items to block %u; %d bytes (%d to go)",
610 : maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize,
611 : items->nitem - items->curitem - maxitems);
612 : }
613 : else
614 : {
615 : /*
616 : * Have to split.
617 : *
618 : * leafRepackItems already divided the segments between the left and
619 : * the right page. It filled the left page as full as possible, and
620 : * put the rest to the right page. When building a new index, that's
621 : * good, because the table is scanned from beginning to end and there
622 : * won't be any more insertions to the left page during the build.
623 : * This packs the index as tight as possible. But otherwise, split
624 : * 50/50, by moving segments from the left page to the right page
625 : * until they're balanced.
626 : *
627 : * As a further heuristic, when appending items to the end of the
628 : * page, try to make the left page 75% full, on the assumption that
629 : * subsequent insertions will probably also go to the end. This packs
630 : * the index somewhat tighter when appending to a table, which is very
631 : * common.
632 : */
633 6 : if (!btree->isBuild)
634 : {
635 27 : while (dlist_has_prev(&leaf->segments, leaf->lastleft))
636 : {
637 22 : lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
638 :
639 : /* ignore deleted segments */
640 22 : if (lastleftinfo->action != GIN_SEGMENT_DELETE)
641 : {
642 22 : segsize = SizeOfGinPostingList(lastleftinfo->seg);
643 :
644 : /*
645 : * Note that we check that the right page doesn't become
646 : * more full than the left page even when appending. It's
647 : * possible that we added enough items to make both pages
648 : * more than 75% full.
649 : */
650 22 : if ((leaf->lsize - segsize) - (leaf->rsize + segsize) < 0)
651 4 : break;
652 18 : if (append)
653 : {
654 18 : if ((leaf->lsize - segsize) < (BLCKSZ * 3) / 4)
655 1 : break;
656 : }
657 :
658 17 : leaf->lsize -= segsize;
659 17 : leaf->rsize += segsize;
660 : }
661 17 : leaf->lastleft = dlist_prev_node(&leaf->segments, leaf->lastleft);
662 : }
663 : }
664 6 : Assert(leaf->lsize <= GinDataPageMaxDataSize);
665 6 : Assert(leaf->rsize <= GinDataPageMaxDataSize);
666 :
667 : /*
668 : * Fetch the max item in the left page's last segment; it becomes the
669 : * right bound of the page.
670 : */
671 6 : lastleftinfo = dlist_container(leafSegmentInfo, node, leaf->lastleft);
672 6 : if (!lastleftinfo->items)
673 6 : lastleftinfo->items = ginPostingListDecode(lastleftinfo->seg,
674 : &lastleftinfo->nitems);
675 6 : lbound = lastleftinfo->items[lastleftinfo->nitems - 1];
676 :
677 : /*
678 : * Now allocate a couple of temporary page images, and fill them.
679 : */
680 6 : *newlpage = palloc(BLCKSZ);
681 6 : *newrpage = palloc(BLCKSZ);
682 :
683 6 : dataPlaceToPageLeafSplit(leaf, lbound, rbound,
684 : *newlpage, *newrpage);
685 :
686 6 : Assert(GinPageRightMost(page) ||
687 : ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
688 : GinDataPageGetRightBound(*newrpage)) < 0);
689 :
690 6 : if (append)
691 6 : elog(DEBUG2, "appended %d items to block %u; split %d/%d (%d to go)",
692 : maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize,
693 : items->nitem - items->curitem - maxitems);
694 : else
695 0 : elog(DEBUG2, "inserted %d items to block %u; split %d/%d (%d to go)",
696 : maxitems, BufferGetBlockNumber(buf), (int) leaf->lsize, (int) leaf->rsize,
697 : items->nitem - items->curitem - maxitems);
698 : }
699 :
700 3337 : items->curitem += maxitems;
701 :
702 3337 : return needsplit ? GPTP_SPLIT : GPTP_INSERT;
703 : }
704 :
705 : /*
706 : * Perform data insertion after beginPlaceToPage has decided it will fit.
707 : *
708 : * This is invoked within a critical section, and XLOG record creation (if
709 : * needed) is already started. The target buffer is registered in slot 0.
710 : */
711 : static void
712 3331 : dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
713 : void *insertdata, void *ptp_workspace)
714 : {
715 3331 : disassembledLeaf *leaf = (disassembledLeaf *) ptp_workspace;
716 :
717 : /* Apply changes to page */
718 3331 : dataPlaceToPageLeafRecompress(buf, leaf);
719 :
720 : /* If needed, register WAL data built by computeLeafRecompressWALData */
721 3331 : if (RelationNeedsWAL(btree->index))
722 : {
723 3331 : XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
724 : }
725 3331 : }
726 :
727 : /*
728 : * Vacuum a posting tree leaf page.
729 : */
730 : void
731 8 : ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
732 : {
733 8 : Page page = BufferGetPage(buffer);
734 : disassembledLeaf *leaf;
735 8 : bool removedsomething = false;
736 : dlist_iter iter;
737 :
738 8 : leaf = disassembleLeaf(page);
739 :
740 : /* Vacuum each segment. */
741 179 : dlist_foreach(iter, &leaf->segments)
742 : {
743 171 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
744 : int oldsegsize;
745 : ItemPointer cleaned;
746 : int ncleaned;
747 :
748 171 : if (!seginfo->items)
749 171 : seginfo->items = ginPostingListDecode(seginfo->seg,
750 : &seginfo->nitems);
751 171 : if (seginfo->seg)
752 171 : oldsegsize = SizeOfGinPostingList(seginfo->seg);
753 : else
754 0 : oldsegsize = GinDataPageMaxDataSize;
755 :
756 171 : cleaned = ginVacuumItemPointers(gvs,
757 : seginfo->items,
758 : seginfo->nitems,
759 : &ncleaned);
760 171 : pfree(seginfo->items);
761 171 : seginfo->items = NULL;
762 171 : seginfo->nitems = 0;
763 171 : if (cleaned)
764 : {
765 145 : if (ncleaned > 0)
766 : {
767 : int npacked;
768 :
769 5 : seginfo->seg = ginCompressPostingList(cleaned,
770 : ncleaned,
771 : oldsegsize,
772 : &npacked);
773 : /* Removing an item never increases the size of the segment */
774 5 : if (npacked != ncleaned)
775 0 : elog(ERROR, "could not fit vacuumed posting list");
776 5 : seginfo->action = GIN_SEGMENT_REPLACE;
777 : }
778 : else
779 : {
780 140 : seginfo->seg = NULL;
781 140 : seginfo->items = NULL;
782 140 : seginfo->action = GIN_SEGMENT_DELETE;
783 : }
784 145 : seginfo->nitems = ncleaned;
785 :
786 145 : removedsomething = true;
787 : }
788 : }
789 :
790 : /*
791 : * If we removed any items, reconstruct the page from the pieces.
792 : *
793 : * We don't try to re-encode the segments here, even though some of them
794 : * might be really small now that we've removed some items from them. It
795 : * seems like a waste of effort, as there isn't really any benefit from
796 : * larger segments per se; larger segments only help to pack more items in
797 : * the same space. We might as well delay doing that until the next
798 : * insertion, which will need to re-encode at least part of the page
799 : * anyway.
800 : *
801 : * Also note if the page was in uncompressed, pre-9.4 format before, it is
802 : * now represented as one huge segment that contains all the items. It
803 : * might make sense to split that, to speed up random access, but we don't
804 : * bother. You'll have to REINDEX anyway if you want the full gain of the
805 : * new tighter index format.
806 : */
807 8 : if (removedsomething)
808 : {
809 : bool modified;
810 :
811 : /*
812 : * Make sure we have a palloc'd copy of all segments, after the first
813 : * segment that is modified. (dataPlaceToPageLeafRecompress requires
814 : * this).
815 : */
816 8 : modified = false;
817 179 : dlist_foreach(iter, &leaf->segments)
818 : {
819 171 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
820 : iter.cur);
821 :
822 171 : if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
823 145 : modified = true;
824 171 : if (modified && seginfo->action != GIN_SEGMENT_DELETE)
825 : {
826 29 : int segsize = SizeOfGinPostingList(seginfo->seg);
827 29 : GinPostingList *tmp = (GinPostingList *) palloc(segsize);
828 :
829 29 : memcpy(tmp, seginfo->seg, segsize);
830 29 : seginfo->seg = tmp;
831 : }
832 : }
833 :
834 8 : if (RelationNeedsWAL(indexrel))
835 8 : computeLeafRecompressWALData(leaf);
836 :
837 : /* Apply changes to page */
838 8 : START_CRIT_SECTION();
839 :
840 8 : dataPlaceToPageLeafRecompress(buffer, leaf);
841 :
842 8 : MarkBufferDirty(buffer);
843 :
844 8 : if (RelationNeedsWAL(indexrel))
845 : {
846 : XLogRecPtr recptr;
847 :
848 8 : XLogBeginInsert();
849 8 : XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
850 8 : XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
851 8 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
852 8 : PageSetLSN(page, recptr);
853 : }
854 :
855 8 : END_CRIT_SECTION();
856 : }
857 8 : }
858 :
859 : /*
860 : * Construct a ginxlogRecompressDataLeaf record representing the changes
861 : * in *leaf. (Because this requires a palloc, we have to do it before
862 : * we enter the critical section that actually updates the page.)
863 : */
864 : static void
865 3339 : computeLeafRecompressWALData(disassembledLeaf *leaf)
866 : {
867 3339 : int nmodified = 0;
868 : char *walbufbegin;
869 : char *walbufend;
870 : dlist_iter iter;
871 : int segno;
872 : ginxlogRecompressDataLeaf *recompress_xlog;
873 :
874 : /* Count the modified segments */
875 63452 : dlist_foreach(iter, &leaf->segments)
876 : {
877 60113 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
878 : iter.cur);
879 :
880 60113 : if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
881 3479 : nmodified++;
882 : }
883 :
884 3339 : walbufbegin =
885 3339 : palloc(sizeof(ginxlogRecompressDataLeaf) +
886 : BLCKSZ + /* max size needed to hold the segment data */
887 3339 : nmodified * 2 /* (segno + action) per action */
888 : );
889 3339 : walbufend = walbufbegin;
890 :
891 3339 : recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
892 3339 : walbufend += sizeof(ginxlogRecompressDataLeaf);
893 :
894 3339 : recompress_xlog->nactions = nmodified;
895 :
896 3339 : segno = 0;
897 63452 : dlist_foreach(iter, &leaf->segments)
898 : {
899 60113 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
900 : iter.cur);
901 60113 : int segsize = 0;
902 : int datalen;
903 60113 : uint8 action = seginfo->action;
904 :
905 60113 : if (action == GIN_SEGMENT_UNMODIFIED)
906 : {
907 56634 : segno++;
908 56634 : continue;
909 : }
910 :
911 3479 : if (action != GIN_SEGMENT_DELETE)
912 3339 : segsize = SizeOfGinPostingList(seginfo->seg);
913 :
914 : /*
915 : * If storing the uncompressed list of added item pointers would take
916 : * more space than storing the compressed segment as is, do that
917 : * instead.
918 : */
919 6794 : if (action == GIN_SEGMENT_ADDITEMS &&
920 3315 : seginfo->nmodifieditems * sizeof(ItemPointerData) > segsize)
921 : {
922 0 : action = GIN_SEGMENT_REPLACE;
923 : }
924 :
925 3479 : *((uint8 *) (walbufend++)) = segno;
926 3479 : *(walbufend++) = action;
927 :
928 3479 : switch (action)
929 : {
930 : case GIN_SEGMENT_DELETE:
931 140 : datalen = 0;
932 140 : break;
933 :
934 : case GIN_SEGMENT_ADDITEMS:
935 3315 : datalen = seginfo->nmodifieditems * sizeof(ItemPointerData);
936 3315 : memcpy(walbufend, &seginfo->nmodifieditems, sizeof(uint16));
937 3315 : memcpy(walbufend + sizeof(uint16), seginfo->modifieditems, datalen);
938 3315 : datalen += sizeof(uint16);
939 3315 : break;
940 :
941 : case GIN_SEGMENT_INSERT:
942 : case GIN_SEGMENT_REPLACE:
943 24 : datalen = SHORTALIGN(segsize);
944 24 : memcpy(walbufend, seginfo->seg, segsize);
945 24 : break;
946 :
947 : default:
948 0 : elog(ERROR, "unexpected GIN leaf action %d", action);
949 : }
950 3479 : walbufend += datalen;
951 :
952 3479 : if (action != GIN_SEGMENT_INSERT)
953 3460 : segno++;
954 : }
955 :
956 : /* Pass back the constructed info via *leaf */
957 3339 : leaf->walinfo = walbufbegin;
958 3339 : leaf->walinfolen = walbufend - walbufbegin;
959 3339 : }
960 :
961 : /*
962 : * Assemble a disassembled posting tree leaf page back to a buffer.
963 : *
964 : * This just updates the target buffer; WAL stuff is caller's responsibility.
965 : *
966 : * NOTE: The segment pointers must not point directly to the same buffer,
967 : * except for segments that have not been modified and whose preceding
968 : * segments have not been modified either.
969 : */
970 : static void
971 3339 : dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
972 : {
973 3339 : Page page = BufferGetPage(buf);
974 : char *ptr;
975 : int newsize;
976 3339 : bool modified = false;
977 : dlist_iter iter;
978 : int segsize;
979 :
980 : /*
981 : * If the page was in pre-9.4 format before, convert the header, and force
982 : * all segments to be copied to the page whether they were modified or
983 : * not.
984 : */
985 3339 : if (!GinPageIsCompressed(page))
986 : {
987 0 : Assert(leaf->oldformat);
988 0 : GinPageSetCompressed(page);
989 0 : GinPageGetOpaque(page)->maxoff = InvalidOffsetNumber;
990 0 : modified = true;
991 : }
992 :
993 3339 : ptr = (char *) GinDataLeafPageGetPostingList(page);
994 3339 : newsize = 0;
995 63452 : dlist_foreach(iter, &leaf->segments)
996 : {
997 60113 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node, iter.cur);
998 :
999 60113 : if (seginfo->action != GIN_SEGMENT_UNMODIFIED)
1000 3479 : modified = true;
1001 :
1002 60113 : if (seginfo->action != GIN_SEGMENT_DELETE)
1003 : {
1004 59973 : segsize = SizeOfGinPostingList(seginfo->seg);
1005 :
1006 59973 : if (modified)
1007 3363 : memcpy(ptr, seginfo->seg, segsize);
1008 :
1009 59973 : ptr += segsize;
1010 59973 : newsize += segsize;
1011 : }
1012 : }
1013 :
1014 3339 : Assert(newsize <= GinDataPageMaxDataSize);
1015 3339 : GinDataPageSetDataSize(page, newsize);
1016 3339 : }
1017 :
1018 : /*
1019 : * Like dataPlaceToPageLeafRecompress, but writes the disassembled leaf
1020 : * segments to two pages instead of one.
1021 : *
1022 : * This is different from the non-split cases in that this does not modify
1023 : * the original page directly, but writes to temporary in-memory copies of
1024 : * the new left and right pages.
1025 : */
1026 : static void
1027 6 : dataPlaceToPageLeafSplit(disassembledLeaf *leaf,
1028 : ItemPointerData lbound, ItemPointerData rbound,
1029 : Page lpage, Page rpage)
1030 : {
1031 : char *ptr;
1032 : int segsize;
1033 : int lsize;
1034 : int rsize;
1035 : dlist_node *node;
1036 : dlist_node *firstright;
1037 : leafSegmentInfo *seginfo;
1038 :
1039 : /* Initialize temporary pages to hold the new left and right pages */
1040 6 : GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
1041 6 : GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
1042 :
1043 : /*
1044 : * Copy the segments that go to the left page.
1045 : *
1046 : * XXX: We should skip copying the unmodified part of the left page, like
1047 : * we do when recompressing.
1048 : */
1049 6 : lsize = 0;
1050 6 : ptr = (char *) GinDataLeafPageGetPostingList(lpage);
1051 6 : firstright = dlist_next_node(&leaf->segments, leaf->lastleft);
1052 151 : for (node = dlist_head_node(&leaf->segments);
1053 : node != firstright;
1054 139 : node = dlist_next_node(&leaf->segments, node))
1055 : {
1056 139 : seginfo = dlist_container(leafSegmentInfo, node, node);
1057 :
1058 139 : if (seginfo->action != GIN_SEGMENT_DELETE)
1059 : {
1060 139 : segsize = SizeOfGinPostingList(seginfo->seg);
1061 139 : memcpy(ptr, seginfo->seg, segsize);
1062 139 : ptr += segsize;
1063 139 : lsize += segsize;
1064 : }
1065 : }
1066 6 : Assert(lsize == leaf->lsize);
1067 6 : GinDataPageSetDataSize(lpage, lsize);
1068 6 : *GinDataPageGetRightBound(lpage) = lbound;
1069 :
1070 : /* Copy the segments that go to the right page */
1071 6 : ptr = (char *) GinDataLeafPageGetPostingList(rpage);
1072 6 : rsize = 0;
1073 6 : for (node = firstright;
1074 : ;
1075 123 : node = dlist_next_node(&leaf->segments, node))
1076 : {
1077 129 : seginfo = dlist_container(leafSegmentInfo, node, node);
1078 :
1079 129 : if (seginfo->action != GIN_SEGMENT_DELETE)
1080 : {
1081 129 : segsize = SizeOfGinPostingList(seginfo->seg);
1082 129 : memcpy(ptr, seginfo->seg, segsize);
1083 129 : ptr += segsize;
1084 129 : rsize += segsize;
1085 : }
1086 :
1087 129 : if (!dlist_has_next(&leaf->segments, node))
1088 6 : break;
1089 123 : }
1090 6 : Assert(rsize == leaf->rsize);
1091 6 : GinDataPageSetDataSize(rpage, rsize);
1092 6 : *GinDataPageGetRightBound(rpage) = rbound;
1093 6 : }
1094 :
1095 : /*
1096 : * Prepare to insert data on an internal data page.
1097 : *
1098 : * If it will fit, return GPTP_INSERT after doing whatever setup is needed
1099 : * before we enter the insertion critical section. *ptp_workspace can be
1100 : * set to pass information along to the execPlaceToPage function.
1101 : *
1102 : * If it won't fit, perform a page split and return two temporary page
1103 : * images into *newlpage and *newrpage, with result GPTP_SPLIT.
1104 : *
1105 : * In neither case should the given page buffer be modified here.
1106 : *
1107 : * Note: on insertion to an internal node, in addition to inserting the given
1108 : * item, the downlink of the existing item at stack->off will be updated to
1109 : * point to updateblkno.
1110 : */
1111 : static GinPlaceToPageRC
1112 3 : dataBeginPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
1113 : void *insertdata, BlockNumber updateblkno,
1114 : void **ptp_workspace,
1115 : Page *newlpage, Page *newrpage)
1116 : {
1117 3 : Page page = BufferGetPage(buf);
1118 :
1119 : /* If it doesn't fit, deal with split case */
1120 3 : if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
1121 : {
1122 0 : dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
1123 : newlpage, newrpage);
1124 0 : return GPTP_SPLIT;
1125 : }
1126 :
1127 : /* Else, we're ready to proceed with insertion */
1128 3 : return GPTP_INSERT;
1129 : }
1130 :
1131 : /*
1132 : * Perform data insertion after beginPlaceToPage has decided it will fit.
1133 : *
1134 : * This is invoked within a critical section, and XLOG record creation (if
1135 : * needed) is already started. The target buffer is registered in slot 0.
1136 : */
1137 : static void
1138 3 : dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
1139 : void *insertdata, BlockNumber updateblkno,
1140 : void *ptp_workspace)
1141 : {
1142 3 : Page page = BufferGetPage(buf);
1143 3 : OffsetNumber off = stack->off;
1144 : PostingItem *pitem;
1145 :
1146 : /* Update existing downlink to point to next page (on internal page) */
1147 3 : pitem = GinDataPageGetPostingItem(page, off);
1148 3 : PostingItemSetBlockNumber(pitem, updateblkno);
1149 :
1150 : /* Add new item */
1151 3 : pitem = (PostingItem *) insertdata;
1152 3 : GinDataPageAddPostingItem(page, pitem, off);
1153 :
1154 3 : if (RelationNeedsWAL(btree->index))
1155 : {
1156 : /*
1157 : * This must be static, because it has to survive until XLogInsert,
1158 : * and we can't palloc here. Ugly, but the XLogInsert infrastructure
1159 : * isn't reentrant anyway.
1160 : */
1161 : static ginxlogInsertDataInternal data;
1162 :
1163 3 : data.offset = off;
1164 3 : data.newitem = *pitem;
1165 :
1166 3 : XLogRegisterBufData(0, (char *) &data,
1167 : sizeof(ginxlogInsertDataInternal));
1168 : }
1169 3 : }
1170 :
1171 : /*
1172 : * Prepare to insert data on a posting-tree data page.
1173 : *
1174 : * If it will fit, return GPTP_INSERT after doing whatever setup is needed
1175 : * before we enter the insertion critical section. *ptp_workspace can be
1176 : * set to pass information along to the execPlaceToPage function.
1177 : *
1178 : * If it won't fit, perform a page split and return two temporary page
1179 : * images into *newlpage and *newrpage, with result GPTP_SPLIT.
1180 : *
1181 : * In neither case should the given page buffer be modified here.
1182 : *
1183 : * Note: on insertion to an internal node, in addition to inserting the given
1184 : * item, the downlink of the existing item at stack->off will be updated to
1185 : * point to updateblkno.
1186 : *
1187 : * Calls relevant function for internal or leaf page because they are handled
1188 : * very differently.
1189 : */
1190 : static GinPlaceToPageRC
1191 3340 : dataBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
1192 : void *insertdata, BlockNumber updateblkno,
1193 : void **ptp_workspace,
1194 : Page *newlpage, Page *newrpage)
1195 : {
1196 3340 : Page page = BufferGetPage(buf);
1197 :
1198 3340 : Assert(GinPageIsData(page));
1199 :
1200 3340 : if (GinPageIsLeaf(page))
1201 3337 : return dataBeginPlaceToPageLeaf(btree, buf, stack, insertdata,
1202 : ptp_workspace,
1203 : newlpage, newrpage);
1204 : else
1205 3 : return dataBeginPlaceToPageInternal(btree, buf, stack,
1206 : insertdata, updateblkno,
1207 : ptp_workspace,
1208 : newlpage, newrpage);
1209 : }
1210 :
1211 : /*
1212 : * Perform data insertion after beginPlaceToPage has decided it will fit.
1213 : *
1214 : * This is invoked within a critical section, and XLOG record creation (if
1215 : * needed) is already started. The target buffer is registered in slot 0.
1216 : *
1217 : * Calls relevant function for internal or leaf page because they are handled
1218 : * very differently.
1219 : */
1220 : static void
1221 3334 : dataExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
1222 : void *insertdata, BlockNumber updateblkno,
1223 : void *ptp_workspace)
1224 : {
1225 3334 : Page page = BufferGetPage(buf);
1226 :
1227 3334 : if (GinPageIsLeaf(page))
1228 3331 : dataExecPlaceToPageLeaf(btree, buf, stack, insertdata,
1229 : ptp_workspace);
1230 : else
1231 3 : dataExecPlaceToPageInternal(btree, buf, stack, insertdata,
1232 : updateblkno, ptp_workspace);
1233 3334 : }
1234 :
1235 : /*
1236 : * Split internal page and insert new data.
1237 : *
1238 : * Returns new temp pages to *newlpage and *newrpage.
1239 : * The original buffer is left untouched.
1240 : */
1241 : static void
1242 0 : dataSplitPageInternal(GinBtree btree, Buffer origbuf,
1243 : GinBtreeStack *stack,
1244 : void *insertdata, BlockNumber updateblkno,
1245 : Page *newlpage, Page *newrpage)
1246 : {
1247 0 : Page oldpage = BufferGetPage(origbuf);
1248 0 : OffsetNumber off = stack->off;
1249 0 : int nitems = GinPageGetOpaque(oldpage)->maxoff;
1250 : int nleftitems;
1251 : int nrightitems;
1252 0 : Size pageSize = PageGetPageSize(oldpage);
1253 0 : ItemPointerData oldbound = *GinDataPageGetRightBound(oldpage);
1254 : ItemPointer bound;
1255 : Page lpage;
1256 : Page rpage;
1257 : OffsetNumber separator;
1258 : PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
1259 :
1260 0 : lpage = PageGetTempPage(oldpage);
1261 0 : rpage = PageGetTempPage(oldpage);
1262 0 : GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
1263 0 : GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
1264 :
1265 : /*
1266 : * First construct a new list of PostingItems, which includes all the old
1267 : * items, and the new item.
1268 : */
1269 0 : memcpy(allitems, GinDataPageGetPostingItem(oldpage, FirstOffsetNumber),
1270 : (off - 1) * sizeof(PostingItem));
1271 :
1272 0 : allitems[off - 1] = *((PostingItem *) insertdata);
1273 0 : memcpy(&allitems[off], GinDataPageGetPostingItem(oldpage, off),
1274 0 : (nitems - (off - 1)) * sizeof(PostingItem));
1275 0 : nitems++;
1276 :
1277 : /* Update existing downlink to point to next page */
1278 0 : PostingItemSetBlockNumber(&allitems[off], updateblkno);
1279 :
1280 : /*
1281 : * When creating a new index, fit as many tuples as possible on the left
1282 : * page, on the assumption that the table is scanned from beginning to
1283 : * end. This packs the index as tight as possible.
1284 : */
1285 0 : if (btree->isBuild && GinPageRightMost(oldpage))
1286 0 : separator = GinNonLeafDataPageGetFreeSpace(rpage) / sizeof(PostingItem);
1287 : else
1288 0 : separator = nitems / 2;
1289 0 : nleftitems = separator;
1290 0 : nrightitems = nitems - separator;
1291 :
1292 0 : memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
1293 : allitems,
1294 : nleftitems * sizeof(PostingItem));
1295 0 : GinPageGetOpaque(lpage)->maxoff = nleftitems;
1296 0 : memcpy(GinDataPageGetPostingItem(rpage, FirstOffsetNumber),
1297 0 : &allitems[separator],
1298 : nrightitems * sizeof(PostingItem));
1299 0 : GinPageGetOpaque(rpage)->maxoff = nrightitems;
1300 :
1301 : /*
1302 : * Also set pd_lower for both pages, like GinDataPageAddPostingItem does.
1303 : */
1304 0 : GinDataPageSetDataSize(lpage, nleftitems * sizeof(PostingItem));
1305 0 : GinDataPageSetDataSize(rpage, nrightitems * sizeof(PostingItem));
1306 :
1307 : /* set up right bound for left page */
1308 0 : bound = GinDataPageGetRightBound(lpage);
1309 0 : *bound = GinDataPageGetPostingItem(lpage, nleftitems)->key;
1310 :
1311 : /* set up right bound for right page */
1312 0 : *GinDataPageGetRightBound(rpage) = oldbound;
1313 :
1314 : /* return temp pages to caller */
1315 0 : *newlpage = lpage;
1316 0 : *newrpage = rpage;
1317 0 : }
1318 :
1319 : /*
1320 : * Construct insertion payload for inserting the downlink for given buffer.
1321 : */
1322 : static void *
1323 3 : dataPrepareDownlink(GinBtree btree, Buffer lbuf)
1324 : {
1325 3 : PostingItem *pitem = palloc(sizeof(PostingItem));
1326 3 : Page lpage = BufferGetPage(lbuf);
1327 :
1328 3 : PostingItemSetBlockNumber(pitem, BufferGetBlockNumber(lbuf));
1329 3 : pitem->key = *GinDataPageGetRightBound(lpage);
1330 :
1331 3 : return pitem;
1332 : }
1333 :
1334 : /*
1335 : * Fills new root by right bound values from child.
1336 : * Also called from ginxlog, should not use btree
1337 : */
1338 : void
1339 3 : ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage)
1340 : {
1341 : PostingItem li,
1342 : ri;
1343 :
1344 3 : li.key = *GinDataPageGetRightBound(lpage);
1345 3 : PostingItemSetBlockNumber(&li, lblkno);
1346 3 : GinDataPageAddPostingItem(root, &li, InvalidOffsetNumber);
1347 :
1348 3 : ri.key = *GinDataPageGetRightBound(rpage);
1349 3 : PostingItemSetBlockNumber(&ri, rblkno);
1350 3 : GinDataPageAddPostingItem(root, &ri, InvalidOffsetNumber);
1351 3 : }
1352 :
1353 :
1354 : /*** Functions to work with disassembled leaf pages ***/
1355 :
1356 : /*
1357 : * Disassemble page into a disassembledLeaf struct.
1358 : */
1359 : static disassembledLeaf *
1360 3345 : disassembleLeaf(Page page)
1361 : {
1362 : disassembledLeaf *leaf;
1363 : GinPostingList *seg;
1364 : Pointer segbegin;
1365 : Pointer segend;
1366 :
1367 3345 : leaf = palloc0(sizeof(disassembledLeaf));
1368 3345 : dlist_init(&leaf->segments);
1369 :
1370 3345 : if (GinPageIsCompressed(page))
1371 : {
1372 : /*
1373 : * Create a leafSegment entry for each segment.
1374 : */
1375 3345 : seg = GinDataLeafPageGetPostingList(page);
1376 3345 : segbegin = (Pointer) seg;
1377 3345 : segend = segbegin + GinDataLeafPageGetPostingListSize(page);
1378 66941 : while ((Pointer) seg < segend)
1379 : {
1380 60251 : leafSegmentInfo *seginfo = palloc(sizeof(leafSegmentInfo));
1381 :
1382 60251 : seginfo->action = GIN_SEGMENT_UNMODIFIED;
1383 60251 : seginfo->seg = seg;
1384 60251 : seginfo->items = NULL;
1385 60251 : seginfo->nitems = 0;
1386 60251 : dlist_push_tail(&leaf->segments, &seginfo->node);
1387 :
1388 60251 : seg = GinNextPostingListSegment(seg);
1389 : }
1390 3345 : leaf->oldformat = false;
1391 : }
1392 : else
1393 : {
1394 : /*
1395 : * A pre-9.4 format uncompressed page is represented by a single
1396 : * segment, with an array of items.
1397 : */
1398 : ItemPointer uncompressed;
1399 : int nuncompressed;
1400 : leafSegmentInfo *seginfo;
1401 :
1402 0 : uncompressed = dataLeafPageGetUncompressed(page, &nuncompressed);
1403 :
1404 0 : seginfo = palloc(sizeof(leafSegmentInfo));
1405 :
1406 0 : seginfo->action = GIN_SEGMENT_REPLACE;
1407 0 : seginfo->seg = NULL;
1408 0 : seginfo->items = palloc(nuncompressed * sizeof(ItemPointerData));
1409 0 : memcpy(seginfo->items, uncompressed, nuncompressed * sizeof(ItemPointerData));
1410 0 : seginfo->nitems = nuncompressed;
1411 :
1412 0 : dlist_push_tail(&leaf->segments, &seginfo->node);
1413 :
1414 0 : leaf->oldformat = true;
1415 : }
1416 :
1417 3345 : return leaf;
1418 : }
1419 :
1420 : /*
1421 : * Distribute newItems to the segments.
1422 : *
1423 : * Any segments that acquire new items are decoded, and the new items are
1424 : * merged with the old items.
1425 : *
1426 : * Returns true if any new items were added. False means they were all
1427 : * duplicates of existing items on the page.
1428 : */
1429 : static bool
1430 3337 : addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems, int nNewItems)
1431 : {
1432 : dlist_iter iter;
1433 3337 : ItemPointer nextnew = newItems;
1434 3337 : int newleft = nNewItems;
1435 3337 : bool modified = false;
1436 : leafSegmentInfo *newseg;
1437 :
1438 : /*
1439 : * If the page is completely empty, just construct one new segment to hold
1440 : * all the new items.
1441 : */
1442 3337 : if (dlist_is_empty(&leaf->segments))
1443 : {
1444 0 : newseg = palloc(sizeof(leafSegmentInfo));
1445 0 : newseg->seg = NULL;
1446 0 : newseg->items = newItems;
1447 0 : newseg->nitems = nNewItems;
1448 0 : newseg->action = GIN_SEGMENT_INSERT;
1449 0 : dlist_push_tail(&leaf->segments, &newseg->node);
1450 0 : return true;
1451 : }
1452 :
1453 60080 : dlist_foreach(iter, &leaf->segments)
1454 : {
1455 60080 : leafSegmentInfo *cur = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node, iter.cur);
1456 : int nthis;
1457 : ItemPointer tmpitems;
1458 : int ntmpitems;
1459 :
1460 : /*
1461 : * How many of the new items fall into this segment?
1462 : */
1463 60080 : if (!dlist_has_next(&leaf->segments, iter.cur))
1464 3337 : nthis = newleft;
1465 : else
1466 : {
1467 : leafSegmentInfo *next;
1468 : ItemPointerData next_first;
1469 :
1470 56743 : next = (leafSegmentInfo *) dlist_container(leafSegmentInfo, node,
1471 : dlist_next_node(&leaf->segments, iter.cur));
1472 56743 : if (next->items)
1473 3337 : next_first = next->items[0];
1474 : else
1475 : {
1476 53406 : Assert(next->seg != NULL);
1477 53406 : next_first = next->seg->first;
1478 : }
1479 :
1480 56743 : nthis = 0;
1481 113486 : while (nthis < newleft && ginCompareItemPointers(&nextnew[nthis], &next_first) < 0)
1482 0 : nthis++;
1483 : }
1484 60080 : if (nthis == 0)
1485 56743 : continue;
1486 :
1487 : /* Merge the new items with the existing items. */
1488 3337 : if (!cur->items)
1489 0 : cur->items = ginPostingListDecode(cur->seg, &cur->nitems);
1490 :
1491 : /*
1492 : * Fast path for the important special case that we're appending to
1493 : * the end of the page: don't let the last segment on the page grow
1494 : * larger than the target, create a new segment before that happens.
1495 : */
1496 6674 : if (!dlist_has_next(&leaf->segments, iter.cur) &&
1497 6674 : ginCompareItemPointers(&cur->items[cur->nitems - 1], &nextnew[0]) < 0 &&
1498 6674 : cur->seg != NULL &&
1499 3337 : SizeOfGinPostingList(cur->seg) >= GinPostingListSegmentTargetSize)
1500 : {
1501 21 : newseg = palloc(sizeof(leafSegmentInfo));
1502 21 : newseg->seg = NULL;
1503 21 : newseg->items = nextnew;
1504 21 : newseg->nitems = nthis;
1505 21 : newseg->action = GIN_SEGMENT_INSERT;
1506 21 : dlist_push_tail(&leaf->segments, &newseg->node);
1507 21 : modified = true;
1508 3358 : break;
1509 : }
1510 :
1511 3316 : tmpitems = ginMergeItemPointers(cur->items, cur->nitems,
1512 : nextnew, nthis,
1513 : &ntmpitems);
1514 3316 : if (ntmpitems != cur->nitems)
1515 : {
1516 : /*
1517 : * If there are no duplicates, track the added items so that we
1518 : * can emit a compact ADDITEMS WAL record later on. (it doesn't
1519 : * seem worth re-checking which items were duplicates, if there
1520 : * were any)
1521 : */
1522 6632 : if (ntmpitems == nthis + cur->nitems &&
1523 3316 : cur->action == GIN_SEGMENT_UNMODIFIED)
1524 : {
1525 3316 : cur->action = GIN_SEGMENT_ADDITEMS;
1526 3316 : cur->modifieditems = nextnew;
1527 3316 : cur->nmodifieditems = nthis;
1528 : }
1529 : else
1530 0 : cur->action = GIN_SEGMENT_REPLACE;
1531 :
1532 3316 : cur->items = tmpitems;
1533 3316 : cur->nitems = ntmpitems;
1534 3316 : cur->seg = NULL;
1535 3316 : modified = true;
1536 : }
1537 :
1538 3316 : nextnew += nthis;
1539 3316 : newleft -= nthis;
1540 3316 : if (newleft == 0)
1541 3316 : break;
1542 : }
1543 :
1544 3337 : return modified;
1545 : }
1546 :
1547 : /*
1548 : * Recompresses all segments that have been modified.
1549 : *
1550 : * If not all the items fit on two pages (ie. after split), we store as
1551 : * many items as fit, and set *remaining to the first item that didn't fit.
1552 : * If all items fit, *remaining is set to invalid.
1553 : *
1554 : * Returns true if the page has to be split.
1555 : */
1556 : static bool
1557 3337 : leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining)
1558 : {
1559 3337 : int pgused = 0;
1560 3337 : bool needsplit = false;
1561 : dlist_iter iter;
1562 : int segsize;
1563 : leafSegmentInfo *nextseg;
1564 : int npacked;
1565 : bool modified;
1566 : dlist_node *cur_node;
1567 : dlist_node *next_node;
1568 :
1569 3337 : ItemPointerSetInvalid(remaining);
1570 :
1571 : /*
1572 : * cannot use dlist_foreach_modify here because we insert adjacent items
1573 : * while iterating.
1574 : */
1575 66884 : for (cur_node = dlist_head_node(&leaf->segments);
1576 : cur_node != NULL;
1577 60210 : cur_node = next_node)
1578 : {
1579 60212 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
1580 : cur_node);
1581 :
1582 60212 : if (dlist_has_next(&leaf->segments, cur_node))
1583 56764 : next_node = dlist_next_node(&leaf->segments, cur_node);
1584 : else
1585 3448 : next_node = NULL;
1586 :
1587 : /* Compress the posting list, if necessary */
1588 60212 : if (seginfo->action != GIN_SEGMENT_DELETE)
1589 : {
1590 60212 : if (seginfo->seg == NULL)
1591 : {
1592 3448 : if (seginfo->nitems > GinPostingListSegmentMaxSize)
1593 113 : npacked = 0; /* no chance that it would fit. */
1594 : else
1595 : {
1596 3335 : seginfo->seg = ginCompressPostingList(seginfo->items,
1597 : seginfo->nitems,
1598 : GinPostingListSegmentMaxSize,
1599 : &npacked);
1600 : }
1601 3448 : if (npacked != seginfo->nitems)
1602 : {
1603 : /*
1604 : * Too large. Compress again to the target size, and
1605 : * create a new segment to represent the remaining items.
1606 : * The new segment is inserted after this one, so it will
1607 : * be processed in the next iteration of this loop.
1608 : */
1609 113 : if (seginfo->seg)
1610 0 : pfree(seginfo->seg);
1611 113 : seginfo->seg = ginCompressPostingList(seginfo->items,
1612 : seginfo->nitems,
1613 : GinPostingListSegmentTargetSize,
1614 : &npacked);
1615 113 : if (seginfo->action != GIN_SEGMENT_INSERT)
1616 0 : seginfo->action = GIN_SEGMENT_REPLACE;
1617 :
1618 113 : nextseg = palloc(sizeof(leafSegmentInfo));
1619 113 : nextseg->action = GIN_SEGMENT_INSERT;
1620 113 : nextseg->seg = NULL;
1621 113 : nextseg->items = &seginfo->items[npacked];
1622 113 : nextseg->nitems = seginfo->nitems - npacked;
1623 113 : next_node = &nextseg->node;
1624 113 : dlist_insert_after(cur_node, next_node);
1625 : }
1626 : }
1627 :
1628 : /*
1629 : * If the segment is very small, merge it with the next segment.
1630 : */
1631 60212 : if (SizeOfGinPostingList(seginfo->seg) < GinPostingListSegmentMinSize && next_node)
1632 : {
1633 : int nmerged;
1634 :
1635 0 : nextseg = dlist_container(leafSegmentInfo, node, next_node);
1636 :
1637 0 : if (seginfo->items == NULL)
1638 0 : seginfo->items = ginPostingListDecode(seginfo->seg,
1639 : &seginfo->nitems);
1640 0 : if (nextseg->items == NULL)
1641 0 : nextseg->items = ginPostingListDecode(nextseg->seg,
1642 : &nextseg->nitems);
1643 0 : nextseg->items =
1644 0 : ginMergeItemPointers(seginfo->items, seginfo->nitems,
1645 0 : nextseg->items, nextseg->nitems,
1646 : &nmerged);
1647 0 : Assert(nmerged == seginfo->nitems + nextseg->nitems);
1648 0 : nextseg->nitems = nmerged;
1649 0 : nextseg->seg = NULL;
1650 :
1651 0 : nextseg->action = GIN_SEGMENT_REPLACE;
1652 0 : nextseg->modifieditems = NULL;
1653 0 : nextseg->nmodifieditems = 0;
1654 :
1655 0 : if (seginfo->action == GIN_SEGMENT_INSERT)
1656 : {
1657 0 : dlist_delete(cur_node);
1658 0 : continue;
1659 : }
1660 : else
1661 : {
1662 0 : seginfo->action = GIN_SEGMENT_DELETE;
1663 0 : seginfo->seg = NULL;
1664 : }
1665 : }
1666 :
1667 60212 : seginfo->items = NULL;
1668 60212 : seginfo->nitems = 0;
1669 : }
1670 :
1671 60212 : if (seginfo->action == GIN_SEGMENT_DELETE)
1672 0 : continue;
1673 :
1674 : /*
1675 : * OK, we now have a compressed version of this segment ready for
1676 : * copying to the page. Did we exceed the size that fits on one page?
1677 : */
1678 60212 : segsize = SizeOfGinPostingList(seginfo->seg);
1679 60212 : if (pgused + segsize > GinDataPageMaxDataSize)
1680 : {
1681 8 : if (!needsplit)
1682 : {
1683 : /* switch to right page */
1684 6 : Assert(pgused > 0);
1685 6 : leaf->lastleft = dlist_prev_node(&leaf->segments, cur_node);
1686 6 : needsplit = true;
1687 6 : leaf->lsize = pgused;
1688 6 : pgused = 0;
1689 : }
1690 : else
1691 : {
1692 : /*
1693 : * Filled both pages. The last segment we constructed did not
1694 : * fit.
1695 : */
1696 2 : *remaining = seginfo->seg->first;
1697 :
1698 : /*
1699 : * remove all segments that did not fit from the list.
1700 : */
1701 6 : while (dlist_has_next(&leaf->segments, cur_node))
1702 2 : dlist_delete(dlist_next_node(&leaf->segments, cur_node));
1703 2 : dlist_delete(cur_node);
1704 2 : break;
1705 : }
1706 : }
1707 :
1708 60210 : pgused += segsize;
1709 : }
1710 :
1711 3337 : if (!needsplit)
1712 : {
1713 3331 : leaf->lsize = pgused;
1714 3331 : leaf->rsize = 0;
1715 : }
1716 : else
1717 6 : leaf->rsize = pgused;
1718 :
1719 3337 : Assert(leaf->lsize <= GinDataPageMaxDataSize);
1720 3337 : Assert(leaf->rsize <= GinDataPageMaxDataSize);
1721 :
1722 : /*
1723 : * Make a palloc'd copy of every segment after the first modified one,
1724 : * because as we start copying items to the original page, we might
1725 : * overwrite an existing segment.
1726 : */
1727 3337 : modified = false;
1728 63547 : dlist_foreach(iter, &leaf->segments)
1729 : {
1730 60210 : leafSegmentInfo *seginfo = dlist_container(leafSegmentInfo, node,
1731 : iter.cur);
1732 :
1733 60210 : if (!modified && seginfo->action != GIN_SEGMENT_UNMODIFIED)
1734 : {
1735 3337 : modified = true;
1736 : }
1737 56873 : else if (modified && seginfo->action == GIN_SEGMENT_UNMODIFIED)
1738 : {
1739 : GinPostingList *tmp;
1740 :
1741 0 : segsize = SizeOfGinPostingList(seginfo->seg);
1742 0 : tmp = palloc(segsize);
1743 0 : memcpy(tmp, seginfo->seg, segsize);
1744 0 : seginfo->seg = tmp;
1745 : }
1746 : }
1747 :
1748 3337 : return needsplit;
1749 : }
1750 :
1751 :
1752 : /*** Functions that are exported to the rest of the GIN code ***/
1753 :
1754 : /*
1755 : * Creates new posting tree containing the given TIDs. Returns the page
1756 : * number of the root of the new posting tree.
1757 : *
1758 : * items[] must be in sorted order with no duplicates.
1759 : */
1760 : BlockNumber
1761 4 : createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
1762 : GinStatsData *buildStats)
1763 : {
1764 : BlockNumber blkno;
1765 : Buffer buffer;
1766 : Page tmppage;
1767 : Page page;
1768 : Pointer ptr;
1769 : int nrootitems;
1770 : int rootsize;
1771 :
1772 : /* Construct the new root page in memory first. */
1773 4 : tmppage = (Page) palloc(BLCKSZ);
1774 4 : GinInitPage(tmppage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
1775 4 : GinPageGetOpaque(tmppage)->rightlink = InvalidBlockNumber;
1776 :
1777 : /*
1778 : * Write as many of the items to the root page as fit. In segments of max
1779 : * GinPostingListSegmentMaxSize bytes each.
1780 : */
1781 4 : nrootitems = 0;
1782 4 : rootsize = 0;
1783 4 : ptr = (Pointer) GinDataLeafPageGetPostingList(tmppage);
1784 79 : while (nrootitems < nitems)
1785 : {
1786 : GinPostingList *segment;
1787 : int npacked;
1788 : int segsize;
1789 :
1790 74 : segment = ginCompressPostingList(&items[nrootitems],
1791 74 : nitems - nrootitems,
1792 : GinPostingListSegmentMaxSize,
1793 : &npacked);
1794 74 : segsize = SizeOfGinPostingList(segment);
1795 74 : if (rootsize + segsize > GinDataPageMaxDataSize)
1796 3 : break;
1797 :
1798 71 : memcpy(ptr, segment, segsize);
1799 71 : ptr += segsize;
1800 71 : rootsize += segsize;
1801 71 : nrootitems += npacked;
1802 71 : pfree(segment);
1803 : }
1804 4 : GinDataPageSetDataSize(tmppage, rootsize);
1805 :
1806 : /*
1807 : * All set. Get a new physical page, and copy the in-memory page to it.
1808 : */
1809 4 : buffer = GinNewBuffer(index);
1810 4 : page = BufferGetPage(buffer);
1811 4 : blkno = BufferGetBlockNumber(buffer);
1812 :
1813 4 : START_CRIT_SECTION();
1814 :
1815 4 : PageRestoreTempPage(tmppage, page);
1816 4 : MarkBufferDirty(buffer);
1817 :
1818 4 : if (RelationNeedsWAL(index))
1819 : {
1820 : XLogRecPtr recptr;
1821 : ginxlogCreatePostingTree data;
1822 :
1823 4 : data.size = rootsize;
1824 :
1825 4 : XLogBeginInsert();
1826 4 : XLogRegisterData((char *) &data, sizeof(ginxlogCreatePostingTree));
1827 :
1828 4 : XLogRegisterData((char *) GinDataLeafPageGetPostingList(page),
1829 : rootsize);
1830 4 : XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
1831 :
1832 4 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
1833 4 : PageSetLSN(page, recptr);
1834 : }
1835 :
1836 4 : UnlockReleaseBuffer(buffer);
1837 :
1838 4 : END_CRIT_SECTION();
1839 :
1840 : /* During index build, count the newly-added data page */
1841 4 : if (buildStats)
1842 1 : buildStats->nDataPages++;
1843 :
1844 4 : elog(DEBUG2, "created GIN posting tree with %d items", nrootitems);
1845 :
1846 : /*
1847 : * Add any remaining TIDs to the newly-created posting tree.
1848 : */
1849 4 : if (nitems > nrootitems)
1850 : {
1851 6 : ginInsertItemPointers(index, blkno,
1852 3 : items + nrootitems,
1853 : nitems - nrootitems,
1854 : buildStats);
1855 : }
1856 :
1857 4 : return blkno;
1858 : }
1859 :
1860 : void
1861 3337 : ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno)
1862 : {
1863 3337 : memset(btree, 0, sizeof(GinBtreeData));
1864 :
1865 3337 : btree->index = index;
1866 3337 : btree->rootBlkno = rootBlkno;
1867 :
1868 3337 : btree->findChildPage = dataLocateItem;
1869 3337 : btree->getLeftMostChild = dataGetLeftMostPage;
1870 3337 : btree->isMoveRight = dataIsMoveRight;
1871 3337 : btree->findItem = NULL;
1872 3337 : btree->findChildPtr = dataFindChildPtr;
1873 3337 : btree->beginPlaceToPage = dataBeginPlaceToPage;
1874 3337 : btree->execPlaceToPage = dataExecPlaceToPage;
1875 3337 : btree->fillRoot = ginDataFillRoot;
1876 3337 : btree->prepareDownlink = dataPrepareDownlink;
1877 :
1878 3337 : btree->isData = TRUE;
1879 3337 : btree->fullScan = FALSE;
1880 3337 : btree->isBuild = FALSE;
1881 3337 : }
1882 :
1883 : /*
1884 : * Inserts array of item pointers, may execute several tree scan (very rare)
1885 : */
1886 : void
1887 3335 : ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
1888 : ItemPointerData *items, uint32 nitem,
1889 : GinStatsData *buildStats)
1890 : {
1891 : GinBtreeData btree;
1892 : GinBtreeDataLeafInsertData insertdata;
1893 : GinBtreeStack *stack;
1894 :
1895 3335 : ginPrepareDataScan(&btree, index, rootBlkno);
1896 3335 : btree.isBuild = (buildStats != NULL);
1897 3335 : insertdata.items = items;
1898 3335 : insertdata.nitem = nitem;
1899 3335 : insertdata.curitem = 0;
1900 :
1901 10007 : while (insertdata.curitem < insertdata.nitem)
1902 : {
1903 : /* search for the leaf page where the first item should go to */
1904 3337 : btree.itemptr = insertdata.items[insertdata.curitem];
1905 3337 : stack = ginFindLeafPage(&btree, false, NULL);
1906 :
1907 3337 : ginInsertValue(&btree, stack, &insertdata, buildStats);
1908 : }
1909 3335 : }
1910 :
1911 : /*
1912 : * Starts a new scan on a posting tree.
1913 : */
1914 : GinBtreeStack *
1915 2 : ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno,
1916 : Snapshot snapshot)
1917 : {
1918 : GinBtreeStack *stack;
1919 :
1920 2 : ginPrepareDataScan(btree, index, rootBlkno);
1921 :
1922 2 : btree->fullScan = TRUE;
1923 :
1924 2 : stack = ginFindLeafPage(btree, TRUE, snapshot);
1925 :
1926 2 : return stack;
1927 : }
|