Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gistbuildbuffers.c
4 : * node buffer management functions for GiST buffering build algorithm.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gist/gistbuildbuffers.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/genam.h"
18 : #include "access/gist_private.h"
19 : #include "catalog/index.h"
20 : #include "miscadmin.h"
21 : #include "storage/buffile.h"
22 : #include "storage/bufmgr.h"
23 : #include "utils/memutils.h"
24 : #include "utils/rel.h"
25 :
26 : static GISTNodeBufferPage *gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb);
27 : static void gistAddLoadedBuffer(GISTBuildBuffers *gfbb,
28 : GISTNodeBuffer *nodeBuffer);
29 : static void gistLoadNodeBuffer(GISTBuildBuffers *gfbb,
30 : GISTNodeBuffer *nodeBuffer);
31 : static void gistUnloadNodeBuffer(GISTBuildBuffers *gfbb,
32 : GISTNodeBuffer *nodeBuffer);
33 : static void gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer,
34 : IndexTuple item);
35 : static void gistGetItupFromPage(GISTNodeBufferPage *pageBuffer,
36 : IndexTuple *item);
37 : static long gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb);
38 : static void gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum);
39 :
40 : static void ReadTempFileBlock(BufFile *file, long blknum, void *ptr);
41 : static void WriteTempFileBlock(BufFile *file, long blknum, void *ptr);
42 :
43 :
44 : /*
45 : * Initialize GiST build buffers.
46 : */
47 : GISTBuildBuffers *
48 1 : gistInitBuildBuffers(int pagesPerBuffer, int levelStep, int maxLevel)
49 : {
50 : GISTBuildBuffers *gfbb;
51 : HASHCTL hashCtl;
52 :
53 1 : gfbb = palloc(sizeof(GISTBuildBuffers));
54 1 : gfbb->pagesPerBuffer = pagesPerBuffer;
55 1 : gfbb->levelStep = levelStep;
56 :
57 : /*
58 : * Create a temporary file to hold buffer pages that are swapped out of
59 : * memory.
60 : */
61 1 : gfbb->pfile = BufFileCreateTemp(false);
62 1 : gfbb->nFileBlocks = 0;
63 :
64 : /* Initialize free page management. */
65 1 : gfbb->nFreeBlocks = 0;
66 1 : gfbb->freeBlocksLen = 32;
67 1 : gfbb->freeBlocks = (long *) palloc(gfbb->freeBlocksLen * sizeof(long));
68 :
69 : /*
70 : * Current memory context will be used for all in-memory data structures
71 : * of buffers which are persistent during buffering build.
72 : */
73 1 : gfbb->context = CurrentMemoryContext;
74 :
75 : /*
76 : * nodeBuffersTab hash is association between index blocks and it's
77 : * buffers.
78 : */
79 1 : memset(&hashCtl, 0, sizeof(hashCtl));
80 1 : hashCtl.keysize = sizeof(BlockNumber);
81 1 : hashCtl.entrysize = sizeof(GISTNodeBuffer);
82 1 : hashCtl.hcxt = CurrentMemoryContext;
83 1 : gfbb->nodeBuffersTab = hash_create("gistbuildbuffers",
84 : 1024,
85 : &hashCtl,
86 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
87 :
88 1 : gfbb->bufferEmptyingQueue = NIL;
89 :
90 : /*
91 : * Per-level node buffers lists for final buffers emptying process. Node
92 : * buffers are inserted here when they are created.
93 : */
94 1 : gfbb->buffersOnLevelsLen = 1;
95 1 : gfbb->buffersOnLevels = (List **) palloc(sizeof(List *) *
96 1 : gfbb->buffersOnLevelsLen);
97 1 : gfbb->buffersOnLevels[0] = NIL;
98 :
99 : /*
100 : * Block numbers of node buffers which last pages are currently loaded
101 : * into main memory.
102 : */
103 1 : gfbb->loadedBuffersLen = 32;
104 1 : gfbb->loadedBuffers = (GISTNodeBuffer **) palloc(gfbb->loadedBuffersLen *
105 : sizeof(GISTNodeBuffer *));
106 1 : gfbb->loadedBuffersCount = 0;
107 :
108 1 : gfbb->rootlevel = maxLevel;
109 :
110 1 : return gfbb;
111 : }
112 :
113 : /*
114 : * Returns a node buffer for given block. The buffer is created if it
115 : * doesn't exist yet.
116 : */
117 : GISTNodeBuffer *
118 15727 : gistGetNodeBuffer(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
119 : BlockNumber nodeBlocknum, int level)
120 : {
121 : GISTNodeBuffer *nodeBuffer;
122 : bool found;
123 :
124 : /* Find node buffer in hash table */
125 15727 : nodeBuffer = (GISTNodeBuffer *) hash_search(gfbb->nodeBuffersTab,
126 : (const void *) &nodeBlocknum,
127 : HASH_ENTER,
128 : &found);
129 15727 : if (!found)
130 : {
131 : /*
132 : * Node buffer wasn't found. Initialize the new buffer as empty.
133 : */
134 5 : MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
135 :
136 : /* nodeBuffer->nodeBlocknum is the hash key and was filled in already */
137 5 : nodeBuffer->blocksCount = 0;
138 5 : nodeBuffer->pageBlocknum = InvalidBlockNumber;
139 5 : nodeBuffer->pageBuffer = NULL;
140 5 : nodeBuffer->queuedForEmptying = false;
141 5 : nodeBuffer->level = level;
142 :
143 : /*
144 : * Add this buffer to the list of buffers on this level. Enlarge
145 : * buffersOnLevels array if needed.
146 : */
147 5 : if (level >= gfbb->buffersOnLevelsLen)
148 : {
149 : int i;
150 :
151 1 : gfbb->buffersOnLevels =
152 1 : (List **) repalloc(gfbb->buffersOnLevels,
153 : (level + 1) * sizeof(List *));
154 :
155 : /* initialize the enlarged portion */
156 2 : for (i = gfbb->buffersOnLevelsLen; i <= level; i++)
157 1 : gfbb->buffersOnLevels[i] = NIL;
158 1 : gfbb->buffersOnLevelsLen = level + 1;
159 : }
160 :
161 : /*
162 : * Prepend the new buffer to the list of buffers on this level. It's
163 : * not arbitrary that the new buffer is put to the beginning of the
164 : * list: in the final emptying phase we loop through all buffers at
165 : * each level, and flush them. If a page is split during the emptying,
166 : * it's more efficient to flush the new splitted pages first, before
167 : * moving on to pre-existing pages on the level. The buffers just
168 : * created during the page split are likely still in cache, so
169 : * flushing them immediately is more efficient than putting them to
170 : * the end of the queue.
171 : */
172 10 : gfbb->buffersOnLevels[level] = lcons(nodeBuffer,
173 5 : gfbb->buffersOnLevels[level]);
174 :
175 5 : MemoryContextSwitchTo(oldcxt);
176 : }
177 :
178 15727 : return nodeBuffer;
179 : }
180 :
181 : /*
182 : * Allocate memory for a buffer page.
183 : */
184 : static GISTNodeBufferPage *
185 8 : gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb)
186 : {
187 : GISTNodeBufferPage *pageBuffer;
188 :
189 8 : pageBuffer = (GISTNodeBufferPage *) MemoryContextAlloc(gfbb->context,
190 : BLCKSZ);
191 8 : pageBuffer->prev = InvalidBlockNumber;
192 :
193 : /* Set page free space */
194 8 : PAGE_FREE_SPACE(pageBuffer) = BLCKSZ - BUFFER_PAGE_DATA_OFFSET;
195 8 : return pageBuffer;
196 : }
197 :
198 : /*
199 : * Add specified buffer into loadedBuffers array.
200 : */
201 : static void
202 8 : gistAddLoadedBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
203 : {
204 : /* Never add a temporary buffer to the array */
205 8 : if (nodeBuffer->isTemp)
206 16 : return;
207 :
208 : /* Enlarge the array if needed */
209 0 : if (gfbb->loadedBuffersCount >= gfbb->loadedBuffersLen)
210 : {
211 0 : gfbb->loadedBuffersLen *= 2;
212 0 : gfbb->loadedBuffers = (GISTNodeBuffer **)
213 0 : repalloc(gfbb->loadedBuffers,
214 0 : gfbb->loadedBuffersLen * sizeof(GISTNodeBuffer *));
215 : }
216 :
217 0 : gfbb->loadedBuffers[gfbb->loadedBuffersCount] = nodeBuffer;
218 0 : gfbb->loadedBuffersCount++;
219 : }
220 :
221 : /*
222 : * Load last page of node buffer into main memory.
223 : */
224 : static void
225 0 : gistLoadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
226 : {
227 : /* Check if we really should load something */
228 0 : if (!nodeBuffer->pageBuffer && nodeBuffer->blocksCount > 0)
229 : {
230 : /* Allocate memory for page */
231 0 : nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
232 :
233 : /* Read block from temporary file */
234 0 : ReadTempFileBlock(gfbb->pfile, nodeBuffer->pageBlocknum,
235 0 : nodeBuffer->pageBuffer);
236 :
237 : /* Mark file block as free */
238 0 : gistBuffersReleaseBlock(gfbb, nodeBuffer->pageBlocknum);
239 :
240 : /* Mark node buffer as loaded */
241 0 : gistAddLoadedBuffer(gfbb, nodeBuffer);
242 0 : nodeBuffer->pageBlocknum = InvalidBlockNumber;
243 : }
244 0 : }
245 :
246 : /*
247 : * Write last page of node buffer to the disk.
248 : */
249 : static void
250 0 : gistUnloadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
251 : {
252 : /* Check if we have something to write */
253 0 : if (nodeBuffer->pageBuffer)
254 : {
255 : BlockNumber blkno;
256 :
257 : /* Get free file block */
258 0 : blkno = gistBuffersGetFreeBlock(gfbb);
259 :
260 : /* Write block to the temporary file */
261 0 : WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
262 :
263 : /* Free memory of that page */
264 0 : pfree(nodeBuffer->pageBuffer);
265 0 : nodeBuffer->pageBuffer = NULL;
266 :
267 : /* Save block number */
268 0 : nodeBuffer->pageBlocknum = blkno;
269 : }
270 0 : }
271 :
272 : /*
273 : * Write last pages of all node buffers to the disk.
274 : */
275 : void
276 5 : gistUnloadNodeBuffers(GISTBuildBuffers *gfbb)
277 : {
278 : int i;
279 :
280 : /* Unload all the buffers that have a page loaded in memory. */
281 5 : for (i = 0; i < gfbb->loadedBuffersCount; i++)
282 0 : gistUnloadNodeBuffer(gfbb, gfbb->loadedBuffers[i]);
283 :
284 : /* Now there are no node buffers with loaded last page */
285 5 : gfbb->loadedBuffersCount = 0;
286 5 : }
287 :
288 : /*
289 : * Add index tuple to buffer page.
290 : */
291 : static void
292 34922 : gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer, IndexTuple itup)
293 : {
294 34922 : Size itupsz = IndexTupleSize(itup);
295 : char *ptr;
296 :
297 : /* There should be enough of space. */
298 34922 : Assert(PAGE_FREE_SPACE(pageBuffer) >= MAXALIGN(itupsz));
299 :
300 : /* Reduce free space value of page to reserve a spot for the tuple. */
301 34922 : PAGE_FREE_SPACE(pageBuffer) -= MAXALIGN(itupsz);
302 :
303 : /* Get pointer to the spot we reserved (ie. end of free space). */
304 34922 : ptr = (char *) pageBuffer + BUFFER_PAGE_DATA_OFFSET
305 34922 : + PAGE_FREE_SPACE(pageBuffer);
306 :
307 : /* Copy the index tuple there. */
308 34922 : memcpy(ptr, itup, itupsz);
309 34922 : }
310 :
311 : /*
312 : * Get last item from buffer page and remove it from page.
313 : */
314 : static void
315 34922 : gistGetItupFromPage(GISTNodeBufferPage *pageBuffer, IndexTuple *itup)
316 : {
317 : IndexTuple ptr;
318 : Size itupsz;
319 :
320 34922 : Assert(!PAGE_IS_EMPTY(pageBuffer)); /* Page shouldn't be empty */
321 :
322 : /* Get pointer to last index tuple */
323 34922 : ptr = (IndexTuple) ((char *) pageBuffer
324 : + BUFFER_PAGE_DATA_OFFSET
325 34922 : + PAGE_FREE_SPACE(pageBuffer));
326 34922 : itupsz = IndexTupleSize(ptr);
327 :
328 : /* Make a copy of the tuple */
329 34922 : *itup = (IndexTuple) palloc(itupsz);
330 34922 : memcpy(*itup, ptr, itupsz);
331 :
332 : /* Mark the space used by the tuple as free */
333 34922 : PAGE_FREE_SPACE(pageBuffer) += MAXALIGN(itupsz);
334 34922 : }
335 :
336 : /*
337 : * Push an index tuple to node buffer.
338 : */
339 : void
340 34922 : gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
341 : IndexTuple itup)
342 : {
343 : /*
344 : * Most part of memory operations will be in buffering build persistent
345 : * context. So, let's switch to it.
346 : */
347 34922 : MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
348 :
349 : /*
350 : * If the buffer is currently empty, create the first page.
351 : */
352 34922 : if (nodeBuffer->blocksCount == 0)
353 : {
354 8 : nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
355 8 : nodeBuffer->blocksCount = 1;
356 8 : gistAddLoadedBuffer(gfbb, nodeBuffer);
357 : }
358 :
359 : /* Load last page of node buffer if it wasn't in memory already */
360 34922 : if (!nodeBuffer->pageBuffer)
361 0 : gistLoadNodeBuffer(gfbb, nodeBuffer);
362 :
363 : /*
364 : * Check if there is enough space on the last page for the tuple.
365 : */
366 34922 : if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup))
367 : {
368 : /*
369 : * Nope. Swap previous block to disk and allocate a new one.
370 : */
371 : BlockNumber blkno;
372 :
373 : /* Write filled page to the disk */
374 168 : blkno = gistBuffersGetFreeBlock(gfbb);
375 168 : WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
376 :
377 : /*
378 : * Reset the in-memory page as empty, and link the previous block to
379 : * the new page by storing its block number in the prev-link.
380 : */
381 168 : PAGE_FREE_SPACE(nodeBuffer->pageBuffer) =
382 : BLCKSZ - MAXALIGN(offsetof(GISTNodeBufferPage, tupledata));
383 168 : nodeBuffer->pageBuffer->prev = blkno;
384 :
385 : /* We've just added one more page */
386 168 : nodeBuffer->blocksCount++;
387 : }
388 :
389 34922 : gistPlaceItupToPage(nodeBuffer->pageBuffer, itup);
390 :
391 : /*
392 : * If the buffer just overflowed, add it to the emptying queue.
393 : */
394 34922 : if (BUFFER_HALF_FILLED(nodeBuffer, gfbb) && !nodeBuffer->queuedForEmptying)
395 : {
396 0 : gfbb->bufferEmptyingQueue = lcons(nodeBuffer,
397 : gfbb->bufferEmptyingQueue);
398 0 : nodeBuffer->queuedForEmptying = true;
399 : }
400 :
401 : /* Restore memory context */
402 34922 : MemoryContextSwitchTo(oldcxt);
403 34922 : }
404 :
405 : /*
406 : * Removes one index tuple from node buffer. Returns true if success and false
407 : * if node buffer is empty.
408 : */
409 : bool
410 34930 : gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
411 : IndexTuple *itup)
412 : {
413 : /*
414 : * If node buffer is empty then return false.
415 : */
416 34930 : if (nodeBuffer->blocksCount <= 0)
417 8 : return false;
418 :
419 : /* Load last page of node buffer if needed */
420 34922 : if (!nodeBuffer->pageBuffer)
421 0 : gistLoadNodeBuffer(gfbb, nodeBuffer);
422 :
423 : /*
424 : * Get index tuple from last non-empty page.
425 : */
426 34922 : gistGetItupFromPage(nodeBuffer->pageBuffer, itup);
427 :
428 : /*
429 : * If we just removed the last tuple from the page, fetch previous page on
430 : * this node buffer (if any).
431 : */
432 34922 : if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer))
433 : {
434 : BlockNumber prevblkno;
435 :
436 : /*
437 : * blocksCount includes the page in pageBuffer, so decrease it now.
438 : */
439 176 : nodeBuffer->blocksCount--;
440 :
441 : /*
442 : * If there's more pages, fetch previous one.
443 : */
444 176 : prevblkno = nodeBuffer->pageBuffer->prev;
445 176 : if (prevblkno != InvalidBlockNumber)
446 : {
447 : /* There is a previous page. Fetch it. */
448 168 : Assert(nodeBuffer->blocksCount > 0);
449 168 : ReadTempFileBlock(gfbb->pfile, prevblkno, nodeBuffer->pageBuffer);
450 :
451 : /*
452 : * Now that we've read the block in memory, we can release its
453 : * on-disk block for reuse.
454 : */
455 168 : gistBuffersReleaseBlock(gfbb, prevblkno);
456 : }
457 : else
458 : {
459 : /* No more pages. Free memory. */
460 8 : Assert(nodeBuffer->blocksCount == 0);
461 8 : pfree(nodeBuffer->pageBuffer);
462 8 : nodeBuffer->pageBuffer = NULL;
463 : }
464 : }
465 34922 : return true;
466 : }
467 :
468 : /*
469 : * Select a currently unused block for writing to.
470 : */
471 : static long
472 168 : gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb)
473 : {
474 : /*
475 : * If there are multiple free blocks, we select the one appearing last in
476 : * freeBlocks[]. If there are none, assign the next block at the end of
477 : * the file (causing the file to be extended).
478 : */
479 168 : if (gfbb->nFreeBlocks > 0)
480 92 : return gfbb->freeBlocks[--gfbb->nFreeBlocks];
481 : else
482 76 : return gfbb->nFileBlocks++;
483 : }
484 :
485 : /*
486 : * Return a block# to the freelist.
487 : */
488 : static void
489 168 : gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum)
490 : {
491 : int ndx;
492 :
493 : /* Enlarge freeBlocks array if full. */
494 168 : if (gfbb->nFreeBlocks >= gfbb->freeBlocksLen)
495 : {
496 2 : gfbb->freeBlocksLen *= 2;
497 2 : gfbb->freeBlocks = (long *) repalloc(gfbb->freeBlocks,
498 2 : gfbb->freeBlocksLen *
499 : sizeof(long));
500 : }
501 :
502 : /* Add blocknum to array */
503 168 : ndx = gfbb->nFreeBlocks++;
504 168 : gfbb->freeBlocks[ndx] = blocknum;
505 168 : }
506 :
507 : /*
508 : * Free buffering build data structure.
509 : */
510 : void
511 1 : gistFreeBuildBuffers(GISTBuildBuffers *gfbb)
512 : {
513 : /* Close buffers file. */
514 1 : BufFileClose(gfbb->pfile);
515 :
516 : /* All other things will be freed on memory context release */
517 1 : }
518 :
519 : /*
520 : * Data structure representing information about node buffer for index tuples
521 : * relocation from splitted node buffer.
522 : */
523 : typedef struct
524 : {
525 : GISTENTRY entry[INDEX_MAX_KEYS];
526 : bool isnull[INDEX_MAX_KEYS];
527 : GISTPageSplitInfo *splitinfo;
528 : GISTNodeBuffer *nodeBuffer;
529 : } RelocationBufferInfo;
530 :
531 : /*
532 : * At page split, distribute tuples from the buffer of the split page to
533 : * new buffers for the created page halves. This also adjusts the downlinks
534 : * in 'splitinfo' to include the tuples in the buffers.
535 : */
536 : void
537 207 : gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
538 : Relation r, int level,
539 : Buffer buffer, List *splitinfo)
540 : {
541 : RelocationBufferInfo *relocationBuffersInfos;
542 : bool found;
543 : GISTNodeBuffer *nodeBuffer;
544 : BlockNumber blocknum;
545 : IndexTuple itup;
546 207 : int splitPagesCount = 0,
547 : i;
548 : GISTENTRY entry[INDEX_MAX_KEYS];
549 : bool isnull[INDEX_MAX_KEYS];
550 : GISTNodeBuffer oldBuf;
551 : ListCell *lc;
552 :
553 : /* If the splitted page doesn't have buffers, we have nothing to do. */
554 207 : if (!LEVEL_HAS_BUFFERS(level, gfbb))
555 408 : return;
556 :
557 : /*
558 : * Get the node buffer of the splitted page.
559 : */
560 3 : blocknum = BufferGetBlockNumber(buffer);
561 3 : nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum,
562 : HASH_FIND, &found);
563 3 : if (!found)
564 : {
565 : /* The page has no buffer, so we have nothing to do. */
566 0 : return;
567 : }
568 :
569 : /*
570 : * Make a copy of the old buffer, as we're going reuse it as the buffer
571 : * for the new left page, which is on the same block as the old page.
572 : * That's not true for the root page, but that's fine because we never
573 : * have a buffer on the root page anyway. The original algorithm as
574 : * described by Arge et al did, but it's of no use, as you might as well
575 : * read the tuples straight from the heap instead of the root buffer.
576 : */
577 3 : Assert(blocknum != GIST_ROOT_BLKNO);
578 3 : memcpy(&oldBuf, nodeBuffer, sizeof(GISTNodeBuffer));
579 3 : oldBuf.isTemp = true;
580 :
581 : /* Reset the old buffer, used for the new left page from now on */
582 3 : nodeBuffer->blocksCount = 0;
583 3 : nodeBuffer->pageBuffer = NULL;
584 3 : nodeBuffer->pageBlocknum = InvalidBlockNumber;
585 :
586 : /*
587 : * Allocate memory for information about relocation buffers.
588 : */
589 3 : splitPagesCount = list_length(splitinfo);
590 3 : relocationBuffersInfos =
591 3 : (RelocationBufferInfo *) palloc(sizeof(RelocationBufferInfo) *
592 : splitPagesCount);
593 :
594 : /*
595 : * Fill relocation buffers information for node buffers of pages produced
596 : * by split.
597 : */
598 3 : i = 0;
599 9 : foreach(lc, splitinfo)
600 : {
601 6 : GISTPageSplitInfo *si = (GISTPageSplitInfo *) lfirst(lc);
602 : GISTNodeBuffer *newNodeBuffer;
603 :
604 : /* Decompress parent index tuple of node buffer page. */
605 6 : gistDeCompressAtt(giststate, r,
606 : si->downlink, NULL, (OffsetNumber) 0,
607 6 : relocationBuffersInfos[i].entry,
608 6 : relocationBuffersInfos[i].isnull);
609 :
610 : /*
611 : * Create a node buffer for the page. The leftmost half is on the same
612 : * block as the old page before split, so for the leftmost half this
613 : * will return the original buffer. The tuples on the original buffer
614 : * were relinked to the temporary buffer, so the original one is now
615 : * empty.
616 : */
617 6 : newNodeBuffer = gistGetNodeBuffer(gfbb, giststate, BufferGetBlockNumber(si->buf), level);
618 :
619 6 : relocationBuffersInfos[i].nodeBuffer = newNodeBuffer;
620 6 : relocationBuffersInfos[i].splitinfo = si;
621 :
622 6 : i++;
623 : }
624 :
625 : /*
626 : * Loop through all index tuples in the buffer of the page being split,
627 : * moving them to buffers for the new pages. We try to move each tuple to
628 : * the page that will result in the lowest penalty for the leading column
629 : * or, in the case of a tie, the lowest penalty for the earliest column
630 : * that is not tied.
631 : *
632 : * The page searching logic is very similar to gistchoose().
633 : */
634 19207 : while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
635 : {
636 : float best_penalty[INDEX_MAX_KEYS];
637 : int i,
638 : which;
639 : IndexTuple newtup;
640 : RelocationBufferInfo *targetBufferInfo;
641 :
642 19201 : gistDeCompressAtt(giststate, r,
643 : itup, NULL, (OffsetNumber) 0, entry, isnull);
644 :
645 : /* default to using first page (shouldn't matter) */
646 19201 : which = 0;
647 :
648 : /*
649 : * best_penalty[j] is the best penalty we have seen so far for column
650 : * j, or -1 when we haven't yet examined column j. Array entries to
651 : * the right of the first -1 are undefined.
652 : */
653 19201 : best_penalty[0] = -1;
654 :
655 : /*
656 : * Loop over possible target pages, looking for one to move this tuple
657 : * to.
658 : */
659 37636 : for (i = 0; i < splitPagesCount; i++)
660 : {
661 32679 : RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
662 : bool zero_penalty;
663 : int j;
664 :
665 32679 : zero_penalty = true;
666 :
667 : /* Loop over index attributes. */
668 63962 : for (j = 0; j < r->rd_att->natts; j++)
669 : {
670 : float usize;
671 :
672 : /* Compute penalty for this column. */
673 65358 : usize = gistpenalty(giststate, j,
674 : &splitPageInfo->entry[j],
675 32679 : splitPageInfo->isnull[j],
676 32679 : &entry[j], isnull[j]);
677 32679 : if (usize > 0)
678 18435 : zero_penalty = false;
679 :
680 32679 : if (best_penalty[j] < 0 || usize < best_penalty[j])
681 : {
682 : /*
683 : * New best penalty for column. Tentatively select this
684 : * page as the target, and record the best penalty. Then
685 : * reset the next column's penalty to "unknown" (and
686 : * indirectly, the same for all the ones to its right).
687 : * This will force us to adopt this page's penalty values
688 : * as the best for all the remaining columns during
689 : * subsequent loop iterations.
690 : */
691 31283 : which = i;
692 31283 : best_penalty[j] = usize;
693 :
694 62566 : if (j < r->rd_att->natts - 1)
695 0 : best_penalty[j + 1] = -1;
696 : }
697 1396 : else if (best_penalty[j] == usize)
698 : {
699 : /*
700 : * The current page is exactly as good for this column as
701 : * the best page seen so far. The next iteration of this
702 : * loop will compare the next column.
703 : */
704 : }
705 : else
706 : {
707 : /*
708 : * The current page is worse for this column than the best
709 : * page seen so far. Skip the remaining columns and move
710 : * on to the next page, if any.
711 : */
712 1396 : zero_penalty = false; /* so outer loop won't exit */
713 1396 : break;
714 : }
715 : }
716 :
717 : /*
718 : * If we find a page with zero penalty for all columns, there's no
719 : * need to examine remaining pages; just break out of the loop and
720 : * return it.
721 : */
722 32679 : if (zero_penalty)
723 14244 : break;
724 : }
725 :
726 : /* OK, "which" is the page index to push the tuple to */
727 19201 : targetBufferInfo = &relocationBuffersInfos[which];
728 :
729 : /* Push item to selected node buffer */
730 19201 : gistPushItupToNodeBuffer(gfbb, targetBufferInfo->nodeBuffer, itup);
731 :
732 : /* Adjust the downlink for this page, if needed. */
733 19201 : newtup = gistgetadjusted(r, targetBufferInfo->splitinfo->downlink,
734 : itup, giststate);
735 19201 : if (newtup)
736 : {
737 4957 : gistDeCompressAtt(giststate, r,
738 : newtup, NULL, (OffsetNumber) 0,
739 4957 : targetBufferInfo->entry,
740 4957 : targetBufferInfo->isnull);
741 :
742 4957 : targetBufferInfo->splitinfo->downlink = newtup;
743 : }
744 : }
745 :
746 3 : pfree(relocationBuffersInfos);
747 : }
748 :
749 :
750 : /*
751 : * Wrappers around BufFile operations. The main difference is that these
752 : * wrappers report errors with ereport(), so that the callers don't need
753 : * to check the return code.
754 : */
755 :
756 : static void
757 168 : ReadTempFileBlock(BufFile *file, long blknum, void *ptr)
758 : {
759 168 : if (BufFileSeekBlock(file, blknum) != 0)
760 0 : elog(ERROR, "could not seek temporary file: %m");
761 168 : if (BufFileRead(file, ptr, BLCKSZ) != BLCKSZ)
762 0 : elog(ERROR, "could not read temporary file: %m");
763 168 : }
764 :
765 : static void
766 168 : WriteTempFileBlock(BufFile *file, long blknum, void *ptr)
767 : {
768 168 : if (BufFileSeekBlock(file, blknum) != 0)
769 0 : elog(ERROR, "could not seek temporary file: %m");
770 168 : if (BufFileWrite(file, ptr, BLCKSZ) != BLCKSZ)
771 : {
772 : /*
773 : * the other errors in Read/WriteTempFileBlock shouldn't happen, but
774 : * an error at write can easily happen if you run out of disk space.
775 : */
776 0 : ereport(ERROR,
777 : (errcode_for_file_access(),
778 : errmsg("could not write block %ld of temporary file: %m",
779 : blknum)));
780 : }
781 168 : }
|