Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * freepage.c
4 : * Management of free memory pages.
5 : *
6 : * The intention of this code is to provide infrastructure for memory
7 : * allocators written specifically for PostgreSQL. At least in the case
8 : * of dynamic shared memory, we can't simply use malloc() or even
9 : * relatively thin wrappers like palloc() which sit on top of it, because
10 : * no allocator built into the operating system will deal with relative
11 : * pointers. In the future, we may find other cases in which greater
12 : * control over our own memory management seems desirable.
13 : *
14 : * A FreePageManager keeps track of which 4kB pages of memory are currently
15 : * unused from the point of view of some higher-level memory allocator.
16 : * Unlike a user-facing allocator such as palloc(), a FreePageManager can
17 : * only allocate and free in units of whole pages, and freeing an
18 : * allocation can only be done given knowledge of its length in pages.
19 : *
20 : * Since a free page manager has only a fixed amount of dedicated memory,
21 : * and since there is no underlying allocator, it uses the free pages
22 : * it is given to manage to store its bookkeeping data. It keeps multiple
23 : * freelists of runs of pages, sorted by the size of the run; the head of
24 : * each freelist is stored in the FreePageManager itself, and the first
25 : * page of each run contains a relative pointer to the next run. See
26 : * FreePageManagerGetInternal for more details on how the freelists are
27 : * managed.
28 : *
29 : * To avoid memory fragmentation, it's important to consolidate adjacent
30 : * spans of pages whenever possible; otherwise, large allocation requests
31 : * might not be satisfied even when sufficient contiguous space is
32 : * available. Therefore, in addition to the freelists, we maintain an
33 : * in-memory btree of free page ranges ordered by page number. If a
34 : * range being freed precedes or follows a range that is already free,
35 : * the existing range is extended; if it exactly bridges the gap between
36 : * free ranges, then the two existing ranges are consolidated with the
37 : * newly-freed range to form one great big range of free pages.
38 : *
39 : * When there is only one range of free pages, the btree is trivial and
40 : * is stored within the FreePageManager proper; otherwise, pages are
41 : * allocated from the area under management as needed. Even in cases
42 : * where memory fragmentation is very severe, only a tiny fraction of
43 : * the pages under management are consumed by this btree.
44 : *
45 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
46 : * Portions Copyright (c) 1994, Regents of the University of California
47 : *
48 : * IDENTIFICATION
49 : * src/backend/utils/mmgr/freepage.c
50 : *
51 : *-------------------------------------------------------------------------
52 : */
53 :
54 : #include "postgres.h"
55 : #include "lib/stringinfo.h"
56 : #include "miscadmin.h"
57 :
58 : #include "utils/freepage.h"
59 : #include "utils/relptr.h"
60 :
61 :
62 : /* Magic numbers to identify various page types */
63 : #define FREE_PAGE_SPAN_LEADER_MAGIC 0xea4020f0
64 : #define FREE_PAGE_LEAF_MAGIC 0x98eae728
65 : #define FREE_PAGE_INTERNAL_MAGIC 0x19aa32c9
66 :
67 : /* Doubly linked list of spans of free pages; stored in first page of span. */
68 : struct FreePageSpanLeader
69 : {
70 : int magic; /* always FREE_PAGE_SPAN_LEADER_MAGIC */
71 : Size npages; /* number of pages in span */
72 : RelptrFreePageSpanLeader prev;
73 : RelptrFreePageSpanLeader next;
74 : };
75 :
76 : /* Common header for btree leaf and internal pages. */
77 : typedef struct FreePageBtreeHeader
78 : {
79 : int magic; /* FREE_PAGE_LEAF_MAGIC or
80 : * FREE_PAGE_INTERNAL_MAGIC */
81 : Size nused; /* number of items used */
82 : RelptrFreePageBtree parent; /* uplink */
83 : } FreePageBtreeHeader;
84 :
85 : /* Internal key; points to next level of btree. */
86 : typedef struct FreePageBtreeInternalKey
87 : {
88 : Size first_page; /* low bound for keys on child page */
89 : RelptrFreePageBtree child; /* downlink */
90 : } FreePageBtreeInternalKey;
91 :
92 : /* Leaf key; no payload data. */
93 : typedef struct FreePageBtreeLeafKey
94 : {
95 : Size first_page; /* first page in span */
96 : Size npages; /* number of pages in span */
97 : } FreePageBtreeLeafKey;
98 :
99 : /* Work out how many keys will fit on a page. */
100 : #define FPM_ITEMS_PER_INTERNAL_PAGE \
101 : ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
102 : sizeof(FreePageBtreeInternalKey))
103 : #define FPM_ITEMS_PER_LEAF_PAGE \
104 : ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
105 : sizeof(FreePageBtreeLeafKey))
106 :
107 : /* A btree page of either sort */
108 : struct FreePageBtree
109 : {
110 : FreePageBtreeHeader hdr;
111 : union
112 : {
113 : FreePageBtreeInternalKey internal_key[FPM_ITEMS_PER_INTERNAL_PAGE];
114 : FreePageBtreeLeafKey leaf_key[FPM_ITEMS_PER_LEAF_PAGE];
115 : } u;
116 : };
117 :
118 : /* Results of a btree search */
119 : typedef struct FreePageBtreeSearchResult
120 : {
121 : FreePageBtree *page;
122 : Size index;
123 : bool found;
124 : unsigned split_pages;
125 : } FreePageBtreeSearchResult;
126 :
127 : /* Helper functions */
128 : static void FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm,
129 : FreePageBtree *btp);
130 : static Size FreePageBtreeCleanup(FreePageManager *fpm);
131 : static FreePageBtree *FreePageBtreeFindLeftSibling(char *base,
132 : FreePageBtree *btp);
133 : static FreePageBtree *FreePageBtreeFindRightSibling(char *base,
134 : FreePageBtree *btp);
135 : static Size FreePageBtreeFirstKey(FreePageBtree *btp);
136 : static FreePageBtree *FreePageBtreeGetRecycled(FreePageManager *fpm);
137 : static void FreePageBtreeInsertInternal(char *base, FreePageBtree *btp,
138 : Size index, Size first_page, FreePageBtree *child);
139 : static void FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index,
140 : Size first_page, Size npages);
141 : static void FreePageBtreeRecycle(FreePageManager *fpm, Size pageno);
142 : static void FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp,
143 : Size index);
144 : static void FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp);
145 : static void FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
146 : FreePageBtreeSearchResult *result);
147 : static Size FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page);
148 : static Size FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page);
149 : static FreePageBtree *FreePageBtreeSplitPage(FreePageManager *fpm,
150 : FreePageBtree *btp);
151 : static void FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp);
152 : static void FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
153 : FreePageBtree *parent, int level, StringInfo buf);
154 : static void FreePageManagerDumpSpans(FreePageManager *fpm,
155 : FreePageSpanLeader *span, Size expected_pages,
156 : StringInfo buf);
157 : static bool FreePageManagerGetInternal(FreePageManager *fpm, Size npages,
158 : Size *first_page);
159 : static Size FreePageManagerPutInternal(FreePageManager *fpm, Size first_page,
160 : Size npages, bool soft);
161 : static void FreePagePopSpanLeader(FreePageManager *fpm, Size pageno);
162 : static void FreePagePushSpanLeader(FreePageManager *fpm, Size first_page,
163 : Size npages);
164 : static Size FreePageManagerLargestContiguous(FreePageManager *fpm);
165 : static void FreePageManagerUpdateLargest(FreePageManager *fpm);
166 :
167 : #if FPM_EXTRA_ASSERTS
168 : static Size sum_free_pages(FreePageManager *fpm);
169 : #endif
170 :
171 : /*
172 : * Initialize a new, empty free page manager.
173 : *
174 : * 'fpm' should reference caller-provided memory large enough to contain a
175 : * FreePageManager. We'll initialize it here.
176 : *
177 : * 'base' is the address to which all pointers are relative. When managing
178 : * a dynamic shared memory segment, it should normally be the base of the
179 : * segment. When managing backend-private memory, it can be either NULL or,
180 : * if managing a single contiguous extent of memory, the start of that extent.
181 : */
182 : void
183 19 : FreePageManagerInitialize(FreePageManager *fpm, char *base)
184 : {
185 : Size f;
186 :
187 19 : relptr_store(base, fpm->self, fpm);
188 19 : relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
189 19 : relptr_store(base, fpm->btree_recycle, (FreePageSpanLeader *) NULL);
190 19 : fpm->btree_depth = 0;
191 19 : fpm->btree_recycle_count = 0;
192 19 : fpm->singleton_first_page = 0;
193 19 : fpm->singleton_npages = 0;
194 19 : fpm->contiguous_pages = 0;
195 19 : fpm->contiguous_pages_dirty = true;
196 : #ifdef FPM_EXTRA_ASSERTS
197 : fpm->free_pages = 0;
198 : #endif
199 :
200 2470 : for (f = 0; f < FPM_NUM_FREELISTS; f++)
201 2451 : relptr_store(base, fpm->freelist[f], (FreePageSpanLeader *) NULL);
202 19 : }
203 :
204 : /*
205 : * Allocate a run of pages of the given length from the free page manager.
206 : * The return value indicates whether we were able to satisfy the request;
207 : * if true, the first page of the allocation is stored in *first_page.
208 : */
209 : bool
210 31 : FreePageManagerGet(FreePageManager *fpm, Size npages, Size *first_page)
211 : {
212 : bool result;
213 : Size contiguous_pages;
214 :
215 31 : result = FreePageManagerGetInternal(fpm, npages, first_page);
216 :
217 : /*
218 : * It's a bit counterintuitive, but allocating pages can actually create
219 : * opportunities for cleanup that create larger ranges. We might pull a
220 : * key out of the btree that enables the item at the head of the btree
221 : * recycle list to be inserted; and then if there are more items behind it
222 : * one of those might cause two currently-separated ranges to merge,
223 : * creating a single range of contiguous pages larger than any that
224 : * existed previously. It might be worth trying to improve the cleanup
225 : * algorithm to avoid such corner cases, but for now we just notice the
226 : * condition and do the appropriate reporting.
227 : */
228 31 : contiguous_pages = FreePageBtreeCleanup(fpm);
229 31 : if (fpm->contiguous_pages < contiguous_pages)
230 0 : fpm->contiguous_pages = contiguous_pages;
231 :
232 : /*
233 : * FreePageManagerGetInternal may have set contiguous_pages_dirty.
234 : * Recompute contigous_pages if so.
235 : */
236 31 : FreePageManagerUpdateLargest(fpm);
237 :
238 : #ifdef FPM_EXTRA_ASSERTS
239 : if (result)
240 : {
241 : Assert(fpm->free_pages >= npages);
242 : fpm->free_pages -= npages;
243 : }
244 : Assert(fpm->free_pages == sum_free_pages(fpm));
245 : Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
246 : #endif
247 31 : return result;
248 : }
249 :
250 : #ifdef FPM_EXTRA_ASSERTS
251 : static void
252 : sum_free_pages_recurse(FreePageManager *fpm, FreePageBtree *btp, Size *sum)
253 : {
254 : char *base = fpm_segment_base(fpm);
255 :
256 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ||
257 : btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
258 : ++*sum;
259 : if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
260 : {
261 : Size index;
262 :
263 :
264 : for (index = 0; index < btp->hdr.nused; ++index)
265 : {
266 : FreePageBtree *child;
267 :
268 : child = relptr_access(base, btp->u.internal_key[index].child);
269 : sum_free_pages_recurse(fpm, child, sum);
270 : }
271 : }
272 : }
273 : static Size
274 : sum_free_pages(FreePageManager *fpm)
275 : {
276 : FreePageSpanLeader *recycle;
277 : char *base = fpm_segment_base(fpm);
278 : Size sum = 0;
279 : int list;
280 :
281 : /* Count the spans by scanning the freelists. */
282 : for (list = 0; list < FPM_NUM_FREELISTS; ++list)
283 : {
284 :
285 : if (!relptr_is_null(fpm->freelist[list]))
286 : {
287 : FreePageSpanLeader *candidate =
288 : relptr_access(base, fpm->freelist[list]);
289 :
290 : do
291 : {
292 : sum += candidate->npages;
293 : candidate = relptr_access(base, candidate->next);
294 : } while (candidate != NULL);
295 : }
296 : }
297 :
298 : /* Count btree internal pages. */
299 : if (fpm->btree_depth > 0)
300 : {
301 : FreePageBtree *root = relptr_access(base, fpm->btree_root);
302 :
303 : sum_free_pages_recurse(fpm, root, &sum);
304 : }
305 :
306 : /* Count the recycle list. */
307 : for (recycle = relptr_access(base, fpm->btree_recycle);
308 : recycle != NULL;
309 : recycle = relptr_access(base, recycle->next))
310 : {
311 : Assert(recycle->npages == 1);
312 : ++sum;
313 : }
314 :
315 : return sum;
316 : }
317 : #endif
318 :
319 : /*
320 : * Compute the size of the largest run of pages that the user could
321 : * successfully get.
322 : */
323 : static Size
324 18 : FreePageManagerLargestContiguous(FreePageManager *fpm)
325 : {
326 : char *base;
327 : Size largest;
328 :
329 18 : base = fpm_segment_base(fpm);
330 18 : largest = 0;
331 18 : if (!relptr_is_null(fpm->freelist[FPM_NUM_FREELISTS - 1]))
332 : {
333 : FreePageSpanLeader *candidate;
334 :
335 18 : candidate = relptr_access(base, fpm->freelist[FPM_NUM_FREELISTS - 1]);
336 : do
337 : {
338 18 : if (candidate->npages > largest)
339 18 : largest = candidate->npages;
340 18 : candidate = relptr_access(base, candidate->next);
341 18 : } while (candidate != NULL);
342 : }
343 : else
344 : {
345 0 : Size f = FPM_NUM_FREELISTS - 1;
346 :
347 : do
348 : {
349 0 : --f;
350 0 : if (!relptr_is_null(fpm->freelist[f]))
351 : {
352 0 : largest = f + 1;
353 0 : break;
354 : }
355 0 : } while (f > 0);
356 : }
357 :
358 18 : return largest;
359 : }
360 :
361 : /*
362 : * Recompute the size of the largest run of pages that the user could
363 : * successfully get, if it has been marked dirty.
364 : */
365 : static void
366 55 : FreePageManagerUpdateLargest(FreePageManager *fpm)
367 : {
368 55 : if (!fpm->contiguous_pages_dirty)
369 92 : return;
370 :
371 18 : fpm->contiguous_pages = FreePageManagerLargestContiguous(fpm);
372 18 : fpm->contiguous_pages_dirty = false;
373 : }
374 :
375 : /*
376 : * Transfer a run of pages to the free page manager.
377 : */
378 : void
379 24 : FreePageManagerPut(FreePageManager *fpm, Size first_page, Size npages)
380 : {
381 : Size contiguous_pages;
382 :
383 24 : Assert(npages > 0);
384 :
385 : /* Record the new pages. */
386 24 : contiguous_pages =
387 : FreePageManagerPutInternal(fpm, first_page, npages, false);
388 :
389 : /*
390 : * If the new range we inserted into the page manager was contiguous with
391 : * an existing range, it may have opened up cleanup opportunities.
392 : */
393 24 : if (contiguous_pages > npages)
394 : {
395 : Size cleanup_contiguous_pages;
396 :
397 10 : cleanup_contiguous_pages = FreePageBtreeCleanup(fpm);
398 10 : if (cleanup_contiguous_pages > contiguous_pages)
399 0 : contiguous_pages = cleanup_contiguous_pages;
400 : }
401 :
402 : /* See if we now have a new largest chunk. */
403 24 : if (fpm->contiguous_pages < contiguous_pages)
404 2 : fpm->contiguous_pages = contiguous_pages;
405 :
406 : /*
407 : * The earlier call to FreePageManagerPutInternal may have set
408 : * contiguous_pages_dirty if it needed to allocate internal pages, so
409 : * recompute contiguous_pages if necessary.
410 : */
411 24 : FreePageManagerUpdateLargest(fpm);
412 :
413 : #ifdef FPM_EXTRA_ASSERTS
414 : fpm->free_pages += npages;
415 : Assert(fpm->free_pages == sum_free_pages(fpm));
416 : Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
417 : #endif
418 24 : }
419 :
420 : /*
421 : * Produce a debugging dump of the state of a free page manager.
422 : */
423 : char *
424 0 : FreePageManagerDump(FreePageManager *fpm)
425 : {
426 0 : char *base = fpm_segment_base(fpm);
427 : StringInfoData buf;
428 : FreePageSpanLeader *recycle;
429 0 : bool dumped_any_freelist = false;
430 : Size f;
431 :
432 : /* Initialize output buffer. */
433 0 : initStringInfo(&buf);
434 :
435 : /* Dump general stuff. */
436 0 : appendStringInfo(&buf, "metadata: self %zu max contiguous pages = %zu\n",
437 : fpm->self.relptr_off, fpm->contiguous_pages);
438 :
439 : /* Dump btree. */
440 0 : if (fpm->btree_depth > 0)
441 : {
442 : FreePageBtree *root;
443 :
444 0 : appendStringInfo(&buf, "btree depth %u:\n", fpm->btree_depth);
445 0 : root = relptr_access(base, fpm->btree_root);
446 0 : FreePageManagerDumpBtree(fpm, root, NULL, 0, &buf);
447 : }
448 0 : else if (fpm->singleton_npages > 0)
449 : {
450 0 : appendStringInfo(&buf, "singleton: %zu(%zu)\n",
451 : fpm->singleton_first_page, fpm->singleton_npages);
452 : }
453 :
454 : /* Dump btree recycle list. */
455 0 : recycle = relptr_access(base, fpm->btree_recycle);
456 0 : if (recycle != NULL)
457 : {
458 0 : appendStringInfoString(&buf, "btree recycle:");
459 0 : FreePageManagerDumpSpans(fpm, recycle, 1, &buf);
460 : }
461 :
462 : /* Dump free lists. */
463 0 : for (f = 0; f < FPM_NUM_FREELISTS; ++f)
464 : {
465 : FreePageSpanLeader *span;
466 :
467 0 : if (relptr_is_null(fpm->freelist[f]))
468 0 : continue;
469 0 : if (!dumped_any_freelist)
470 : {
471 0 : appendStringInfoString(&buf, "freelists:\n");
472 0 : dumped_any_freelist = true;
473 : }
474 0 : appendStringInfo(&buf, " %zu:", f + 1);
475 0 : span = relptr_access(base, fpm->freelist[f]);
476 0 : FreePageManagerDumpSpans(fpm, span, f + 1, &buf);
477 : }
478 :
479 : /* And return result to caller. */
480 0 : return buf.data;
481 : }
482 :
483 :
484 : /*
485 : * The first_page value stored at index zero in any non-root page must match
486 : * the first_page value stored in its parent at the index which points to that
487 : * page. So when the value stored at index zero in a btree page changes, we've
488 : * got to walk up the tree adjusting ancestor keys until we reach an ancestor
489 : * where that key isn't index zero. This function should be called after
490 : * updating the first key on the target page; it will propagate the change
491 : * upward as far as needed.
492 : *
493 : * We assume here that the first key on the page has not changed enough to
494 : * require changes in the ordering of keys on its ancestor pages. Thus,
495 : * if we search the parent page for the first key greater than or equal to
496 : * the first key on the current page, the downlink to this page will be either
497 : * the exact index returned by the search (if the first key decreased)
498 : * or one less (if the first key increased).
499 : */
500 : static void
501 30 : FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm, FreePageBtree *btp)
502 : {
503 30 : char *base = fpm_segment_base(fpm);
504 : Size first_page;
505 : FreePageBtree *parent;
506 : FreePageBtree *child;
507 :
508 : /* This might be either a leaf or an internal page. */
509 30 : Assert(btp->hdr.nused > 0);
510 30 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
511 : {
512 30 : Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
513 30 : first_page = btp->u.leaf_key[0].first_page;
514 : }
515 : else
516 : {
517 0 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
518 0 : Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
519 0 : first_page = btp->u.internal_key[0].first_page;
520 : }
521 30 : child = btp;
522 :
523 : /* Loop until we find an ancestor that does not require adjustment. */
524 : for (;;)
525 : {
526 : Size s;
527 :
528 30 : parent = relptr_access(base, child->hdr.parent);
529 30 : if (parent == NULL)
530 30 : break;
531 0 : s = FreePageBtreeSearchInternal(parent, first_page);
532 :
533 : /* Key is either at index s or index s-1; figure out which. */
534 0 : if (s >= parent->hdr.nused)
535 : {
536 0 : Assert(s == parent->hdr.nused);
537 0 : --s;
538 : }
539 : else
540 : {
541 : FreePageBtree *check;
542 :
543 0 : check = relptr_access(base, parent->u.internal_key[s].child);
544 0 : if (check != child)
545 : {
546 0 : Assert(s > 0);
547 0 : --s;
548 : }
549 : }
550 :
551 : #ifdef USE_ASSERT_CHECKING
552 : /* Debugging double-check. */
553 : {
554 : FreePageBtree *check;
555 :
556 0 : check = relptr_access(base, parent->u.internal_key[s].child);
557 0 : Assert(s < parent->hdr.nused);
558 0 : Assert(child == check);
559 : }
560 : #endif
561 :
562 : /* Update the parent key. */
563 0 : parent->u.internal_key[s].first_page = first_page;
564 :
565 : /*
566 : * If this is the first key in the parent, go up another level; else
567 : * done.
568 : */
569 0 : if (s > 0)
570 0 : break;
571 0 : child = parent;
572 0 : }
573 30 : }
574 :
575 : /*
576 : * Attempt to reclaim space from the free-page btree. The return value is
577 : * the largest range of contiguous pages created by the cleanup operation.
578 : */
579 : static Size
580 41 : FreePageBtreeCleanup(FreePageManager *fpm)
581 : {
582 41 : char *base = fpm_segment_base(fpm);
583 41 : Size max_contiguous_pages = 0;
584 :
585 : /* Attempt to shrink the depth of the btree. */
586 83 : while (!relptr_is_null(fpm->btree_root))
587 : {
588 35 : FreePageBtree *root = relptr_access(base, fpm->btree_root);
589 :
590 : /* If the root contains only one key, reduce depth by one. */
591 35 : if (root->hdr.nused == 1)
592 : {
593 : /* Shrink depth of tree by one. */
594 1 : Assert(fpm->btree_depth > 0);
595 1 : --fpm->btree_depth;
596 1 : if (root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
597 : {
598 : /* If root is a leaf, convert only entry to singleton range. */
599 1 : relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
600 1 : fpm->singleton_first_page = root->u.leaf_key[0].first_page;
601 1 : fpm->singleton_npages = root->u.leaf_key[0].npages;
602 : }
603 : else
604 : {
605 : FreePageBtree *newroot;
606 :
607 : /* If root is an internal page, make only child the root. */
608 0 : Assert(root->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
609 0 : relptr_copy(fpm->btree_root, root->u.internal_key[0].child);
610 0 : newroot = relptr_access(base, fpm->btree_root);
611 0 : relptr_store(base, newroot->hdr.parent, (FreePageBtree *) NULL);
612 : }
613 1 : FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, root));
614 : }
615 65 : else if (root->hdr.nused == 2 &&
616 31 : root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
617 : {
618 : Size end_of_first;
619 : Size start_of_second;
620 :
621 62 : end_of_first = root->u.leaf_key[0].first_page +
622 31 : root->u.leaf_key[0].npages;
623 31 : start_of_second = root->u.leaf_key[1].first_page;
624 :
625 31 : if (end_of_first + 1 == start_of_second)
626 : {
627 0 : Size root_page = fpm_pointer_to_page(base, root);
628 :
629 0 : if (end_of_first == root_page)
630 : {
631 0 : FreePagePopSpanLeader(fpm, root->u.leaf_key[0].first_page);
632 0 : FreePagePopSpanLeader(fpm, root->u.leaf_key[1].first_page);
633 0 : fpm->singleton_first_page = root->u.leaf_key[0].first_page;
634 0 : fpm->singleton_npages = root->u.leaf_key[0].npages +
635 0 : root->u.leaf_key[1].npages + 1;
636 0 : fpm->btree_depth = 0;
637 0 : relptr_store(base, fpm->btree_root,
638 : (FreePageBtree *) NULL);
639 0 : FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
640 : fpm->singleton_npages);
641 0 : Assert(max_contiguous_pages == 0);
642 0 : max_contiguous_pages = fpm->singleton_npages;
643 : }
644 : }
645 :
646 : /* Whether it worked or not, it's time to stop. */
647 31 : break;
648 : }
649 : else
650 : {
651 : /* Nothing more to do. Stop. */
652 : break;
653 : }
654 : }
655 :
656 : /*
657 : * Attempt to free recycled btree pages. We skip this if releasing the
658 : * recycled page would require a btree page split, because the page we're
659 : * trying to recycle would be consumed by the split, which would be
660 : * counterproductive.
661 : *
662 : * We also currently only ever attempt to recycle the first page on the
663 : * list; that could be made more aggressive, but it's not clear that the
664 : * complexity would be worthwhile.
665 : */
666 83 : while (fpm->btree_recycle_count > 0)
667 : {
668 : FreePageBtree *btp;
669 : Size first_page;
670 : Size contiguous_pages;
671 :
672 1 : btp = FreePageBtreeGetRecycled(fpm);
673 1 : first_page = fpm_pointer_to_page(base, btp);
674 1 : contiguous_pages = FreePageManagerPutInternal(fpm, first_page, 1, true);
675 1 : if (contiguous_pages == 0)
676 : {
677 0 : FreePageBtreeRecycle(fpm, first_page);
678 0 : break;
679 : }
680 : else
681 : {
682 1 : if (contiguous_pages > max_contiguous_pages)
683 1 : max_contiguous_pages = contiguous_pages;
684 : }
685 : }
686 :
687 41 : return max_contiguous_pages;
688 : }
689 :
690 : /*
691 : * Consider consolidating the given page with its left or right sibling,
692 : * if it's fairly empty.
693 : */
694 : static void
695 9 : FreePageBtreeConsolidate(FreePageManager *fpm, FreePageBtree *btp)
696 : {
697 9 : char *base = fpm_segment_base(fpm);
698 : FreePageBtree *np;
699 : Size max;
700 :
701 : /*
702 : * We only try to consolidate pages that are less than a third full. We
703 : * could be more aggressive about this, but that might risk performing
704 : * consolidation only to end up splitting again shortly thereafter. Since
705 : * the btree should be very small compared to the space under management,
706 : * our goal isn't so much to ensure that it always occupies the absolutely
707 : * smallest possible number of pages as to reclaim pages before things get
708 : * too egregiously out of hand.
709 : */
710 9 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
711 9 : max = FPM_ITEMS_PER_LEAF_PAGE;
712 : else
713 : {
714 0 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
715 0 : max = FPM_ITEMS_PER_INTERNAL_PAGE;
716 : }
717 9 : if (btp->hdr.nused >= max / 3)
718 0 : return;
719 :
720 : /*
721 : * If we can fit our right sibling's keys onto this page, consolidate.
722 : */
723 9 : np = FreePageBtreeFindRightSibling(base, btp);
724 9 : if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
725 : {
726 0 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
727 : {
728 0 : memcpy(&btp->u.leaf_key[btp->hdr.nused], &np->u.leaf_key[0],
729 0 : sizeof(FreePageBtreeLeafKey) * np->hdr.nused);
730 0 : btp->hdr.nused += np->hdr.nused;
731 : }
732 : else
733 : {
734 0 : memcpy(&btp->u.internal_key[btp->hdr.nused], &np->u.internal_key[0],
735 0 : sizeof(FreePageBtreeInternalKey) * np->hdr.nused);
736 0 : btp->hdr.nused += np->hdr.nused;
737 0 : FreePageBtreeUpdateParentPointers(base, btp);
738 : }
739 0 : FreePageBtreeRemovePage(fpm, np);
740 0 : return;
741 : }
742 :
743 : /*
744 : * If we can fit our keys onto our left sibling's page, consolidate. In
745 : * this case, we move our keys onto the other page rather than visca
746 : * versa, to avoid having to adjust ancestor keys.
747 : */
748 9 : np = FreePageBtreeFindLeftSibling(base, btp);
749 9 : if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
750 : {
751 0 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
752 : {
753 0 : memcpy(&np->u.leaf_key[np->hdr.nused], &btp->u.leaf_key[0],
754 0 : sizeof(FreePageBtreeLeafKey) * btp->hdr.nused);
755 0 : np->hdr.nused += btp->hdr.nused;
756 : }
757 : else
758 : {
759 0 : memcpy(&np->u.internal_key[np->hdr.nused], &btp->u.internal_key[0],
760 0 : sizeof(FreePageBtreeInternalKey) * btp->hdr.nused);
761 0 : np->hdr.nused += btp->hdr.nused;
762 0 : FreePageBtreeUpdateParentPointers(base, np);
763 : }
764 0 : FreePageBtreeRemovePage(fpm, btp);
765 0 : return;
766 : }
767 : }
768 :
769 : /*
770 : * Find the passed page's left sibling; that is, the page at the same level
771 : * of the tree whose keyspace immediately precedes ours.
772 : */
773 : static FreePageBtree *
774 9 : FreePageBtreeFindLeftSibling(char *base, FreePageBtree *btp)
775 : {
776 9 : FreePageBtree *p = btp;
777 9 : int levels = 0;
778 :
779 : /* Move up until we can move left. */
780 : for (;;)
781 : {
782 : Size first_page;
783 : Size index;
784 :
785 9 : first_page = FreePageBtreeFirstKey(p);
786 9 : p = relptr_access(base, p->hdr.parent);
787 :
788 9 : if (p == NULL)
789 9 : return NULL; /* we were passed the rightmost page */
790 :
791 0 : index = FreePageBtreeSearchInternal(p, first_page);
792 0 : if (index > 0)
793 : {
794 0 : Assert(p->u.internal_key[index].first_page == first_page);
795 0 : p = relptr_access(base, p->u.internal_key[index - 1].child);
796 0 : break;
797 : }
798 0 : Assert(index == 0);
799 0 : ++levels;
800 0 : }
801 :
802 : /* Descend left. */
803 0 : while (levels > 0)
804 : {
805 0 : Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
806 0 : p = relptr_access(base, p->u.internal_key[p->hdr.nused - 1].child);
807 0 : --levels;
808 : }
809 0 : Assert(p->hdr.magic == btp->hdr.magic);
810 :
811 0 : return p;
812 : }
813 :
814 : /*
815 : * Find the passed page's right sibling; that is, the page at the same level
816 : * of the tree whose keyspace immediately follows ours.
817 : */
818 : static FreePageBtree *
819 9 : FreePageBtreeFindRightSibling(char *base, FreePageBtree *btp)
820 : {
821 9 : FreePageBtree *p = btp;
822 9 : int levels = 0;
823 :
824 : /* Move up until we can move right. */
825 : for (;;)
826 : {
827 : Size first_page;
828 : Size index;
829 :
830 9 : first_page = FreePageBtreeFirstKey(p);
831 9 : p = relptr_access(base, p->hdr.parent);
832 :
833 9 : if (p == NULL)
834 9 : return NULL; /* we were passed the rightmost page */
835 :
836 0 : index = FreePageBtreeSearchInternal(p, first_page);
837 0 : if (index < p->hdr.nused - 1)
838 : {
839 0 : Assert(p->u.internal_key[index].first_page == first_page);
840 0 : p = relptr_access(base, p->u.internal_key[index + 1].child);
841 0 : break;
842 : }
843 0 : Assert(index == p->hdr.nused - 1);
844 0 : ++levels;
845 0 : }
846 :
847 : /* Descend left. */
848 0 : while (levels > 0)
849 : {
850 0 : Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
851 0 : p = relptr_access(base, p->u.internal_key[0].child);
852 0 : --levels;
853 : }
854 0 : Assert(p->hdr.magic == btp->hdr.magic);
855 :
856 0 : return p;
857 : }
858 :
859 : /*
860 : * Get the first key on a btree page.
861 : */
862 : static Size
863 18 : FreePageBtreeFirstKey(FreePageBtree *btp)
864 : {
865 18 : Assert(btp->hdr.nused > 0);
866 :
867 18 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
868 18 : return btp->u.leaf_key[0].first_page;
869 : else
870 : {
871 0 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
872 0 : return btp->u.internal_key[0].first_page;
873 : }
874 : }
875 :
876 : /*
877 : * Get a page from the btree recycle list for use as a btree page.
878 : */
879 : static FreePageBtree *
880 1 : FreePageBtreeGetRecycled(FreePageManager *fpm)
881 : {
882 1 : char *base = fpm_segment_base(fpm);
883 1 : FreePageSpanLeader *victim = relptr_access(base, fpm->btree_recycle);
884 : FreePageSpanLeader *newhead;
885 :
886 1 : Assert(victim != NULL);
887 1 : newhead = relptr_access(base, victim->next);
888 1 : if (newhead != NULL)
889 0 : relptr_copy(newhead->prev, victim->prev);
890 1 : relptr_store(base, fpm->btree_recycle, newhead);
891 1 : Assert(fpm_pointer_is_page_aligned(base, victim));
892 1 : fpm->btree_recycle_count--;
893 1 : return (FreePageBtree *) victim;
894 : }
895 :
896 : /*
897 : * Insert an item into an internal page.
898 : */
899 : static void
900 0 : FreePageBtreeInsertInternal(char *base, FreePageBtree *btp, Size index,
901 : Size first_page, FreePageBtree *child)
902 : {
903 0 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
904 0 : Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
905 0 : Assert(index <= btp->hdr.nused);
906 0 : memmove(&btp->u.internal_key[index + 1], &btp->u.internal_key[index],
907 0 : sizeof(FreePageBtreeInternalKey) * (btp->hdr.nused - index));
908 0 : btp->u.internal_key[index].first_page = first_page;
909 0 : relptr_store(base, btp->u.internal_key[index].child, child);
910 0 : ++btp->hdr.nused;
911 0 : }
912 :
913 : /*
914 : * Insert an item into a leaf page.
915 : */
916 : static void
917 13 : FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index, Size first_page,
918 : Size npages)
919 : {
920 13 : Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
921 13 : Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
922 13 : Assert(index <= btp->hdr.nused);
923 13 : memmove(&btp->u.leaf_key[index + 1], &btp->u.leaf_key[index],
924 13 : sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
925 13 : btp->u.leaf_key[index].first_page = first_page;
926 13 : btp->u.leaf_key[index].npages = npages;
927 13 : ++btp->hdr.nused;
928 13 : }
929 :
930 : /*
931 : * Put a page on the btree recycle list.
932 : */
933 : static void
934 1 : FreePageBtreeRecycle(FreePageManager *fpm, Size pageno)
935 : {
936 1 : char *base = fpm_segment_base(fpm);
937 1 : FreePageSpanLeader *head = relptr_access(base, fpm->btree_recycle);
938 : FreePageSpanLeader *span;
939 :
940 1 : span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);
941 1 : span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
942 1 : span->npages = 1;
943 1 : relptr_store(base, span->next, head);
944 1 : relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
945 1 : if (head != NULL)
946 0 : relptr_store(base, head->prev, span);
947 1 : relptr_store(base, fpm->btree_recycle, span);
948 1 : fpm->btree_recycle_count++;
949 1 : }
950 :
951 : /*
952 : * Remove an item from the btree at the given position on the given page.
953 : */
954 : static void
955 9 : FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp, Size index)
956 : {
957 9 : Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
958 9 : Assert(index < btp->hdr.nused);
959 :
960 : /* When last item is removed, extirpate entire page from btree. */
961 9 : if (btp->hdr.nused == 1)
962 : {
963 0 : FreePageBtreeRemovePage(fpm, btp);
964 9 : return;
965 : }
966 :
967 : /* Physically remove the key from the page. */
968 9 : --btp->hdr.nused;
969 9 : if (index < btp->hdr.nused)
970 9 : memmove(&btp->u.leaf_key[index], &btp->u.leaf_key[index + 1],
971 9 : sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
972 :
973 : /* If we just removed the first key, adjust ancestor keys. */
974 9 : if (index == 0)
975 1 : FreePageBtreeAdjustAncestorKeys(fpm, btp);
976 :
977 : /* Consider whether to consolidate this page with a sibling. */
978 9 : FreePageBtreeConsolidate(fpm, btp);
979 : }
980 :
981 : /*
982 : * Remove a page from the btree. Caller is responsible for having relocated
983 : * any keys from this page that are still wanted. The page is placed on the
984 : * recycled list.
985 : */
986 : static void
987 0 : FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp)
988 : {
989 0 : char *base = fpm_segment_base(fpm);
990 : FreePageBtree *parent;
991 : Size index;
992 : Size first_page;
993 :
994 : for (;;)
995 : {
996 : /* Find parent page. */
997 0 : parent = relptr_access(base, btp->hdr.parent);
998 0 : if (parent == NULL)
999 : {
1000 : /* We are removing the root page. */
1001 0 : relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
1002 0 : fpm->btree_depth = 0;
1003 0 : Assert(fpm->singleton_first_page == 0);
1004 0 : Assert(fpm->singleton_npages == 0);
1005 0 : return;
1006 : }
1007 :
1008 : /*
1009 : * If the parent contains only one item, we need to remove it as well.
1010 : */
1011 0 : if (parent->hdr.nused > 1)
1012 0 : break;
1013 0 : FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));
1014 0 : btp = parent;
1015 0 : }
1016 :
1017 : /* Find and remove the downlink. */
1018 0 : first_page = FreePageBtreeFirstKey(btp);
1019 0 : if (parent->hdr.magic == FREE_PAGE_LEAF_MAGIC)
1020 : {
1021 0 : index = FreePageBtreeSearchLeaf(parent, first_page);
1022 0 : Assert(index < parent->hdr.nused);
1023 0 : if (index < parent->hdr.nused - 1)
1024 0 : memmove(&parent->u.leaf_key[index],
1025 0 : &parent->u.leaf_key[index + 1],
1026 : sizeof(FreePageBtreeLeafKey)
1027 0 : * (parent->hdr.nused - index - 1));
1028 : }
1029 : else
1030 : {
1031 0 : index = FreePageBtreeSearchInternal(parent, first_page);
1032 0 : Assert(index < parent->hdr.nused);
1033 0 : if (index < parent->hdr.nused - 1)
1034 0 : memmove(&parent->u.internal_key[index],
1035 0 : &parent->u.internal_key[index + 1],
1036 : sizeof(FreePageBtreeInternalKey)
1037 0 : * (parent->hdr.nused - index - 1));
1038 : }
1039 0 : parent->hdr.nused--;
1040 0 : Assert(parent->hdr.nused > 0);
1041 :
1042 : /* Recycle the page. */
1043 0 : FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));
1044 :
1045 : /* Adjust ancestor keys if needed. */
1046 0 : if (index == 0)
1047 0 : FreePageBtreeAdjustAncestorKeys(fpm, parent);
1048 :
1049 : /* Consider whether to consolidate the parent with a sibling. */
1050 0 : FreePageBtreeConsolidate(fpm, parent);
1051 : }
1052 :
1053 : /*
1054 : * Search the btree for an entry for the given first page and initialize
1055 : * *result with the results of the search. result->page and result->index
1056 : * indicate either the position of an exact match or the position at which
1057 : * the new key should be inserted. result->found is true for an exact match,
1058 : * otherwise false. result->split_pages will contain the number of additional
1059 : * btree pages that will be needed when performing a split to insert a key.
1060 : * Except as described above, the contents of fields in the result object are
1061 : * undefined on return.
1062 : */
1063 : static void
1064 48 : FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
1065 : FreePageBtreeSearchResult *result)
1066 : {
1067 48 : char *base = fpm_segment_base(fpm);
1068 48 : FreePageBtree *btp = relptr_access(base, fpm->btree_root);
1069 : Size index;
1070 :
1071 48 : result->split_pages = 1;
1072 :
1073 : /* If the btree is empty, there's nothing to find. */
1074 48 : if (btp == NULL)
1075 : {
1076 0 : result->page = NULL;
1077 0 : result->found = false;
1078 48 : return;
1079 : }
1080 :
1081 : /* Descend until we hit a leaf. */
1082 96 : while (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
1083 : {
1084 : FreePageBtree *child;
1085 : bool found_exact;
1086 :
1087 0 : index = FreePageBtreeSearchInternal(btp, first_page);
1088 0 : found_exact = index < btp->hdr.nused &&
1089 0 : btp->u.internal_key[index].first_page == first_page;
1090 :
1091 : /*
1092 : * If we found an exact match we descend directly. Otherwise, we
1093 : * descend into the child to the left if possible so that we can find
1094 : * the insertion point at that child's high end.
1095 : */
1096 0 : if (!found_exact && index > 0)
1097 0 : --index;
1098 :
1099 : /* Track required split depth for leaf insert. */
1100 0 : if (btp->hdr.nused >= FPM_ITEMS_PER_INTERNAL_PAGE)
1101 : {
1102 0 : Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
1103 0 : result->split_pages++;
1104 : }
1105 : else
1106 0 : result->split_pages = 0;
1107 :
1108 : /* Descend to appropriate child page. */
1109 0 : Assert(index < btp->hdr.nused);
1110 0 : child = relptr_access(base, btp->u.internal_key[index].child);
1111 0 : Assert(relptr_access(base, child->hdr.parent) == btp);
1112 0 : btp = child;
1113 : }
1114 :
1115 : /* Track required split depth for leaf insert. */
1116 48 : if (btp->hdr.nused >= FPM_ITEMS_PER_LEAF_PAGE)
1117 : {
1118 0 : Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
1119 0 : result->split_pages++;
1120 : }
1121 : else
1122 48 : result->split_pages = 0;
1123 :
1124 : /* Search leaf page. */
1125 48 : index = FreePageBtreeSearchLeaf(btp, first_page);
1126 :
1127 : /* Assemble results. */
1128 48 : result->page = btp;
1129 48 : result->index = index;
1130 96 : result->found = index < btp->hdr.nused &&
1131 48 : first_page == btp->u.leaf_key[index].first_page;
1132 : }
1133 :
1134 : /*
1135 : * Search an internal page for the first key greater than or equal to a given
1136 : * page number. Returns the index of that key, or one greater than the number
1137 : * of keys on the page if none.
1138 : */
1139 : static Size
1140 0 : FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page)
1141 : {
1142 0 : Size low = 0;
1143 0 : Size high = btp->hdr.nused;
1144 :
1145 0 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
1146 0 : Assert(high > 0 && high <= FPM_ITEMS_PER_INTERNAL_PAGE);
1147 :
1148 0 : while (low < high)
1149 : {
1150 0 : Size mid = (low + high) / 2;
1151 0 : Size val = btp->u.internal_key[mid].first_page;
1152 :
1153 0 : if (first_page == val)
1154 0 : return mid;
1155 0 : else if (first_page < val)
1156 0 : high = mid;
1157 : else
1158 0 : low = mid + 1;
1159 : }
1160 :
1161 0 : return low;
1162 : }
1163 :
1164 : /*
1165 : * Search a leaf page for the first key greater than or equal to a given
1166 : * page number. Returns the index of that key, or one greater than the number
1167 : * of keys on the page if none.
1168 : */
1169 : static Size
1170 48 : FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page)
1171 : {
1172 48 : Size low = 0;
1173 48 : Size high = btp->hdr.nused;
1174 :
1175 48 : Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
1176 48 : Assert(high > 0 && high <= FPM_ITEMS_PER_LEAF_PAGE);
1177 :
1178 160 : while (low < high)
1179 : {
1180 89 : Size mid = (low + high) / 2;
1181 89 : Size val = btp->u.leaf_key[mid].first_page;
1182 :
1183 89 : if (first_page == val)
1184 25 : return mid;
1185 64 : else if (first_page < val)
1186 50 : high = mid;
1187 : else
1188 14 : low = mid + 1;
1189 : }
1190 :
1191 23 : return low;
1192 : }
1193 :
1194 : /*
1195 : * Allocate a new btree page and move half the keys from the provided page
1196 : * to the new page. Caller is responsible for making sure that there's a
1197 : * page available from fpm->btree_recycle. Returns a pointer to the new page,
1198 : * to which caller must add a downlink.
1199 : */
1200 : static FreePageBtree *
1201 0 : FreePageBtreeSplitPage(FreePageManager *fpm, FreePageBtree *btp)
1202 : {
1203 : FreePageBtree *newsibling;
1204 :
1205 0 : newsibling = FreePageBtreeGetRecycled(fpm);
1206 0 : newsibling->hdr.magic = btp->hdr.magic;
1207 0 : newsibling->hdr.nused = btp->hdr.nused / 2;
1208 0 : relptr_copy(newsibling->hdr.parent, btp->hdr.parent);
1209 0 : btp->hdr.nused -= newsibling->hdr.nused;
1210 :
1211 0 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
1212 0 : memcpy(&newsibling->u.leaf_key,
1213 0 : &btp->u.leaf_key[btp->hdr.nused],
1214 0 : sizeof(FreePageBtreeLeafKey) * newsibling->hdr.nused);
1215 : else
1216 : {
1217 0 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
1218 0 : memcpy(&newsibling->u.internal_key,
1219 0 : &btp->u.internal_key[btp->hdr.nused],
1220 0 : sizeof(FreePageBtreeInternalKey) * newsibling->hdr.nused);
1221 0 : FreePageBtreeUpdateParentPointers(fpm_segment_base(fpm), newsibling);
1222 : }
1223 :
1224 0 : return newsibling;
1225 : }
1226 :
1227 : /*
1228 : * When internal pages are split or merged, the parent pointers of their
1229 : * children must be updated.
1230 : */
1231 : static void
1232 0 : FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp)
1233 : {
1234 : Size i;
1235 :
1236 0 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
1237 0 : for (i = 0; i < btp->hdr.nused; ++i)
1238 : {
1239 : FreePageBtree *child;
1240 :
1241 0 : child = relptr_access(base, btp->u.internal_key[i].child);
1242 0 : relptr_store(base, child->hdr.parent, btp);
1243 : }
1244 0 : }
1245 :
1246 : /*
1247 : * Debugging dump of btree data.
1248 : */
1249 : static void
1250 0 : FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
1251 : FreePageBtree *parent, int level, StringInfo buf)
1252 : {
1253 0 : char *base = fpm_segment_base(fpm);
1254 0 : Size pageno = fpm_pointer_to_page(base, btp);
1255 : Size index;
1256 : FreePageBtree *check_parent;
1257 :
1258 0 : check_stack_depth();
1259 0 : check_parent = relptr_access(base, btp->hdr.parent);
1260 0 : appendStringInfo(buf, " %zu@%d %c", pageno, level,
1261 0 : btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ? 'i' : 'l');
1262 0 : if (parent != check_parent)
1263 0 : appendStringInfo(buf, " [actual parent %zu, expected %zu]",
1264 0 : fpm_pointer_to_page(base, check_parent),
1265 0 : fpm_pointer_to_page(base, parent));
1266 0 : appendStringInfoChar(buf, ':');
1267 0 : for (index = 0; index < btp->hdr.nused; ++index)
1268 : {
1269 0 : if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
1270 0 : appendStringInfo(buf, " %zu->%zu",
1271 : btp->u.internal_key[index].first_page,
1272 0 : btp->u.internal_key[index].child.relptr_off / FPM_PAGE_SIZE);
1273 : else
1274 0 : appendStringInfo(buf, " %zu(%zu)",
1275 : btp->u.leaf_key[index].first_page,
1276 : btp->u.leaf_key[index].npages);
1277 : }
1278 0 : appendStringInfoChar(buf, '\n');
1279 :
1280 0 : if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
1281 : {
1282 0 : for (index = 0; index < btp->hdr.nused; ++index)
1283 : {
1284 : FreePageBtree *child;
1285 :
1286 0 : child = relptr_access(base, btp->u.internal_key[index].child);
1287 0 : FreePageManagerDumpBtree(fpm, child, btp, level + 1, buf);
1288 : }
1289 : }
1290 0 : }
1291 :
1292 : /*
1293 : * Debugging dump of free-span data.
1294 : */
1295 : static void
1296 0 : FreePageManagerDumpSpans(FreePageManager *fpm, FreePageSpanLeader *span,
1297 : Size expected_pages, StringInfo buf)
1298 : {
1299 0 : char *base = fpm_segment_base(fpm);
1300 :
1301 0 : while (span != NULL)
1302 : {
1303 0 : if (span->npages != expected_pages)
1304 0 : appendStringInfo(buf, " %zu(%zu)", fpm_pointer_to_page(base, span),
1305 : span->npages);
1306 : else
1307 0 : appendStringInfo(buf, " %zu", fpm_pointer_to_page(base, span));
1308 0 : span = relptr_access(base, span->next);
1309 : }
1310 :
1311 0 : appendStringInfoChar(buf, '\n');
1312 0 : }
1313 :
1314 : /*
1315 : * This function allocates a run of pages of the given length from the free
1316 : * page manager.
1317 : */
1318 : static bool
1319 34 : FreePageManagerGetInternal(FreePageManager *fpm, Size npages, Size *first_page)
1320 : {
1321 34 : char *base = fpm_segment_base(fpm);
1322 34 : FreePageSpanLeader *victim = NULL;
1323 : FreePageSpanLeader *prev;
1324 : FreePageSpanLeader *next;
1325 : FreePageBtreeSearchResult result;
1326 34 : Size victim_page = 0; /* placate compiler */
1327 : Size f;
1328 :
1329 : /*
1330 : * Search for a free span.
1331 : *
1332 : * Right now, we use a simple best-fit policy here, but it's possible for
1333 : * this to result in memory fragmentation if we're repeatedly asked to
1334 : * allocate chunks just a little smaller than what we have available.
1335 : * Hopefully, this is unlikely, because we expect most requests to be
1336 : * single pages or superblock-sized chunks -- but no policy can be optimal
1337 : * under all circumstances unless it has knowledge of future allocation
1338 : * patterns.
1339 : */
1340 4064 : for (f = Min(npages, FPM_NUM_FREELISTS) - 1; f < FPM_NUM_FREELISTS; ++f)
1341 : {
1342 : /* Skip empty freelists. */
1343 2032 : if (relptr_is_null(fpm->freelist[f]))
1344 1998 : continue;
1345 :
1346 : /*
1347 : * All of the freelists except the last one contain only items of a
1348 : * single size, so we just take the first one. But the final free
1349 : * list contains everything too big for any of the other lists, so we
1350 : * need to search the list.
1351 : */
1352 34 : if (f < FPM_NUM_FREELISTS - 1)
1353 18 : victim = relptr_access(base, fpm->freelist[f]);
1354 : else
1355 : {
1356 : FreePageSpanLeader *candidate;
1357 :
1358 16 : candidate = relptr_access(base, fpm->freelist[f]);
1359 : do
1360 : {
1361 16 : if (candidate->npages >= npages && (victim == NULL ||
1362 0 : victim->npages > candidate->npages))
1363 : {
1364 16 : victim = candidate;
1365 16 : if (victim->npages == npages)
1366 0 : break;
1367 : }
1368 16 : candidate = relptr_access(base, candidate->next);
1369 16 : } while (candidate != NULL);
1370 : }
1371 34 : break;
1372 : }
1373 :
1374 : /* If we didn't find an allocatable span, return failure. */
1375 34 : if (victim == NULL)
1376 0 : return false;
1377 :
1378 : /* Remove span from free list. */
1379 34 : Assert(victim->magic == FREE_PAGE_SPAN_LEADER_MAGIC);
1380 34 : prev = relptr_access(base, victim->prev);
1381 34 : next = relptr_access(base, victim->next);
1382 34 : if (prev != NULL)
1383 0 : relptr_copy(prev->next, victim->next);
1384 : else
1385 34 : relptr_copy(fpm->freelist[f], victim->next);
1386 34 : if (next != NULL)
1387 0 : relptr_copy(next->prev, victim->prev);
1388 34 : victim_page = fpm_pointer_to_page(base, victim);
1389 :
1390 : /* Decide whether we might be invalidating contiguous_pages. */
1391 50 : if (f == FPM_NUM_FREELISTS - 1 &&
1392 16 : victim->npages == fpm->contiguous_pages)
1393 : {
1394 : /*
1395 : * The victim span came from the oversized freelist, and had the same
1396 : * size as the longest span. There may or may not be another one of
1397 : * the same size, so contiguous_pages must be recomputed just to be
1398 : * safe.
1399 : */
1400 16 : fpm->contiguous_pages_dirty = true;
1401 : }
1402 18 : else if (f + 1 == fpm->contiguous_pages &&
1403 0 : relptr_is_null(fpm->freelist[f]))
1404 : {
1405 : /*
1406 : * The victim span came from a fixed sized freelist, and it was the
1407 : * list for spans of the same size as the current longest span, and
1408 : * the list is now empty after removing the victim. So
1409 : * contiguous_pages must be recomputed without a doubt.
1410 : */
1411 0 : fpm->contiguous_pages_dirty = true;
1412 : }
1413 :
1414 : /*
1415 : * If we haven't initialized the btree yet, the victim must be the single
1416 : * span stored within the FreePageManager itself. Otherwise, we need to
1417 : * update the btree.
1418 : */
1419 34 : if (relptr_is_null(fpm->btree_root))
1420 : {
1421 9 : Assert(victim_page == fpm->singleton_first_page);
1422 9 : Assert(victim->npages == fpm->singleton_npages);
1423 9 : Assert(victim->npages >= npages);
1424 9 : fpm->singleton_first_page += npages;
1425 9 : fpm->singleton_npages -= npages;
1426 9 : if (fpm->singleton_npages > 0)
1427 9 : FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
1428 : fpm->singleton_npages);
1429 : }
1430 : else
1431 : {
1432 : /*
1433 : * If the span we found is exactly the right size, remove it from the
1434 : * btree completely. Otherwise, adjust the btree entry to reflect the
1435 : * still-unallocated portion of the span, and put that portion on the
1436 : * appropriate free list.
1437 : */
1438 25 : FreePageBtreeSearch(fpm, victim_page, &result);
1439 25 : Assert(result.found);
1440 25 : if (victim->npages == npages)
1441 1 : FreePageBtreeRemove(fpm, result.page, result.index);
1442 : else
1443 : {
1444 : FreePageBtreeLeafKey *key;
1445 :
1446 : /* Adjust btree to reflect remaining pages. */
1447 24 : Assert(victim->npages > npages);
1448 24 : key = &result.page->u.leaf_key[result.index];
1449 24 : Assert(key->npages == victim->npages);
1450 24 : key->first_page += npages;
1451 24 : key->npages -= npages;
1452 24 : if (result.index == 0)
1453 17 : FreePageBtreeAdjustAncestorKeys(fpm, result.page);
1454 :
1455 : /* Put the unallocated pages back on the appropriate free list. */
1456 24 : FreePagePushSpanLeader(fpm, victim_page + npages,
1457 24 : victim->npages - npages);
1458 : }
1459 : }
1460 :
1461 : /* Return results to caller. */
1462 34 : *first_page = fpm_pointer_to_page(base, victim);
1463 34 : return true;
1464 : }
1465 :
1466 : /*
1467 : * Put a range of pages into the btree and freelists, consolidating it with
1468 : * existing free spans just before and/or after it. If 'soft' is true,
1469 : * only perform the insertion if it can be done without allocating new btree
1470 : * pages; if false, do it always. Returns 0 if the soft flag caused the
1471 : * insertion to be skipped, or otherwise the size of the contiguous span
1472 : * created by the insertion. This may be larger than npages if we're able
1473 : * to consolidate with an adjacent range. *internal_pages_used is set to
1474 : * true if the btree allocated pages for internal purposes, which might
1475 : * invalidate the current largest run requiring it to be recomputed.
1476 : */
1477 : static Size
1478 25 : FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages,
1479 : bool soft)
1480 : {
1481 25 : char *base = fpm_segment_base(fpm);
1482 : FreePageBtreeSearchResult result;
1483 25 : FreePageBtreeLeafKey *prevkey = NULL;
1484 25 : FreePageBtreeLeafKey *nextkey = NULL;
1485 : FreePageBtree *np;
1486 : Size nindex;
1487 :
1488 25 : Assert(npages > 0);
1489 :
1490 : /* We can store a single free span without initializing the btree. */
1491 25 : if (fpm->btree_depth == 0)
1492 : {
1493 5 : if (fpm->singleton_npages == 0)
1494 : {
1495 : /* Don't have a span yet; store this one. */
1496 2 : fpm->singleton_first_page = first_page;
1497 2 : fpm->singleton_npages = npages;
1498 2 : FreePagePushSpanLeader(fpm, first_page, npages);
1499 2 : return fpm->singleton_npages;
1500 : }
1501 3 : else if (fpm->singleton_first_page + fpm->singleton_npages ==
1502 : first_page)
1503 : {
1504 : /* New span immediately follows sole existing span. */
1505 0 : fpm->singleton_npages += npages;
1506 0 : FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
1507 0 : FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
1508 : fpm->singleton_npages);
1509 0 : return fpm->singleton_npages;
1510 : }
1511 3 : else if (first_page + npages == fpm->singleton_first_page)
1512 : {
1513 : /* New span immediately precedes sole existing span. */
1514 0 : FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
1515 0 : fpm->singleton_first_page = first_page;
1516 0 : fpm->singleton_npages += npages;
1517 0 : FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
1518 : fpm->singleton_npages);
1519 0 : return fpm->singleton_npages;
1520 : }
1521 : else
1522 : {
1523 : /* Not contiguous; we need to initialize the btree. */
1524 : Size root_page;
1525 : FreePageBtree *root;
1526 :
1527 3 : if (!relptr_is_null(fpm->btree_recycle))
1528 0 : root = FreePageBtreeGetRecycled(fpm);
1529 3 : else if (FreePageManagerGetInternal(fpm, 1, &root_page))
1530 3 : root = (FreePageBtree *) fpm_page_to_pointer(base, root_page);
1531 : else
1532 : {
1533 : /* We'd better be able to get a page from the existing range. */
1534 0 : elog(FATAL, "free page manager btree is corrupt");
1535 : }
1536 :
1537 : /* Create the btree and move the preexisting range into it. */
1538 3 : root->hdr.magic = FREE_PAGE_LEAF_MAGIC;
1539 3 : root->hdr.nused = 1;
1540 3 : relptr_store(base, root->hdr.parent, (FreePageBtree *) NULL);
1541 3 : root->u.leaf_key[0].first_page = fpm->singleton_first_page;
1542 3 : root->u.leaf_key[0].npages = fpm->singleton_npages;
1543 3 : relptr_store(base, fpm->btree_root, root);
1544 3 : fpm->singleton_first_page = 0;
1545 3 : fpm->singleton_npages = 0;
1546 3 : fpm->btree_depth = 1;
1547 :
1548 : /*
1549 : * Corner case: it may be that the btree root took the very last
1550 : * free page. In that case, the sole btree entry covers a zero
1551 : * page run, which is invalid. Overwrite it with the entry we're
1552 : * trying to insert and get out.
1553 : */
1554 3 : if (root->u.leaf_key[0].npages == 0)
1555 : {
1556 0 : root->u.leaf_key[0].first_page = first_page;
1557 0 : root->u.leaf_key[0].npages = npages;
1558 0 : FreePagePushSpanLeader(fpm, first_page, npages);
1559 0 : return npages;
1560 : }
1561 :
1562 : /* Fall through to insert the new key. */
1563 : }
1564 : }
1565 :
1566 : /* Search the btree. */
1567 23 : FreePageBtreeSearch(fpm, first_page, &result);
1568 23 : Assert(!result.found);
1569 23 : if (result.index > 0)
1570 11 : prevkey = &result.page->u.leaf_key[result.index - 1];
1571 23 : if (result.index < result.page->hdr.nused)
1572 : {
1573 23 : np = result.page;
1574 23 : nindex = result.index;
1575 23 : nextkey = &result.page->u.leaf_key[result.index];
1576 : }
1577 : else
1578 : {
1579 0 : np = FreePageBtreeFindRightSibling(base, result.page);
1580 0 : nindex = 0;
1581 0 : if (np != NULL)
1582 0 : nextkey = &np->u.leaf_key[0];
1583 : }
1584 :
1585 : /* Consolidate with the previous entry if possible. */
1586 23 : if (prevkey != NULL && prevkey->first_page + prevkey->npages >= first_page)
1587 : {
1588 10 : bool remove_next = false;
1589 : Size result;
1590 :
1591 10 : Assert(prevkey->first_page + prevkey->npages == first_page);
1592 10 : prevkey->npages = (first_page - prevkey->first_page) + npages;
1593 :
1594 : /* Check whether we can *also* consolidate with the following entry. */
1595 20 : if (nextkey != NULL &&
1596 10 : prevkey->first_page + prevkey->npages >= nextkey->first_page)
1597 : {
1598 8 : Assert(prevkey->first_page + prevkey->npages ==
1599 : nextkey->first_page);
1600 16 : prevkey->npages = (nextkey->first_page - prevkey->first_page)
1601 8 : + nextkey->npages;
1602 8 : FreePagePopSpanLeader(fpm, nextkey->first_page);
1603 8 : remove_next = true;
1604 : }
1605 :
1606 : /* Put the span on the correct freelist and save size. */
1607 10 : FreePagePopSpanLeader(fpm, prevkey->first_page);
1608 10 : FreePagePushSpanLeader(fpm, prevkey->first_page, prevkey->npages);
1609 10 : result = prevkey->npages;
1610 :
1611 : /*
1612 : * If we consolidated with both the preceding and following entries,
1613 : * we must remove the following entry. We do this last, because
1614 : * removing an element from the btree may invalidate pointers we hold
1615 : * into the current data structure.
1616 : *
1617 : * NB: The btree is technically in an invalid state a this point
1618 : * because we've already updated prevkey to cover the same key space
1619 : * as nextkey. FreePageBtreeRemove() shouldn't notice that, though.
1620 : */
1621 10 : if (remove_next)
1622 8 : FreePageBtreeRemove(fpm, np, nindex);
1623 :
1624 10 : return result;
1625 : }
1626 :
1627 : /* Consolidate with the next entry if possible. */
1628 13 : if (nextkey != NULL && first_page + npages >= nextkey->first_page)
1629 : {
1630 : Size newpages;
1631 :
1632 : /* Compute new size for span. */
1633 0 : Assert(first_page + npages == nextkey->first_page);
1634 0 : newpages = (nextkey->first_page - first_page) + nextkey->npages;
1635 :
1636 : /* Put span on correct free list. */
1637 0 : FreePagePopSpanLeader(fpm, nextkey->first_page);
1638 0 : FreePagePushSpanLeader(fpm, first_page, newpages);
1639 :
1640 : /* Update key in place. */
1641 0 : nextkey->first_page = first_page;
1642 0 : nextkey->npages = newpages;
1643 :
1644 : /* If reducing first key on page, ancestors might need adjustment. */
1645 0 : if (nindex == 0)
1646 0 : FreePageBtreeAdjustAncestorKeys(fpm, np);
1647 :
1648 0 : return nextkey->npages;
1649 : }
1650 :
1651 : /* Split leaf page and as many of its ancestors as necessary. */
1652 13 : if (result.split_pages > 0)
1653 : {
1654 : /*
1655 : * NB: We could consider various coping strategies here to avoid a
1656 : * split; most obviously, if np != result.page, we could target that
1657 : * page instead. More complicated shuffling strategies could be
1658 : * possible as well; basically, unless every single leaf page is 100%
1659 : * full, we can jam this key in there if we try hard enough. It's
1660 : * unlikely that trying that hard is worthwhile, but it's possible we
1661 : * might need to make more than no effort. For now, we just do the
1662 : * easy thing, which is nothing.
1663 : */
1664 :
1665 : /* If this is a soft insert, it's time to give up. */
1666 0 : if (soft)
1667 0 : return 0;
1668 :
1669 : /* Check whether we need to allocate more btree pages to split. */
1670 0 : if (result.split_pages > fpm->btree_recycle_count)
1671 : {
1672 : Size pages_needed;
1673 : Size recycle_page;
1674 : Size i;
1675 :
1676 : /*
1677 : * Allocate the required number of pages and split each one in
1678 : * turn. This should never fail, because if we've got enough
1679 : * spans of free pages kicking around that we need additional
1680 : * storage space just to remember them all, then we should
1681 : * certainly have enough to expand the btree, which should only
1682 : * ever use a tiny number of pages compared to the number under
1683 : * management. If it does, something's badly screwed up.
1684 : */
1685 0 : pages_needed = result.split_pages - fpm->btree_recycle_count;
1686 0 : for (i = 0; i < pages_needed; ++i)
1687 : {
1688 0 : if (!FreePageManagerGetInternal(fpm, 1, &recycle_page))
1689 0 : elog(FATAL, "free page manager btree is corrupt");
1690 0 : FreePageBtreeRecycle(fpm, recycle_page);
1691 : }
1692 :
1693 : /*
1694 : * The act of allocating pages to recycle may have invalidated the
1695 : * results of our previous btree reserch, so repeat it. (We could
1696 : * recheck whether any of our split-avoidance strategies that were
1697 : * not viable before now are, but it hardly seems worthwhile, so
1698 : * we don't bother. Consolidation can't be possible now if it
1699 : * wasn't previously.)
1700 : */
1701 0 : FreePageBtreeSearch(fpm, first_page, &result);
1702 :
1703 : /*
1704 : * The act of allocating pages for use in constructing our btree
1705 : * should never cause any page to become more full, so the new
1706 : * split depth should be no greater than the old one, and perhaps
1707 : * less if we fortuitously allocated a chunk that freed up a slot
1708 : * on the page we need to update.
1709 : */
1710 0 : Assert(result.split_pages <= fpm->btree_recycle_count);
1711 : }
1712 :
1713 : /* If we still need to perform a split, do it. */
1714 0 : if (result.split_pages > 0)
1715 : {
1716 0 : FreePageBtree *split_target = result.page;
1717 0 : FreePageBtree *child = NULL;
1718 0 : Size key = first_page;
1719 :
1720 : for (;;)
1721 : {
1722 : FreePageBtree *newsibling;
1723 : FreePageBtree *parent;
1724 :
1725 : /* Identify parent page, which must receive downlink. */
1726 0 : parent = relptr_access(base, split_target->hdr.parent);
1727 :
1728 : /* Split the page - downlink not added yet. */
1729 0 : newsibling = FreePageBtreeSplitPage(fpm, split_target);
1730 :
1731 : /*
1732 : * At this point in the loop, we're always carrying a pending
1733 : * insertion. On the first pass, it's the actual key we're
1734 : * trying to insert; on subsequent passes, it's the downlink
1735 : * that needs to be added as a result of the split performed
1736 : * during the previous loop iteration. Since we've just split
1737 : * the page, there's definitely room on one of the two
1738 : * resulting pages.
1739 : */
1740 0 : if (child == NULL)
1741 : {
1742 : Size index;
1743 : FreePageBtree *insert_into;
1744 :
1745 0 : insert_into = key < newsibling->u.leaf_key[0].first_page ?
1746 0 : split_target : newsibling;
1747 0 : index = FreePageBtreeSearchLeaf(insert_into, key);
1748 0 : FreePageBtreeInsertLeaf(insert_into, index, key, npages);
1749 0 : if (index == 0 && insert_into == split_target)
1750 0 : FreePageBtreeAdjustAncestorKeys(fpm, split_target);
1751 : }
1752 : else
1753 : {
1754 : Size index;
1755 : FreePageBtree *insert_into;
1756 :
1757 0 : insert_into =
1758 0 : key < newsibling->u.internal_key[0].first_page ?
1759 0 : split_target : newsibling;
1760 0 : index = FreePageBtreeSearchInternal(insert_into, key);
1761 0 : FreePageBtreeInsertInternal(base, insert_into, index,
1762 : key, child);
1763 0 : relptr_store(base, child->hdr.parent, insert_into);
1764 0 : if (index == 0 && insert_into == split_target)
1765 0 : FreePageBtreeAdjustAncestorKeys(fpm, split_target);
1766 : }
1767 :
1768 : /* If the page we just split has no parent, split the root. */
1769 0 : if (parent == NULL)
1770 : {
1771 : FreePageBtree *newroot;
1772 :
1773 0 : newroot = FreePageBtreeGetRecycled(fpm);
1774 0 : newroot->hdr.magic = FREE_PAGE_INTERNAL_MAGIC;
1775 0 : newroot->hdr.nused = 2;
1776 0 : relptr_store(base, newroot->hdr.parent,
1777 : (FreePageBtree *) NULL);
1778 0 : newroot->u.internal_key[0].first_page =
1779 0 : FreePageBtreeFirstKey(split_target);
1780 0 : relptr_store(base, newroot->u.internal_key[0].child,
1781 : split_target);
1782 0 : relptr_store(base, split_target->hdr.parent, newroot);
1783 0 : newroot->u.internal_key[1].first_page =
1784 0 : FreePageBtreeFirstKey(newsibling);
1785 0 : relptr_store(base, newroot->u.internal_key[1].child,
1786 : newsibling);
1787 0 : relptr_store(base, newsibling->hdr.parent, newroot);
1788 0 : relptr_store(base, fpm->btree_root, newroot);
1789 0 : fpm->btree_depth++;
1790 :
1791 0 : break;
1792 : }
1793 :
1794 : /* If the parent page isn't full, insert the downlink. */
1795 0 : key = newsibling->u.internal_key[0].first_page;
1796 0 : if (parent->hdr.nused < FPM_ITEMS_PER_INTERNAL_PAGE)
1797 : {
1798 : Size index;
1799 :
1800 0 : index = FreePageBtreeSearchInternal(parent, key);
1801 0 : FreePageBtreeInsertInternal(base, parent, index,
1802 : key, newsibling);
1803 0 : relptr_store(base, newsibling->hdr.parent, parent);
1804 0 : if (index == 0)
1805 0 : FreePageBtreeAdjustAncestorKeys(fpm, parent);
1806 0 : break;
1807 : }
1808 :
1809 : /* The parent also needs to be split, so loop around. */
1810 0 : child = newsibling;
1811 0 : split_target = parent;
1812 0 : }
1813 :
1814 : /*
1815 : * The loop above did the insert, so just need to update the free
1816 : * list, and we're done.
1817 : */
1818 0 : FreePagePushSpanLeader(fpm, first_page, npages);
1819 :
1820 0 : return npages;
1821 : }
1822 : }
1823 :
1824 : /* Physically add the key to the page. */
1825 13 : Assert(result.page->hdr.nused < FPM_ITEMS_PER_LEAF_PAGE);
1826 13 : FreePageBtreeInsertLeaf(result.page, result.index, first_page, npages);
1827 :
1828 : /* If new first key on page, ancestors might need adjustment. */
1829 13 : if (result.index == 0)
1830 12 : FreePageBtreeAdjustAncestorKeys(fpm, result.page);
1831 :
1832 : /* Put it on the free list. */
1833 13 : FreePagePushSpanLeader(fpm, first_page, npages);
1834 :
1835 13 : return npages;
1836 : }
1837 :
1838 : /*
1839 : * Remove a FreePageSpanLeader from the linked-list that contains it, either
1840 : * because we're changing the size of the span, or because we're allocating it.
1841 : */
1842 : static void
1843 18 : FreePagePopSpanLeader(FreePageManager *fpm, Size pageno)
1844 : {
1845 18 : char *base = fpm_segment_base(fpm);
1846 : FreePageSpanLeader *span;
1847 : FreePageSpanLeader *next;
1848 : FreePageSpanLeader *prev;
1849 :
1850 18 : span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);
1851 :
1852 18 : next = relptr_access(base, span->next);
1853 18 : prev = relptr_access(base, span->prev);
1854 18 : if (next != NULL)
1855 0 : relptr_copy(next->prev, span->prev);
1856 18 : if (prev != NULL)
1857 0 : relptr_copy(prev->next, span->next);
1858 : else
1859 : {
1860 18 : Size f = Min(span->npages, FPM_NUM_FREELISTS) - 1;
1861 :
1862 18 : Assert(fpm->freelist[f].relptr_off == pageno * FPM_PAGE_SIZE);
1863 18 : relptr_copy(fpm->freelist[f], span->next);
1864 : }
1865 18 : }
1866 :
1867 : /*
1868 : * Initialize a new FreePageSpanLeader and put it on the appropriate free list.
1869 : */
1870 : static void
1871 58 : FreePagePushSpanLeader(FreePageManager *fpm, Size first_page, Size npages)
1872 : {
1873 58 : char *base = fpm_segment_base(fpm);
1874 58 : Size f = Min(npages, FPM_NUM_FREELISTS) - 1;
1875 58 : FreePageSpanLeader *head = relptr_access(base, fpm->freelist[f]);
1876 : FreePageSpanLeader *span;
1877 :
1878 58 : span = (FreePageSpanLeader *) fpm_page_to_pointer(base, first_page);
1879 58 : span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
1880 58 : span->npages = npages;
1881 58 : relptr_store(base, span->next, head);
1882 58 : relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
1883 58 : if (head != NULL)
1884 0 : relptr_store(base, head->prev, span);
1885 58 : relptr_store(base, fpm->freelist[f], span);
1886 58 : }
|