Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * freespace.c
4 : * POSTGRES free space map for quickly finding free space in relations
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/storage/freespace/freespace.c
12 : *
13 : *
14 : * NOTES:
15 : *
16 : * Free Space Map keeps track of the amount of free space on pages, and
17 : * allows quickly searching for a page with enough free space. The FSM is
18 : * stored in a dedicated relation fork of all heap relations, and those
19 : * index access methods that need it (see also indexfsm.c). See README for
20 : * more information.
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 : #include "postgres.h"
25 :
26 : #include "access/htup_details.h"
27 : #include "access/xlogutils.h"
28 : #include "miscadmin.h"
29 : #include "storage/freespace.h"
30 : #include "storage/fsm_internals.h"
31 : #include "storage/lmgr.h"
32 : #include "storage/smgr.h"
33 :
34 :
35 : /*
36 : * We use just one byte to store the amount of free space on a page, so we
37 : * divide the amount of free space a page can have into 256 different
38 : * categories. The highest category, 255, represents a page with at least
39 : * MaxFSMRequestSize bytes of free space, and the second highest category
40 : * represents the range from 254 * FSM_CAT_STEP, inclusive, to
41 : * MaxFSMRequestSize, exclusive.
42 : *
43 : * MaxFSMRequestSize depends on the architecture and BLCKSZ, but assuming
44 : * default 8k BLCKSZ, and that MaxFSMRequestSize is 8164 bytes, the
45 : * categories look like this:
46 : *
47 : *
48 : * Range Category
49 : * 0 - 31 0
50 : * 32 - 63 1
51 : * ... ... ...
52 : * 8096 - 8127 253
53 : * 8128 - 8163 254
54 : * 8164 - 8192 255
55 : *
56 : * The reason that MaxFSMRequestSize is special is that if MaxFSMRequestSize
57 : * isn't equal to a range boundary, a page with exactly MaxFSMRequestSize
58 : * bytes of free space wouldn't satisfy a request for MaxFSMRequestSize
59 : * bytes. If there isn't more than MaxFSMRequestSize bytes of free space on a
60 : * completely empty page, that would mean that we could never satisfy a
61 : * request of exactly MaxFSMRequestSize bytes.
62 : */
63 : #define FSM_CATEGORIES 256
64 : #define FSM_CAT_STEP (BLCKSZ / FSM_CATEGORIES)
65 : #define MaxFSMRequestSize MaxHeapTupleSize
66 :
67 : /*
68 : * Depth of the on-disk tree. We need to be able to address 2^32-1 blocks,
69 : * and 1626 is the smallest number that satisfies X^3 >= 2^32-1. Likewise,
70 : * 216 is the smallest number that satisfies X^4 >= 2^32-1. In practice,
71 : * this means that 4096 bytes is the smallest BLCKSZ that we can get away
72 : * with a 3-level tree, and 512 is the smallest we support.
73 : */
74 : #define FSM_TREE_DEPTH ((SlotsPerFSMPage >= 1626) ? 3 : 4)
75 :
76 : #define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1)
77 : #define FSM_BOTTOM_LEVEL 0
78 :
79 : /*
80 : * The internal FSM routines work on a logical addressing scheme. Each
81 : * level of the tree can be thought of as a separately addressable file.
82 : */
83 : typedef struct
84 : {
85 : int level; /* level */
86 : int logpageno; /* page number within the level */
87 : } FSMAddress;
88 :
89 : /* Address of the root page. */
90 : static const FSMAddress FSM_ROOT_ADDRESS = {FSM_ROOT_LEVEL, 0};
91 :
92 : /* functions to navigate the tree */
93 : static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
94 : static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
95 : static FSMAddress fsm_get_location(BlockNumber heapblk, uint16 *slot);
96 : static BlockNumber fsm_get_heap_blk(FSMAddress addr, uint16 slot);
97 : static BlockNumber fsm_logical_to_physical(FSMAddress addr);
98 :
99 : static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend);
100 : static void fsm_extend(Relation rel, BlockNumber fsm_nblocks);
101 :
102 : /* functions to convert amount of free space to a FSM category */
103 : static uint8 fsm_space_avail_to_cat(Size avail);
104 : static uint8 fsm_space_needed_to_cat(Size needed);
105 : static Size fsm_space_cat_to_avail(uint8 cat);
106 :
107 : /* workhorse functions for various operations */
108 : static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
109 : uint8 newValue, uint8 minValue);
110 : static BlockNumber fsm_search(Relation rel, uint8 min_cat);
111 : static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof);
112 : static BlockNumber fsm_get_lastblckno(Relation rel, FSMAddress addr);
113 : static void fsm_update_recursive(Relation rel, FSMAddress addr, uint8 new_cat);
114 :
115 :
116 : /******** Public API ********/
117 :
118 : /*
119 : * GetPageWithFreeSpace - try to find a page in the given relation with
120 : * at least the specified amount of free space.
121 : *
122 : * If successful, return the block number; if not, return InvalidBlockNumber.
123 : *
124 : * The caller must be prepared for the possibility that the returned page
125 : * will turn out to have too little space available by the time the caller
126 : * gets a lock on it. In that case, the caller should report the actual
127 : * amount of free space available on that page and then try again (see
128 : * RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
129 : * extend the relation.
130 : */
131 : BlockNumber
132 5623 : GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
133 : {
134 5623 : uint8 min_cat = fsm_space_needed_to_cat(spaceNeeded);
135 :
136 5623 : return fsm_search(rel, min_cat);
137 : }
138 :
139 : /*
140 : * RecordAndGetPageWithFreeSpace - update info about a page and try again.
141 : *
142 : * We provide this combo form to save some locking overhead, compared to
143 : * separate RecordPageWithFreeSpace + GetPageWithFreeSpace calls. There's
144 : * also some effort to return a page close to the old page; if there's a
145 : * page with enough free space on the same FSM page where the old one page
146 : * is located, it is preferred.
147 : */
148 : BlockNumber
149 8386 : RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
150 : Size oldSpaceAvail, Size spaceNeeded)
151 : {
152 8386 : int old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
153 8386 : int search_cat = fsm_space_needed_to_cat(spaceNeeded);
154 : FSMAddress addr;
155 : uint16 slot;
156 : int search_slot;
157 :
158 : /* Get the location of the FSM byte representing the heap block */
159 8386 : addr = fsm_get_location(oldPage, &slot);
160 :
161 8386 : search_slot = fsm_set_and_search(rel, addr, slot, old_cat, search_cat);
162 :
163 : /*
164 : * If fsm_set_and_search found a suitable new block, return that.
165 : * Otherwise, search as usual.
166 : */
167 8386 : if (search_slot != -1)
168 560 : return fsm_get_heap_blk(addr, search_slot);
169 : else
170 7826 : return fsm_search(rel, search_cat);
171 : }
172 :
173 : /*
174 : * RecordPageWithFreeSpace - update info about a page.
175 : *
176 : * Note that if the new spaceAvail value is higher than the old value stored
177 : * in the FSM, the space might not become visible to searchers until the next
178 : * FreeSpaceMapVacuum call, which updates the upper level pages.
179 : */
180 : void
181 5264 : RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
182 : {
183 5264 : int new_cat = fsm_space_avail_to_cat(spaceAvail);
184 : FSMAddress addr;
185 : uint16 slot;
186 :
187 : /* Get the location of the FSM byte representing the heap block */
188 5264 : addr = fsm_get_location(heapBlk, &slot);
189 :
190 5264 : fsm_set_and_search(rel, addr, slot, new_cat, 0);
191 5264 : }
192 :
193 : /*
194 : * Update the upper levels of the free space map all the way up to the root
195 : * to make sure we don't lose track of new blocks we just inserted. This is
196 : * intended to be used after adding many new blocks to the relation; we judge
197 : * it not worth updating the upper levels of the tree every time data for
198 : * a single page changes, but for a bulk-extend it's worth it.
199 : */
200 : void
201 6 : UpdateFreeSpaceMap(Relation rel, BlockNumber startBlkNum,
202 : BlockNumber endBlkNum, Size freespace)
203 : {
204 6 : int new_cat = fsm_space_avail_to_cat(freespace);
205 : FSMAddress addr;
206 : uint16 slot;
207 : BlockNumber blockNum;
208 : BlockNumber lastBlkOnPage;
209 :
210 6 : blockNum = startBlkNum;
211 :
212 12 : while (blockNum <= endBlkNum)
213 : {
214 : /*
215 : * Find FSM address for this block; update tree all the way to the
216 : * root.
217 : */
218 6 : addr = fsm_get_location(blockNum, &slot);
219 6 : fsm_update_recursive(rel, addr, new_cat);
220 :
221 : /*
222 : * Get the last block number on this FSM page. If that's greater than
223 : * or equal to our endBlkNum, we're done. Otherwise, advance to the
224 : * first block on the next page.
225 : */
226 6 : lastBlkOnPage = fsm_get_lastblckno(rel, addr);
227 6 : if (lastBlkOnPage >= endBlkNum)
228 6 : break;
229 0 : blockNum = lastBlkOnPage + 1;
230 : }
231 6 : }
232 :
233 : /*
234 : * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
235 : * WAL replay
236 : */
237 : void
238 0 : XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
239 : Size spaceAvail)
240 : {
241 0 : int new_cat = fsm_space_avail_to_cat(spaceAvail);
242 : FSMAddress addr;
243 : uint16 slot;
244 : BlockNumber blkno;
245 : Buffer buf;
246 : Page page;
247 :
248 : /* Get the location of the FSM byte representing the heap block */
249 0 : addr = fsm_get_location(heapBlk, &slot);
250 0 : blkno = fsm_logical_to_physical(addr);
251 :
252 : /* If the page doesn't exist already, extend */
253 0 : buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR);
254 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
255 :
256 0 : page = BufferGetPage(buf);
257 0 : if (PageIsNew(page))
258 0 : PageInit(page, BLCKSZ, 0);
259 :
260 0 : if (fsm_set_avail(page, slot, new_cat))
261 0 : MarkBufferDirtyHint(buf, false);
262 0 : UnlockReleaseBuffer(buf);
263 0 : }
264 :
265 : /*
266 : * GetRecordedFreePage - return the amount of free space on a particular page,
267 : * according to the FSM.
268 : */
269 : Size
270 12 : GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
271 : {
272 : FSMAddress addr;
273 : uint16 slot;
274 : Buffer buf;
275 : uint8 cat;
276 :
277 : /* Get the location of the FSM byte representing the heap block */
278 12 : addr = fsm_get_location(heapBlk, &slot);
279 :
280 12 : buf = fsm_readbuf(rel, addr, false);
281 12 : if (!BufferIsValid(buf))
282 0 : return 0;
283 12 : cat = fsm_get_avail(BufferGetPage(buf), slot);
284 12 : ReleaseBuffer(buf);
285 :
286 12 : return fsm_space_cat_to_avail(cat);
287 : }
288 :
289 : /*
290 : * FreeSpaceMapTruncateRel - adjust for truncation of a relation.
291 : *
292 : * The caller must hold AccessExclusiveLock on the relation, to ensure that
293 : * other backends receive the smgr invalidation event that this function sends
294 : * before they access the FSM again.
295 : *
296 : * nblocks is the new size of the heap.
297 : */
298 : void
299 12 : FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
300 : {
301 : BlockNumber new_nfsmblocks;
302 : FSMAddress first_removed_address;
303 : uint16 first_removed_slot;
304 : Buffer buf;
305 :
306 12 : RelationOpenSmgr(rel);
307 :
308 : /*
309 : * If no FSM has been created yet for this relation, there's nothing to
310 : * truncate.
311 : */
312 12 : if (!smgrexists(rel->rd_smgr, FSM_FORKNUM))
313 0 : return;
314 :
315 : /* Get the location in the FSM of the first removed heap block */
316 12 : first_removed_address = fsm_get_location(nblocks, &first_removed_slot);
317 :
318 : /*
319 : * Zero out the tail of the last remaining FSM page. If the slot
320 : * representing the first removed heap block is at a page boundary, as the
321 : * first slot on the FSM page that first_removed_address points to, we can
322 : * just truncate that page altogether.
323 : */
324 12 : if (first_removed_slot > 0)
325 : {
326 9 : buf = fsm_readbuf(rel, first_removed_address, false);
327 9 : if (!BufferIsValid(buf))
328 0 : return; /* nothing to do; the FSM was already smaller */
329 9 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
330 :
331 : /* NO EREPORT(ERROR) from here till changes are logged */
332 9 : START_CRIT_SECTION();
333 :
334 9 : fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
335 :
336 : /*
337 : * Truncation of a relation is WAL-logged at a higher-level, and we
338 : * will be called at WAL replay. But if checksums are enabled, we need
339 : * to still write a WAL record to protect against a torn page, if the
340 : * page is flushed to disk before the truncation WAL record. We cannot
341 : * use MarkBufferDirtyHint here, because that will not dirty the page
342 : * during recovery.
343 : */
344 9 : MarkBufferDirty(buf);
345 9 : if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
346 0 : log_newpage_buffer(buf, false);
347 :
348 9 : END_CRIT_SECTION();
349 :
350 9 : UnlockReleaseBuffer(buf);
351 :
352 9 : new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
353 : }
354 : else
355 : {
356 3 : new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
357 3 : if (smgrnblocks(rel->rd_smgr, FSM_FORKNUM) <= new_nfsmblocks)
358 0 : return; /* nothing to do; the FSM was already smaller */
359 : }
360 :
361 : /* Truncate the unused FSM pages, and send smgr inval message */
362 12 : smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks);
363 :
364 : /*
365 : * We might as well update the local smgr_fsm_nblocks setting.
366 : * smgrtruncate sent an smgr cache inval message, which will cause other
367 : * backends to invalidate their copy of smgr_fsm_nblocks, and this one too
368 : * at the next command boundary. But this ensures it isn't outright wrong
369 : * until then.
370 : */
371 12 : if (rel->rd_smgr)
372 12 : rel->rd_smgr->smgr_fsm_nblocks = new_nfsmblocks;
373 : }
374 :
375 : /*
376 : * FreeSpaceMapVacuum - scan and fix any inconsistencies in the FSM
377 : */
378 : void
379 817 : FreeSpaceMapVacuum(Relation rel)
380 : {
381 : bool dummy;
382 :
383 : /*
384 : * Traverse the tree in depth-first order. The tree is stored physically
385 : * in depth-first order, so this should be pretty I/O efficient.
386 : */
387 817 : fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy);
388 817 : }
389 :
390 : /******** Internal routines ********/
391 :
392 : /*
393 : * Return category corresponding x bytes of free space
394 : */
395 : static uint8
396 13656 : fsm_space_avail_to_cat(Size avail)
397 : {
398 : int cat;
399 :
400 13656 : Assert(avail < BLCKSZ);
401 :
402 13656 : if (avail >= MaxFSMRequestSize)
403 796 : return 255;
404 :
405 12860 : cat = avail / FSM_CAT_STEP;
406 :
407 : /*
408 : * The highest category, 255, is reserved for MaxFSMRequestSize bytes or
409 : * more.
410 : */
411 12860 : if (cat > 254)
412 19 : cat = 254;
413 :
414 12860 : return (uint8) cat;
415 : }
416 :
417 : /*
418 : * Return the lower bound of the range of free space represented by given
419 : * category.
420 : */
421 : static Size
422 12 : fsm_space_cat_to_avail(uint8 cat)
423 : {
424 : /* The highest category represents exactly MaxFSMRequestSize bytes. */
425 12 : if (cat == 255)
426 0 : return MaxFSMRequestSize;
427 : else
428 12 : return cat * FSM_CAT_STEP;
429 : }
430 :
431 : /*
432 : * Which category does a page need to have, to accommodate x bytes of data?
433 : * While fsm_size_to_avail_cat() rounds down, this needs to round up.
434 : */
435 : static uint8
436 14009 : fsm_space_needed_to_cat(Size needed)
437 : {
438 : int cat;
439 :
440 : /* Can't ask for more space than the highest category represents */
441 14009 : if (needed > MaxFSMRequestSize)
442 0 : elog(ERROR, "invalid FSM request size %zu", needed);
443 :
444 14009 : if (needed == 0)
445 0 : return 1;
446 :
447 14009 : cat = (needed + FSM_CAT_STEP - 1) / FSM_CAT_STEP;
448 :
449 14009 : if (cat > 255)
450 0 : cat = 255;
451 :
452 14009 : return (uint8) cat;
453 : }
454 :
455 : /*
456 : * Returns the physical block number of a FSM page
457 : */
458 : static BlockNumber
459 31254 : fsm_logical_to_physical(FSMAddress addr)
460 : {
461 : BlockNumber pages;
462 : int leafno;
463 : int l;
464 :
465 : /*
466 : * Calculate the logical page number of the first leaf page below the
467 : * given page.
468 : */
469 31254 : leafno = addr.logpageno;
470 61738 : for (l = 0; l < addr.level; l++)
471 30484 : leafno *= SlotsPerFSMPage;
472 :
473 : /* Count upper level nodes required to address the leaf page */
474 31254 : pages = 0;
475 125016 : for (l = 0; l < FSM_TREE_DEPTH; l++)
476 : {
477 93762 : pages += leafno + 1;
478 93762 : leafno /= SlotsPerFSMPage;
479 : }
480 :
481 : /*
482 : * If the page we were asked for wasn't at the bottom level, subtract the
483 : * additional lower level pages we counted above.
484 : */
485 31254 : pages -= addr.level;
486 :
487 : /* Turn the page count into 0-based block number */
488 31254 : return pages - 1;
489 : }
490 :
491 : /*
492 : * Return the FSM location corresponding to given heap block.
493 : */
494 : static FSMAddress
495 13680 : fsm_get_location(BlockNumber heapblk, uint16 *slot)
496 : {
497 : FSMAddress addr;
498 :
499 13680 : addr.level = FSM_BOTTOM_LEVEL;
500 13680 : addr.logpageno = heapblk / SlotsPerFSMPage;
501 13680 : *slot = heapblk % SlotsPerFSMPage;
502 :
503 13680 : return addr;
504 : }
505 :
506 : /*
507 : * Return the heap block number corresponding to given location in the FSM.
508 : */
509 : static BlockNumber
510 1506 : fsm_get_heap_blk(FSMAddress addr, uint16 slot)
511 : {
512 1506 : Assert(addr.level == FSM_BOTTOM_LEVEL);
513 1506 : return ((unsigned int) addr.logpageno) * SlotsPerFSMPage + slot;
514 : }
515 :
516 : /*
517 : * Given a logical address of a child page, get the logical page number of
518 : * the parent, and the slot within the parent corresponding to the child.
519 : */
520 : static FSMAddress
521 116 : fsm_get_parent(FSMAddress child, uint16 *slot)
522 : {
523 : FSMAddress parent;
524 :
525 116 : Assert(child.level < FSM_ROOT_LEVEL);
526 :
527 116 : parent.level = child.level + 1;
528 116 : parent.logpageno = child.logpageno / SlotsPerFSMPage;
529 116 : *slot = child.logpageno % SlotsPerFSMPage;
530 :
531 116 : return parent;
532 : }
533 :
534 : /*
535 : * Given a logical address of a parent page and a slot number, get the
536 : * logical address of the corresponding child page.
537 : */
538 : static FSMAddress
539 3085 : fsm_get_child(FSMAddress parent, uint16 slot)
540 : {
541 : FSMAddress child;
542 :
543 3085 : Assert(parent.level > FSM_BOTTOM_LEVEL);
544 :
545 3085 : child.level = parent.level - 1;
546 3085 : child.logpageno = parent.logpageno * SlotsPerFSMPage + slot;
547 :
548 3085 : return child;
549 : }
550 :
551 : /*
552 : * Read a FSM page.
553 : *
554 : * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
555 : * true, the FSM file is extended.
556 : */
557 : static Buffer
558 31242 : fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
559 : {
560 31242 : BlockNumber blkno = fsm_logical_to_physical(addr);
561 : Buffer buf;
562 :
563 31242 : RelationOpenSmgr(rel);
564 :
565 : /*
566 : * If we haven't cached the size of the FSM yet, check it first. Also
567 : * recheck if the requested block seems to be past end, since our cached
568 : * value might be stale. (We send smgr inval messages on truncation, but
569 : * not on extension.)
570 : */
571 58435 : if (rel->rd_smgr->smgr_fsm_nblocks == InvalidBlockNumber ||
572 27193 : blkno >= rel->rd_smgr->smgr_fsm_nblocks)
573 : {
574 7126 : if (smgrexists(rel->rd_smgr, FSM_FORKNUM))
575 2239 : rel->rd_smgr->smgr_fsm_nblocks = smgrnblocks(rel->rd_smgr,
576 : FSM_FORKNUM);
577 : else
578 4887 : rel->rd_smgr->smgr_fsm_nblocks = 0;
579 : }
580 :
581 : /* Handle requests beyond EOF */
582 31242 : if (blkno >= rel->rd_smgr->smgr_fsm_nblocks)
583 : {
584 5473 : if (extend)
585 223 : fsm_extend(rel, blkno + 1);
586 : else
587 5250 : return InvalidBuffer;
588 : }
589 :
590 : /*
591 : * Use ZERO_ON_ERROR mode, and initialize the page if necessary. The FSM
592 : * information is not accurate anyway, so it's better to clear corrupt
593 : * pages than error out. Since the FSM changes are not WAL-logged, the
594 : * so-called torn page problem on crash can lead to pages with corrupt
595 : * headers, for example.
596 : */
597 25992 : buf = ReadBufferExtended(rel, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR, NULL);
598 25992 : if (PageIsNew(BufferGetPage(buf)))
599 0 : PageInit(BufferGetPage(buf), BLCKSZ, 0);
600 25992 : return buf;
601 : }
602 :
603 : /*
604 : * Ensure that the FSM fork is at least fsm_nblocks long, extending
605 : * it if necessary with empty pages. And by empty, I mean pages filled
606 : * with zeros, meaning there's no free space.
607 : */
608 : static void
609 223 : fsm_extend(Relation rel, BlockNumber fsm_nblocks)
610 : {
611 : BlockNumber fsm_nblocks_now;
612 : Page pg;
613 :
614 223 : pg = (Page) palloc(BLCKSZ);
615 223 : PageInit(pg, BLCKSZ, 0);
616 :
617 : /*
618 : * We use the relation extension lock to lock out other backends trying to
619 : * extend the FSM at the same time. It also locks out extension of the
620 : * main fork, unnecessarily, but extending the FSM happens seldom enough
621 : * that it doesn't seem worthwhile to have a separate lock tag type for
622 : * it.
623 : *
624 : * Note that another backend might have extended or created the relation
625 : * by the time we get the lock.
626 : */
627 223 : LockRelationForExtension(rel, ExclusiveLock);
628 :
629 : /* Might have to re-open if a cache flush happened */
630 223 : RelationOpenSmgr(rel);
631 :
632 : /*
633 : * Create the FSM file first if it doesn't exist. If smgr_fsm_nblocks is
634 : * positive then it must exist, no need for an smgrexists call.
635 : */
636 223 : if ((rel->rd_smgr->smgr_fsm_nblocks == 0 ||
637 223 : rel->rd_smgr->smgr_fsm_nblocks == InvalidBlockNumber) &&
638 223 : !smgrexists(rel->rd_smgr, FSM_FORKNUM))
639 197 : smgrcreate(rel->rd_smgr, FSM_FORKNUM, false);
640 :
641 223 : fsm_nblocks_now = smgrnblocks(rel->rd_smgr, FSM_FORKNUM);
642 :
643 1115 : while (fsm_nblocks_now < fsm_nblocks)
644 : {
645 669 : PageSetChecksumInplace(pg, fsm_nblocks_now);
646 :
647 669 : smgrextend(rel->rd_smgr, FSM_FORKNUM, fsm_nblocks_now,
648 : (char *) pg, false);
649 669 : fsm_nblocks_now++;
650 : }
651 :
652 : /* Update local cache with the up-to-date size */
653 223 : rel->rd_smgr->smgr_fsm_nblocks = fsm_nblocks_now;
654 :
655 223 : UnlockRelationForExtension(rel, ExclusiveLock);
656 :
657 223 : pfree(pg);
658 223 : }
659 :
660 : /*
661 : * Set value in given FSM page and slot.
662 : *
663 : * If minValue > 0, the updated page is also searched for a page with at
664 : * least minValue of free space. If one is found, its slot number is
665 : * returned, -1 otherwise.
666 : */
667 : static int
668 13766 : fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
669 : uint8 newValue, uint8 minValue)
670 : {
671 : Buffer buf;
672 : Page page;
673 13766 : int newslot = -1;
674 :
675 13766 : buf = fsm_readbuf(rel, addr, true);
676 13766 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
677 :
678 13766 : page = BufferGetPage(buf);
679 :
680 13766 : if (fsm_set_avail(page, slot, newValue))
681 7667 : MarkBufferDirtyHint(buf, false);
682 :
683 13766 : if (minValue != 0)
684 : {
685 : /* Search while we still hold the lock */
686 8386 : newslot = fsm_search_avail(buf, minValue,
687 8386 : addr.level == FSM_BOTTOM_LEVEL,
688 : true);
689 : }
690 :
691 13766 : UnlockReleaseBuffer(buf);
692 :
693 13766 : return newslot;
694 : }
695 :
696 : /*
697 : * Search the tree for a heap page with at least min_cat of free space
698 : */
699 : static BlockNumber
700 13449 : fsm_search(Relation rel, uint8 min_cat)
701 : {
702 13449 : int restarts = 0;
703 13449 : FSMAddress addr = FSM_ROOT_ADDRESS;
704 :
705 : for (;;)
706 : {
707 : int slot;
708 : Buffer buf;
709 15589 : uint8 max_avail = 0;
710 :
711 : /* Read the FSM page. */
712 15589 : buf = fsm_readbuf(rel, addr, false);
713 :
714 : /* Search within the page */
715 15589 : if (BufferIsValid(buf))
716 : {
717 11419 : LockBuffer(buf, BUFFER_LOCK_SHARE);
718 11419 : slot = fsm_search_avail(buf, min_cat,
719 11419 : (addr.level == FSM_BOTTOM_LEVEL),
720 : false);
721 11419 : if (slot == -1)
722 8443 : max_avail = fsm_get_max_avail(BufferGetPage(buf));
723 11419 : UnlockReleaseBuffer(buf);
724 : }
725 : else
726 4170 : slot = -1;
727 :
728 15589 : if (slot != -1)
729 : {
730 : /*
731 : * Descend the tree, or return the found block if we're at the
732 : * bottom.
733 : */
734 2976 : if (addr.level == FSM_BOTTOM_LEVEL)
735 940 : return fsm_get_heap_blk(addr, slot);
736 :
737 2036 : addr = fsm_get_child(addr, slot);
738 : }
739 12613 : else if (addr.level == FSM_ROOT_LEVEL)
740 : {
741 : /*
742 : * At the root, failure means there's no page with enough free
743 : * space in the FSM. Give up.
744 : */
745 12509 : return InvalidBlockNumber;
746 : }
747 : else
748 : {
749 : uint16 parentslot;
750 : FSMAddress parent;
751 :
752 : /*
753 : * At lower level, failure can happen if the value in the upper-
754 : * level node didn't reflect the value on the lower page. Update
755 : * the upper node, to avoid falling into the same trap again, and
756 : * start over.
757 : *
758 : * There's a race condition here, if another backend updates this
759 : * page right after we release it, and gets the lock on the parent
760 : * page before us. We'll then update the parent page with the now
761 : * stale information we had. It's OK, because it should happen
762 : * rarely, and will be fixed by the next vacuum.
763 : */
764 104 : parent = fsm_get_parent(addr, &parentslot);
765 104 : fsm_set_and_search(rel, parent, parentslot, max_avail, 0);
766 :
767 : /*
768 : * If the upper pages are badly out of date, we might need to loop
769 : * quite a few times, updating them as we go. Any inconsistencies
770 : * should eventually be corrected and the loop should end. Looping
771 : * indefinitely is nevertheless scary, so provide an emergency
772 : * valve.
773 : */
774 104 : if (restarts++ > 10000)
775 0 : return InvalidBlockNumber;
776 :
777 : /* Start search all over from the root */
778 104 : addr = FSM_ROOT_ADDRESS;
779 : }
780 2140 : }
781 : }
782 :
783 :
784 : /*
785 : * Recursive guts of FreeSpaceMapVacuum
786 : */
787 : static uint8
788 1866 : fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
789 : {
790 : Buffer buf;
791 : Page page;
792 : uint8 max_avail;
793 :
794 : /* Read the page if it exists, or return EOF */
795 1866 : buf = fsm_readbuf(rel, addr, false);
796 1866 : if (!BufferIsValid(buf))
797 : {
798 1080 : *eof_p = true;
799 1080 : return 0;
800 : }
801 : else
802 786 : *eof_p = false;
803 :
804 786 : page = BufferGetPage(buf);
805 :
806 : /*
807 : * Recurse into children, and fix the information stored about them at
808 : * this level.
809 : */
810 786 : if (addr.level > FSM_BOTTOM_LEVEL)
811 : {
812 : int slot;
813 526 : bool eof = false;
814 :
815 2140820 : for (slot = 0; slot < SlotsPerFSMPage; slot++)
816 : {
817 : int child_avail;
818 :
819 2140294 : CHECK_FOR_INTERRUPTS();
820 :
821 : /* After we hit end-of-file, just clear the rest of the slots */
822 2140294 : if (!eof)
823 1049 : child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot), &eof);
824 : else
825 2139245 : child_avail = 0;
826 :
827 : /* Update information about the child */
828 2140294 : if (fsm_get_avail(page, slot) != child_avail)
829 : {
830 454 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
831 454 : fsm_set_avail(BufferGetPage(buf), slot, child_avail);
832 454 : MarkBufferDirtyHint(buf, false);
833 454 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
834 : }
835 : }
836 : }
837 :
838 786 : max_avail = fsm_get_max_avail(BufferGetPage(buf));
839 :
840 : /*
841 : * Reset the next slot pointer. This encourages the use of low-numbered
842 : * pages, increasing the chances that a later vacuum can truncate the
843 : * relation.
844 : */
845 786 : ((FSMPage) PageGetContents(page))->fp_next_slot = 0;
846 :
847 786 : ReleaseBuffer(buf);
848 :
849 786 : return max_avail;
850 : }
851 :
852 : /*
853 : * This function will return the last block number stored on given
854 : * FSM page address.
855 : */
856 : static BlockNumber
857 6 : fsm_get_lastblckno(Relation rel, FSMAddress addr)
858 : {
859 : int slot;
860 :
861 : /*
862 : * Get the last slot number on the given address and convert that to block
863 : * number
864 : */
865 6 : slot = SlotsPerFSMPage - 1;
866 6 : return fsm_get_heap_blk(addr, slot);
867 : }
868 :
869 : /*
870 : * Recursively update the FSM tree from given address to
871 : * all the way up to root.
872 : */
873 : static void
874 18 : fsm_update_recursive(Relation rel, FSMAddress addr, uint8 new_cat)
875 : {
876 : uint16 parentslot;
877 : FSMAddress parent;
878 :
879 18 : if (addr.level == FSM_ROOT_LEVEL)
880 24 : return;
881 :
882 : /*
883 : * Get the parent page and our slot in the parent page, and update the
884 : * information in that.
885 : */
886 12 : parent = fsm_get_parent(addr, &parentslot);
887 12 : fsm_set_and_search(rel, parent, parentslot, new_cat, 0);
888 12 : fsm_update_recursive(rel, parent, new_cat);
889 : }
|