LCOV - code coverage report
Current view: top level - src/backend/storage/freespace - freespace.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 203 228 89.0 %
Date: 2017-09-29 13:40:31 Functions: 22 23 95.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * freespace.c
       4             :  *    POSTGRES free space map for quickly finding free space in relations
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/storage/freespace/freespace.c
      12             :  *
      13             :  *
      14             :  * NOTES:
      15             :  *
      16             :  *  Free Space Map keeps track of the amount of free space on pages, and
      17             :  *  allows quickly searching for a page with enough free space. The FSM is
      18             :  *  stored in a dedicated relation fork of all heap relations, and those
      19             :  *  index access methods that need it (see also indexfsm.c). See README for
      20             :  *  more information.
      21             :  *
      22             :  *-------------------------------------------------------------------------
      23             :  */
      24             : #include "postgres.h"
      25             : 
      26             : #include "access/htup_details.h"
      27             : #include "access/xlogutils.h"
      28             : #include "miscadmin.h"
      29             : #include "storage/freespace.h"
      30             : #include "storage/fsm_internals.h"
      31             : #include "storage/lmgr.h"
      32             : #include "storage/smgr.h"
      33             : 
      34             : 
      35             : /*
      36             :  * We use just one byte to store the amount of free space on a page, so we
      37             :  * divide the amount of free space a page can have into 256 different
      38             :  * categories. The highest category, 255, represents a page with at least
      39             :  * MaxFSMRequestSize bytes of free space, and the second highest category
      40             :  * represents the range from 254 * FSM_CAT_STEP, inclusive, to
      41             :  * MaxFSMRequestSize, exclusive.
      42             :  *
      43             :  * MaxFSMRequestSize depends on the architecture and BLCKSZ, but assuming
      44             :  * default 8k BLCKSZ, and that MaxFSMRequestSize is 8164 bytes, the
      45             :  * categories look like this:
      46             :  *
      47             :  *
      48             :  * Range     Category
      49             :  * 0    - 31   0
      50             :  * 32   - 63   1
      51             :  * ...    ...  ...
      52             :  * 8096 - 8127 253
      53             :  * 8128 - 8163 254
      54             :  * 8164 - 8192 255
      55             :  *
      56             :  * The reason that MaxFSMRequestSize is special is that if MaxFSMRequestSize
      57             :  * isn't equal to a range boundary, a page with exactly MaxFSMRequestSize
      58             :  * bytes of free space wouldn't satisfy a request for MaxFSMRequestSize
      59             :  * bytes. If there isn't more than MaxFSMRequestSize bytes of free space on a
      60             :  * completely empty page, that would mean that we could never satisfy a
      61             :  * request of exactly MaxFSMRequestSize bytes.
      62             :  */
      63             : #define FSM_CATEGORIES  256
      64             : #define FSM_CAT_STEP    (BLCKSZ / FSM_CATEGORIES)
      65             : #define MaxFSMRequestSize   MaxHeapTupleSize
      66             : 
      67             : /*
      68             :  * Depth of the on-disk tree. We need to be able to address 2^32-1 blocks,
      69             :  * and 1626 is the smallest number that satisfies X^3 >= 2^32-1. Likewise,
      70             :  * 216 is the smallest number that satisfies X^4 >= 2^32-1. In practice,
      71             :  * this means that 4096 bytes is the smallest BLCKSZ that we can get away
      72             :  * with a 3-level tree, and 512 is the smallest we support.
      73             :  */
      74             : #define FSM_TREE_DEPTH  ((SlotsPerFSMPage >= 1626) ? 3 : 4)
      75             : 
      76             : #define FSM_ROOT_LEVEL  (FSM_TREE_DEPTH - 1)
      77             : #define FSM_BOTTOM_LEVEL 0
      78             : 
      79             : /*
      80             :  * The internal FSM routines work on a logical addressing scheme. Each
      81             :  * level of the tree can be thought of as a separately addressable file.
      82             :  */
      83             : typedef struct
      84             : {
      85             :     int         level;          /* level */
      86             :     int         logpageno;      /* page number within the level */
      87             : } FSMAddress;
      88             : 
      89             : /* Address of the root page. */
      90             : static const FSMAddress FSM_ROOT_ADDRESS = {FSM_ROOT_LEVEL, 0};
      91             : 
      92             : /* functions to navigate the tree */
      93             : static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
      94             : static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
      95             : static FSMAddress fsm_get_location(BlockNumber heapblk, uint16 *slot);
      96             : static BlockNumber fsm_get_heap_blk(FSMAddress addr, uint16 slot);
      97             : static BlockNumber fsm_logical_to_physical(FSMAddress addr);
      98             : 
      99             : static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend);
     100             : static void fsm_extend(Relation rel, BlockNumber fsm_nblocks);
     101             : 
     102             : /* functions to convert amount of free space to a FSM category */
     103             : static uint8 fsm_space_avail_to_cat(Size avail);
     104             : static uint8 fsm_space_needed_to_cat(Size needed);
     105             : static Size fsm_space_cat_to_avail(uint8 cat);
     106             : 
     107             : /* workhorse functions for various operations */
     108             : static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
     109             :                    uint8 newValue, uint8 minValue);
     110             : static BlockNumber fsm_search(Relation rel, uint8 min_cat);
     111             : static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof);
     112             : static BlockNumber fsm_get_lastblckno(Relation rel, FSMAddress addr);
     113             : static void fsm_update_recursive(Relation rel, FSMAddress addr, uint8 new_cat);
     114             : 
     115             : 
     116             : /******** Public API ********/
     117             : 
     118             : /*
     119             :  * GetPageWithFreeSpace - try to find a page in the given relation with
     120             :  *      at least the specified amount of free space.
     121             :  *
     122             :  * If successful, return the block number; if not, return InvalidBlockNumber.
     123             :  *
     124             :  * The caller must be prepared for the possibility that the returned page
     125             :  * will turn out to have too little space available by the time the caller
     126             :  * gets a lock on it.  In that case, the caller should report the actual
     127             :  * amount of free space available on that page and then try again (see
     128             :  * RecordAndGetPageWithFreeSpace).  If InvalidBlockNumber is returned,
     129             :  * extend the relation.
     130             :  */
     131             : BlockNumber
     132        5623 : GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
     133             : {
     134        5623 :     uint8       min_cat = fsm_space_needed_to_cat(spaceNeeded);
     135             : 
     136        5623 :     return fsm_search(rel, min_cat);
     137             : }
     138             : 
     139             : /*
     140             :  * RecordAndGetPageWithFreeSpace - update info about a page and try again.
     141             :  *
     142             :  * We provide this combo form to save some locking overhead, compared to
     143             :  * separate RecordPageWithFreeSpace + GetPageWithFreeSpace calls. There's
     144             :  * also some effort to return a page close to the old page; if there's a
     145             :  * page with enough free space on the same FSM page where the old one page
     146             :  * is located, it is preferred.
     147             :  */
     148             : BlockNumber
     149        8386 : RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
     150             :                               Size oldSpaceAvail, Size spaceNeeded)
     151             : {
     152        8386 :     int         old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
     153        8386 :     int         search_cat = fsm_space_needed_to_cat(spaceNeeded);
     154             :     FSMAddress  addr;
     155             :     uint16      slot;
     156             :     int         search_slot;
     157             : 
     158             :     /* Get the location of the FSM byte representing the heap block */
     159        8386 :     addr = fsm_get_location(oldPage, &slot);
     160             : 
     161        8386 :     search_slot = fsm_set_and_search(rel, addr, slot, old_cat, search_cat);
     162             : 
     163             :     /*
     164             :      * If fsm_set_and_search found a suitable new block, return that.
     165             :      * Otherwise, search as usual.
     166             :      */
     167        8386 :     if (search_slot != -1)
     168         560 :         return fsm_get_heap_blk(addr, search_slot);
     169             :     else
     170        7826 :         return fsm_search(rel, search_cat);
     171             : }
     172             : 
     173             : /*
     174             :  * RecordPageWithFreeSpace - update info about a page.
     175             :  *
     176             :  * Note that if the new spaceAvail value is higher than the old value stored
     177             :  * in the FSM, the space might not become visible to searchers until the next
     178             :  * FreeSpaceMapVacuum call, which updates the upper level pages.
     179             :  */
     180             : void
     181        5264 : RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
     182             : {
     183        5264 :     int         new_cat = fsm_space_avail_to_cat(spaceAvail);
     184             :     FSMAddress  addr;
     185             :     uint16      slot;
     186             : 
     187             :     /* Get the location of the FSM byte representing the heap block */
     188        5264 :     addr = fsm_get_location(heapBlk, &slot);
     189             : 
     190        5264 :     fsm_set_and_search(rel, addr, slot, new_cat, 0);
     191        5264 : }
     192             : 
     193             : /*
     194             :  * Update the upper levels of the free space map all the way up to the root
     195             :  * to make sure we don't lose track of new blocks we just inserted.  This is
     196             :  * intended to be used after adding many new blocks to the relation; we judge
     197             :  * it not worth updating the upper levels of the tree every time data for
     198             :  * a single page changes, but for a bulk-extend it's worth it.
     199             :  */
     200             : void
     201           6 : UpdateFreeSpaceMap(Relation rel, BlockNumber startBlkNum,
     202             :                    BlockNumber endBlkNum, Size freespace)
     203             : {
     204           6 :     int         new_cat = fsm_space_avail_to_cat(freespace);
     205             :     FSMAddress  addr;
     206             :     uint16      slot;
     207             :     BlockNumber blockNum;
     208             :     BlockNumber lastBlkOnPage;
     209             : 
     210           6 :     blockNum = startBlkNum;
     211             : 
     212          12 :     while (blockNum <= endBlkNum)
     213             :     {
     214             :         /*
     215             :          * Find FSM address for this block; update tree all the way to the
     216             :          * root.
     217             :          */
     218           6 :         addr = fsm_get_location(blockNum, &slot);
     219           6 :         fsm_update_recursive(rel, addr, new_cat);
     220             : 
     221             :         /*
     222             :          * Get the last block number on this FSM page.  If that's greater than
     223             :          * or equal to our endBlkNum, we're done.  Otherwise, advance to the
     224             :          * first block on the next page.
     225             :          */
     226           6 :         lastBlkOnPage = fsm_get_lastblckno(rel, addr);
     227           6 :         if (lastBlkOnPage >= endBlkNum)
     228           6 :             break;
     229           0 :         blockNum = lastBlkOnPage + 1;
     230             :     }
     231           6 : }
     232             : 
     233             : /*
     234             :  * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
     235             :  *      WAL replay
     236             :  */
     237             : void
     238           0 : XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
     239             :                             Size spaceAvail)
     240             : {
     241           0 :     int         new_cat = fsm_space_avail_to_cat(spaceAvail);
     242             :     FSMAddress  addr;
     243             :     uint16      slot;
     244             :     BlockNumber blkno;
     245             :     Buffer      buf;
     246             :     Page        page;
     247             : 
     248             :     /* Get the location of the FSM byte representing the heap block */
     249           0 :     addr = fsm_get_location(heapBlk, &slot);
     250           0 :     blkno = fsm_logical_to_physical(addr);
     251             : 
     252             :     /* If the page doesn't exist already, extend */
     253           0 :     buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR);
     254           0 :     LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     255             : 
     256           0 :     page = BufferGetPage(buf);
     257           0 :     if (PageIsNew(page))
     258           0 :         PageInit(page, BLCKSZ, 0);
     259             : 
     260           0 :     if (fsm_set_avail(page, slot, new_cat))
     261           0 :         MarkBufferDirtyHint(buf, false);
     262           0 :     UnlockReleaseBuffer(buf);
     263           0 : }
     264             : 
     265             : /*
     266             :  * GetRecordedFreePage - return the amount of free space on a particular page,
     267             :  *      according to the FSM.
     268             :  */
     269             : Size
     270          12 : GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
     271             : {
     272             :     FSMAddress  addr;
     273             :     uint16      slot;
     274             :     Buffer      buf;
     275             :     uint8       cat;
     276             : 
     277             :     /* Get the location of the FSM byte representing the heap block */
     278          12 :     addr = fsm_get_location(heapBlk, &slot);
     279             : 
     280          12 :     buf = fsm_readbuf(rel, addr, false);
     281          12 :     if (!BufferIsValid(buf))
     282           0 :         return 0;
     283          12 :     cat = fsm_get_avail(BufferGetPage(buf), slot);
     284          12 :     ReleaseBuffer(buf);
     285             : 
     286          12 :     return fsm_space_cat_to_avail(cat);
     287             : }
     288             : 
     289             : /*
     290             :  * FreeSpaceMapTruncateRel - adjust for truncation of a relation.
     291             :  *
     292             :  * The caller must hold AccessExclusiveLock on the relation, to ensure that
     293             :  * other backends receive the smgr invalidation event that this function sends
     294             :  * before they access the FSM again.
     295             :  *
     296             :  * nblocks is the new size of the heap.
     297             :  */
     298             : void
     299          12 : FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks)
     300             : {
     301             :     BlockNumber new_nfsmblocks;
     302             :     FSMAddress  first_removed_address;
     303             :     uint16      first_removed_slot;
     304             :     Buffer      buf;
     305             : 
     306          12 :     RelationOpenSmgr(rel);
     307             : 
     308             :     /*
     309             :      * If no FSM has been created yet for this relation, there's nothing to
     310             :      * truncate.
     311             :      */
     312          12 :     if (!smgrexists(rel->rd_smgr, FSM_FORKNUM))
     313           0 :         return;
     314             : 
     315             :     /* Get the location in the FSM of the first removed heap block */
     316          12 :     first_removed_address = fsm_get_location(nblocks, &first_removed_slot);
     317             : 
     318             :     /*
     319             :      * Zero out the tail of the last remaining FSM page. If the slot
     320             :      * representing the first removed heap block is at a page boundary, as the
     321             :      * first slot on the FSM page that first_removed_address points to, we can
     322             :      * just truncate that page altogether.
     323             :      */
     324          12 :     if (first_removed_slot > 0)
     325             :     {
     326           9 :         buf = fsm_readbuf(rel, first_removed_address, false);
     327           9 :         if (!BufferIsValid(buf))
     328           0 :             return;             /* nothing to do; the FSM was already smaller */
     329           9 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     330             : 
     331             :         /* NO EREPORT(ERROR) from here till changes are logged */
     332           9 :         START_CRIT_SECTION();
     333             : 
     334           9 :         fsm_truncate_avail(BufferGetPage(buf), first_removed_slot);
     335             : 
     336             :         /*
     337             :          * Truncation of a relation is WAL-logged at a higher-level, and we
     338             :          * will be called at WAL replay. But if checksums are enabled, we need
     339             :          * to still write a WAL record to protect against a torn page, if the
     340             :          * page is flushed to disk before the truncation WAL record. We cannot
     341             :          * use MarkBufferDirtyHint here, because that will not dirty the page
     342             :          * during recovery.
     343             :          */
     344           9 :         MarkBufferDirty(buf);
     345           9 :         if (!InRecovery && RelationNeedsWAL(rel) && XLogHintBitIsNeeded())
     346           0 :             log_newpage_buffer(buf, false);
     347             : 
     348           9 :         END_CRIT_SECTION();
     349             : 
     350           9 :         UnlockReleaseBuffer(buf);
     351             : 
     352           9 :         new_nfsmblocks = fsm_logical_to_physical(first_removed_address) + 1;
     353             :     }
     354             :     else
     355             :     {
     356           3 :         new_nfsmblocks = fsm_logical_to_physical(first_removed_address);
     357           3 :         if (smgrnblocks(rel->rd_smgr, FSM_FORKNUM) <= new_nfsmblocks)
     358           0 :             return;             /* nothing to do; the FSM was already smaller */
     359             :     }
     360             : 
     361             :     /* Truncate the unused FSM pages, and send smgr inval message */
     362          12 :     smgrtruncate(rel->rd_smgr, FSM_FORKNUM, new_nfsmblocks);
     363             : 
     364             :     /*
     365             :      * We might as well update the local smgr_fsm_nblocks setting.
     366             :      * smgrtruncate sent an smgr cache inval message, which will cause other
     367             :      * backends to invalidate their copy of smgr_fsm_nblocks, and this one too
     368             :      * at the next command boundary.  But this ensures it isn't outright wrong
     369             :      * until then.
     370             :      */
     371          12 :     if (rel->rd_smgr)
     372          12 :         rel->rd_smgr->smgr_fsm_nblocks = new_nfsmblocks;
     373             : }
     374             : 
     375             : /*
     376             :  * FreeSpaceMapVacuum - scan and fix any inconsistencies in the FSM
     377             :  */
     378             : void
     379         817 : FreeSpaceMapVacuum(Relation rel)
     380             : {
     381             :     bool        dummy;
     382             : 
     383             :     /*
     384             :      * Traverse the tree in depth-first order. The tree is stored physically
     385             :      * in depth-first order, so this should be pretty I/O efficient.
     386             :      */
     387         817 :     fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy);
     388         817 : }
     389             : 
     390             : /******** Internal routines ********/
     391             : 
     392             : /*
     393             :  * Return category corresponding x bytes of free space
     394             :  */
     395             : static uint8
     396       13656 : fsm_space_avail_to_cat(Size avail)
     397             : {
     398             :     int         cat;
     399             : 
     400       13656 :     Assert(avail < BLCKSZ);
     401             : 
     402       13656 :     if (avail >= MaxFSMRequestSize)
     403         796 :         return 255;
     404             : 
     405       12860 :     cat = avail / FSM_CAT_STEP;
     406             : 
     407             :     /*
     408             :      * The highest category, 255, is reserved for MaxFSMRequestSize bytes or
     409             :      * more.
     410             :      */
     411       12860 :     if (cat > 254)
     412          19 :         cat = 254;
     413             : 
     414       12860 :     return (uint8) cat;
     415             : }
     416             : 
     417             : /*
     418             :  * Return the lower bound of the range of free space represented by given
     419             :  * category.
     420             :  */
     421             : static Size
     422          12 : fsm_space_cat_to_avail(uint8 cat)
     423             : {
     424             :     /* The highest category represents exactly MaxFSMRequestSize bytes. */
     425          12 :     if (cat == 255)
     426           0 :         return MaxFSMRequestSize;
     427             :     else
     428          12 :         return cat * FSM_CAT_STEP;
     429             : }
     430             : 
     431             : /*
     432             :  * Which category does a page need to have, to accommodate x bytes of data?
     433             :  * While fsm_size_to_avail_cat() rounds down, this needs to round up.
     434             :  */
     435             : static uint8
     436       14009 : fsm_space_needed_to_cat(Size needed)
     437             : {
     438             :     int         cat;
     439             : 
     440             :     /* Can't ask for more space than the highest category represents */
     441       14009 :     if (needed > MaxFSMRequestSize)
     442           0 :         elog(ERROR, "invalid FSM request size %zu", needed);
     443             : 
     444       14009 :     if (needed == 0)
     445           0 :         return 1;
     446             : 
     447       14009 :     cat = (needed + FSM_CAT_STEP - 1) / FSM_CAT_STEP;
     448             : 
     449       14009 :     if (cat > 255)
     450           0 :         cat = 255;
     451             : 
     452       14009 :     return (uint8) cat;
     453             : }
     454             : 
     455             : /*
     456             :  * Returns the physical block number of a FSM page
     457             :  */
     458             : static BlockNumber
     459       31254 : fsm_logical_to_physical(FSMAddress addr)
     460             : {
     461             :     BlockNumber pages;
     462             :     int         leafno;
     463             :     int         l;
     464             : 
     465             :     /*
     466             :      * Calculate the logical page number of the first leaf page below the
     467             :      * given page.
     468             :      */
     469       31254 :     leafno = addr.logpageno;
     470       61738 :     for (l = 0; l < addr.level; l++)
     471       30484 :         leafno *= SlotsPerFSMPage;
     472             : 
     473             :     /* Count upper level nodes required to address the leaf page */
     474       31254 :     pages = 0;
     475      125016 :     for (l = 0; l < FSM_TREE_DEPTH; l++)
     476             :     {
     477       93762 :         pages += leafno + 1;
     478       93762 :         leafno /= SlotsPerFSMPage;
     479             :     }
     480             : 
     481             :     /*
     482             :      * If the page we were asked for wasn't at the bottom level, subtract the
     483             :      * additional lower level pages we counted above.
     484             :      */
     485       31254 :     pages -= addr.level;
     486             : 
     487             :     /* Turn the page count into 0-based block number */
     488       31254 :     return pages - 1;
     489             : }
     490             : 
     491             : /*
     492             :  * Return the FSM location corresponding to given heap block.
     493             :  */
     494             : static FSMAddress
     495       13680 : fsm_get_location(BlockNumber heapblk, uint16 *slot)
     496             : {
     497             :     FSMAddress  addr;
     498             : 
     499       13680 :     addr.level = FSM_BOTTOM_LEVEL;
     500       13680 :     addr.logpageno = heapblk / SlotsPerFSMPage;
     501       13680 :     *slot = heapblk % SlotsPerFSMPage;
     502             : 
     503       13680 :     return addr;
     504             : }
     505             : 
     506             : /*
     507             :  * Return the heap block number corresponding to given location in the FSM.
     508             :  */
     509             : static BlockNumber
     510        1506 : fsm_get_heap_blk(FSMAddress addr, uint16 slot)
     511             : {
     512        1506 :     Assert(addr.level == FSM_BOTTOM_LEVEL);
     513        1506 :     return ((unsigned int) addr.logpageno) * SlotsPerFSMPage + slot;
     514             : }
     515             : 
     516             : /*
     517             :  * Given a logical address of a child page, get the logical page number of
     518             :  * the parent, and the slot within the parent corresponding to the child.
     519             :  */
     520             : static FSMAddress
     521         116 : fsm_get_parent(FSMAddress child, uint16 *slot)
     522             : {
     523             :     FSMAddress  parent;
     524             : 
     525         116 :     Assert(child.level < FSM_ROOT_LEVEL);
     526             : 
     527         116 :     parent.level = child.level + 1;
     528         116 :     parent.logpageno = child.logpageno / SlotsPerFSMPage;
     529         116 :     *slot = child.logpageno % SlotsPerFSMPage;
     530             : 
     531         116 :     return parent;
     532             : }
     533             : 
     534             : /*
     535             :  * Given a logical address of a parent page and a slot number, get the
     536             :  * logical address of the corresponding child page.
     537             :  */
     538             : static FSMAddress
     539        3085 : fsm_get_child(FSMAddress parent, uint16 slot)
     540             : {
     541             :     FSMAddress  child;
     542             : 
     543        3085 :     Assert(parent.level > FSM_BOTTOM_LEVEL);
     544             : 
     545        3085 :     child.level = parent.level - 1;
     546        3085 :     child.logpageno = parent.logpageno * SlotsPerFSMPage + slot;
     547             : 
     548        3085 :     return child;
     549             : }
     550             : 
     551             : /*
     552             :  * Read a FSM page.
     553             :  *
     554             :  * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
     555             :  * true, the FSM file is extended.
     556             :  */
     557             : static Buffer
     558       31242 : fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
     559             : {
     560       31242 :     BlockNumber blkno = fsm_logical_to_physical(addr);
     561             :     Buffer      buf;
     562             : 
     563       31242 :     RelationOpenSmgr(rel);
     564             : 
     565             :     /*
     566             :      * If we haven't cached the size of the FSM yet, check it first.  Also
     567             :      * recheck if the requested block seems to be past end, since our cached
     568             :      * value might be stale.  (We send smgr inval messages on truncation, but
     569             :      * not on extension.)
     570             :      */
     571       58435 :     if (rel->rd_smgr->smgr_fsm_nblocks == InvalidBlockNumber ||
     572       27193 :         blkno >= rel->rd_smgr->smgr_fsm_nblocks)
     573             :     {
     574        7126 :         if (smgrexists(rel->rd_smgr, FSM_FORKNUM))
     575        2239 :             rel->rd_smgr->smgr_fsm_nblocks = smgrnblocks(rel->rd_smgr,
     576             :                                                          FSM_FORKNUM);
     577             :         else
     578        4887 :             rel->rd_smgr->smgr_fsm_nblocks = 0;
     579             :     }
     580             : 
     581             :     /* Handle requests beyond EOF */
     582       31242 :     if (blkno >= rel->rd_smgr->smgr_fsm_nblocks)
     583             :     {
     584        5473 :         if (extend)
     585         223 :             fsm_extend(rel, blkno + 1);
     586             :         else
     587        5250 :             return InvalidBuffer;
     588             :     }
     589             : 
     590             :     /*
     591             :      * Use ZERO_ON_ERROR mode, and initialize the page if necessary. The FSM
     592             :      * information is not accurate anyway, so it's better to clear corrupt
     593             :      * pages than error out. Since the FSM changes are not WAL-logged, the
     594             :      * so-called torn page problem on crash can lead to pages with corrupt
     595             :      * headers, for example.
     596             :      */
     597       25992 :     buf = ReadBufferExtended(rel, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR, NULL);
     598       25992 :     if (PageIsNew(BufferGetPage(buf)))
     599           0 :         PageInit(BufferGetPage(buf), BLCKSZ, 0);
     600       25992 :     return buf;
     601             : }
     602             : 
     603             : /*
     604             :  * Ensure that the FSM fork is at least fsm_nblocks long, extending
     605             :  * it if necessary with empty pages. And by empty, I mean pages filled
     606             :  * with zeros, meaning there's no free space.
     607             :  */
     608             : static void
     609         223 : fsm_extend(Relation rel, BlockNumber fsm_nblocks)
     610             : {
     611             :     BlockNumber fsm_nblocks_now;
     612             :     Page        pg;
     613             : 
     614         223 :     pg = (Page) palloc(BLCKSZ);
     615         223 :     PageInit(pg, BLCKSZ, 0);
     616             : 
     617             :     /*
     618             :      * We use the relation extension lock to lock out other backends trying to
     619             :      * extend the FSM at the same time. It also locks out extension of the
     620             :      * main fork, unnecessarily, but extending the FSM happens seldom enough
     621             :      * that it doesn't seem worthwhile to have a separate lock tag type for
     622             :      * it.
     623             :      *
     624             :      * Note that another backend might have extended or created the relation
     625             :      * by the time we get the lock.
     626             :      */
     627         223 :     LockRelationForExtension(rel, ExclusiveLock);
     628             : 
     629             :     /* Might have to re-open if a cache flush happened */
     630         223 :     RelationOpenSmgr(rel);
     631             : 
     632             :     /*
     633             :      * Create the FSM file first if it doesn't exist.  If smgr_fsm_nblocks is
     634             :      * positive then it must exist, no need for an smgrexists call.
     635             :      */
     636         223 :     if ((rel->rd_smgr->smgr_fsm_nblocks == 0 ||
     637         223 :          rel->rd_smgr->smgr_fsm_nblocks == InvalidBlockNumber) &&
     638         223 :         !smgrexists(rel->rd_smgr, FSM_FORKNUM))
     639         197 :         smgrcreate(rel->rd_smgr, FSM_FORKNUM, false);
     640             : 
     641         223 :     fsm_nblocks_now = smgrnblocks(rel->rd_smgr, FSM_FORKNUM);
     642             : 
     643        1115 :     while (fsm_nblocks_now < fsm_nblocks)
     644             :     {
     645         669 :         PageSetChecksumInplace(pg, fsm_nblocks_now);
     646             : 
     647         669 :         smgrextend(rel->rd_smgr, FSM_FORKNUM, fsm_nblocks_now,
     648             :                    (char *) pg, false);
     649         669 :         fsm_nblocks_now++;
     650             :     }
     651             : 
     652             :     /* Update local cache with the up-to-date size */
     653         223 :     rel->rd_smgr->smgr_fsm_nblocks = fsm_nblocks_now;
     654             : 
     655         223 :     UnlockRelationForExtension(rel, ExclusiveLock);
     656             : 
     657         223 :     pfree(pg);
     658         223 : }
     659             : 
     660             : /*
     661             :  * Set value in given FSM page and slot.
     662             :  *
     663             :  * If minValue > 0, the updated page is also searched for a page with at
     664             :  * least minValue of free space. If one is found, its slot number is
     665             :  * returned, -1 otherwise.
     666             :  */
     667             : static int
     668       13766 : fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
     669             :                    uint8 newValue, uint8 minValue)
     670             : {
     671             :     Buffer      buf;
     672             :     Page        page;
     673       13766 :     int         newslot = -1;
     674             : 
     675       13766 :     buf = fsm_readbuf(rel, addr, true);
     676       13766 :     LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     677             : 
     678       13766 :     page = BufferGetPage(buf);
     679             : 
     680       13766 :     if (fsm_set_avail(page, slot, newValue))
     681        7667 :         MarkBufferDirtyHint(buf, false);
     682             : 
     683       13766 :     if (minValue != 0)
     684             :     {
     685             :         /* Search while we still hold the lock */
     686        8386 :         newslot = fsm_search_avail(buf, minValue,
     687        8386 :                                    addr.level == FSM_BOTTOM_LEVEL,
     688             :                                    true);
     689             :     }
     690             : 
     691       13766 :     UnlockReleaseBuffer(buf);
     692             : 
     693       13766 :     return newslot;
     694             : }
     695             : 
     696             : /*
     697             :  * Search the tree for a heap page with at least min_cat of free space
     698             :  */
     699             : static BlockNumber
     700       13449 : fsm_search(Relation rel, uint8 min_cat)
     701             : {
     702       13449 :     int         restarts = 0;
     703       13449 :     FSMAddress  addr = FSM_ROOT_ADDRESS;
     704             : 
     705             :     for (;;)
     706             :     {
     707             :         int         slot;
     708             :         Buffer      buf;
     709       15589 :         uint8       max_avail = 0;
     710             : 
     711             :         /* Read the FSM page. */
     712       15589 :         buf = fsm_readbuf(rel, addr, false);
     713             : 
     714             :         /* Search within the page */
     715       15589 :         if (BufferIsValid(buf))
     716             :         {
     717       11419 :             LockBuffer(buf, BUFFER_LOCK_SHARE);
     718       11419 :             slot = fsm_search_avail(buf, min_cat,
     719       11419 :                                     (addr.level == FSM_BOTTOM_LEVEL),
     720             :                                     false);
     721       11419 :             if (slot == -1)
     722        8443 :                 max_avail = fsm_get_max_avail(BufferGetPage(buf));
     723       11419 :             UnlockReleaseBuffer(buf);
     724             :         }
     725             :         else
     726        4170 :             slot = -1;
     727             : 
     728       15589 :         if (slot != -1)
     729             :         {
     730             :             /*
     731             :              * Descend the tree, or return the found block if we're at the
     732             :              * bottom.
     733             :              */
     734        2976 :             if (addr.level == FSM_BOTTOM_LEVEL)
     735         940 :                 return fsm_get_heap_blk(addr, slot);
     736             : 
     737        2036 :             addr = fsm_get_child(addr, slot);
     738             :         }
     739       12613 :         else if (addr.level == FSM_ROOT_LEVEL)
     740             :         {
     741             :             /*
     742             :              * At the root, failure means there's no page with enough free
     743             :              * space in the FSM. Give up.
     744             :              */
     745       12509 :             return InvalidBlockNumber;
     746             :         }
     747             :         else
     748             :         {
     749             :             uint16      parentslot;
     750             :             FSMAddress  parent;
     751             : 
     752             :             /*
     753             :              * At lower level, failure can happen if the value in the upper-
     754             :              * level node didn't reflect the value on the lower page. Update
     755             :              * the upper node, to avoid falling into the same trap again, and
     756             :              * start over.
     757             :              *
     758             :              * There's a race condition here, if another backend updates this
     759             :              * page right after we release it, and gets the lock on the parent
     760             :              * page before us. We'll then update the parent page with the now
     761             :              * stale information we had. It's OK, because it should happen
     762             :              * rarely, and will be fixed by the next vacuum.
     763             :              */
     764         104 :             parent = fsm_get_parent(addr, &parentslot);
     765         104 :             fsm_set_and_search(rel, parent, parentslot, max_avail, 0);
     766             : 
     767             :             /*
     768             :              * If the upper pages are badly out of date, we might need to loop
     769             :              * quite a few times, updating them as we go. Any inconsistencies
     770             :              * should eventually be corrected and the loop should end. Looping
     771             :              * indefinitely is nevertheless scary, so provide an emergency
     772             :              * valve.
     773             :              */
     774         104 :             if (restarts++ > 10000)
     775           0 :                 return InvalidBlockNumber;
     776             : 
     777             :             /* Start search all over from the root */
     778         104 :             addr = FSM_ROOT_ADDRESS;
     779             :         }
     780        2140 :     }
     781             : }
     782             : 
     783             : 
     784             : /*
     785             :  * Recursive guts of FreeSpaceMapVacuum
     786             :  */
     787             : static uint8
     788        1866 : fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
     789             : {
     790             :     Buffer      buf;
     791             :     Page        page;
     792             :     uint8       max_avail;
     793             : 
     794             :     /* Read the page if it exists, or return EOF */
     795        1866 :     buf = fsm_readbuf(rel, addr, false);
     796        1866 :     if (!BufferIsValid(buf))
     797             :     {
     798        1080 :         *eof_p = true;
     799        1080 :         return 0;
     800             :     }
     801             :     else
     802         786 :         *eof_p = false;
     803             : 
     804         786 :     page = BufferGetPage(buf);
     805             : 
     806             :     /*
     807             :      * Recurse into children, and fix the information stored about them at
     808             :      * this level.
     809             :      */
     810         786 :     if (addr.level > FSM_BOTTOM_LEVEL)
     811             :     {
     812             :         int         slot;
     813         526 :         bool        eof = false;
     814             : 
     815     2140820 :         for (slot = 0; slot < SlotsPerFSMPage; slot++)
     816             :         {
     817             :             int         child_avail;
     818             : 
     819     2140294 :             CHECK_FOR_INTERRUPTS();
     820             : 
     821             :             /* After we hit end-of-file, just clear the rest of the slots */
     822     2140294 :             if (!eof)
     823        1049 :                 child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot), &eof);
     824             :             else
     825     2139245 :                 child_avail = 0;
     826             : 
     827             :             /* Update information about the child */
     828     2140294 :             if (fsm_get_avail(page, slot) != child_avail)
     829             :             {
     830         454 :                 LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     831         454 :                 fsm_set_avail(BufferGetPage(buf), slot, child_avail);
     832         454 :                 MarkBufferDirtyHint(buf, false);
     833         454 :                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     834             :             }
     835             :         }
     836             :     }
     837             : 
     838         786 :     max_avail = fsm_get_max_avail(BufferGetPage(buf));
     839             : 
     840             :     /*
     841             :      * Reset the next slot pointer. This encourages the use of low-numbered
     842             :      * pages, increasing the chances that a later vacuum can truncate the
     843             :      * relation.
     844             :      */
     845         786 :     ((FSMPage) PageGetContents(page))->fp_next_slot = 0;
     846             : 
     847         786 :     ReleaseBuffer(buf);
     848             : 
     849         786 :     return max_avail;
     850             : }
     851             : 
     852             : /*
     853             :  * This function will return the last block number stored on given
     854             :  * FSM page address.
     855             :  */
     856             : static BlockNumber
     857           6 : fsm_get_lastblckno(Relation rel, FSMAddress addr)
     858             : {
     859             :     int         slot;
     860             : 
     861             :     /*
     862             :      * Get the last slot number on the given address and convert that to block
     863             :      * number
     864             :      */
     865           6 :     slot = SlotsPerFSMPage - 1;
     866           6 :     return fsm_get_heap_blk(addr, slot);
     867             : }
     868             : 
     869             : /*
     870             :  * Recursively update the FSM tree from given address to
     871             :  * all the way up to root.
     872             :  */
     873             : static void
     874          18 : fsm_update_recursive(Relation rel, FSMAddress addr, uint8 new_cat)
     875             : {
     876             :     uint16      parentslot;
     877             :     FSMAddress  parent;
     878             : 
     879          18 :     if (addr.level == FSM_ROOT_LEVEL)
     880          24 :         return;
     881             : 
     882             :     /*
     883             :      * Get the parent page and our slot in the parent page, and update the
     884             :      * information in that.
     885             :      */
     886          12 :     parent = fsm_get_parent(addr, &parentslot);
     887          12 :     fsm_set_and_search(rel, parent, parentslot, new_cat, 0);
     888          12 :     fsm_update_recursive(rel, parent, new_cat);
     889             : }

Generated by: LCOV version 1.11