LCOV - code coverage report
Current view: top level - src/backend/access/heap - pruneheap.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 210 218 96.3 %
Date: 2017-09-29 15:12:54 Functions: 9 9 100.0 %
Legend: Lines: hit / not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * pruneheap.c
       4             :  *    heap page pruning and HOT-chain management code
       5             :  *
       6             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/heap/pruneheap.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/heapam.h"
      18             : #include "access/heapam_xlog.h"
      19             : #include "access/transam.h"
      20             : #include "access/htup_details.h"
      21             : #include "access/xlog.h"
      22             : #include "catalog/catalog.h"
      23             : #include "miscadmin.h"
      24             : #include "pgstat.h"
      25             : #include "storage/bufmgr.h"
      26             : #include "utils/snapmgr.h"
      27             : #include "utils/rel.h"
      28             : #include "utils/tqual.h"
      29             : 
/* Working data for heap_page_prune and subroutines */
typedef struct
{
    TransactionId new_prune_xid;    /* new prune hint value for page */
    TransactionId latestRemovedXid; /* latest xid to be removed by this prune */
    int         nredirected;    /* numbers of entries in arrays below */
    int         ndead;
    int         nunused;
    /*
     * Arrays that accumulate indexes of items to be changed.  redirected[]
     * holds two entries per redirection (the item being redirected and its
     * new target), hence the doubled size.
     */
    OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
    OffsetNumber nowdead[MaxHeapTuplesPerPage];
    OffsetNumber nowunused[MaxHeapTuplesPerPage];
    /*
     * marked[i] is TRUE if item i is entered in one of the above arrays.
     * Indexed directly by 1-based OffsetNumber, hence the extra slot.
     */
    bool        marked[MaxHeapTuplesPerPage + 1];
} PruneState;
      45             : 
      46             : /* Local functions */
      47             : static int heap_prune_chain(Relation relation, Buffer buffer,
      48             :                  OffsetNumber rootoffnum,
      49             :                  TransactionId OldestXmin,
      50             :                  PruneState *prstate);
      51             : static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
      52             : static void heap_prune_record_redirect(PruneState *prstate,
      53             :                            OffsetNumber offnum, OffsetNumber rdoffnum);
      54             : static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
      55             : static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
      56             : 
      57             : 
      58             : /*
      59             :  * Optionally prune and repair fragmentation in the specified page.
      60             :  *
      61             :  * This is an opportunistic function.  It will perform housekeeping
      62             :  * only if the page heuristically looks like a candidate for pruning and we
      63             :  * can acquire buffer cleanup lock without blocking.
      64             :  *
      65             :  * Note: this is called quite often.  It's important that it fall out quickly
      66             :  * if there's not any use in pruning.
      67             :  *
      68             :  * Caller must have pin on the buffer, and must *not* have a lock on it.
      69             :  *
      70             :  * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
      71             :  * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
      72             :  */
      73             : void
      74      499007 : heap_page_prune_opt(Relation relation, Buffer buffer)
      75             : {
      76      499007 :     Page        page = BufferGetPage(buffer);
      77             :     Size        minfree;
      78             :     TransactionId OldestXmin;
      79             : 
      80             :     /*
      81             :      * We can't write WAL in recovery mode, so there's no point trying to
      82             :      * clean the page. The master will likely issue a cleaning WAL record soon
      83             :      * anyway, so this is no particular loss.
      84             :      */
      85      499007 :     if (RecoveryInProgress())
      86           0 :         return;
      87             : 
      88             :     /*
      89             :      * Use the appropriate xmin horizon for this relation. If it's a proper
      90             :      * catalog relation or a user defined, additional, catalog relation, we
      91             :      * need to use the horizon that includes slots, otherwise the data-only
      92             :      * horizon can be used. Note that the toast relation of user defined
      93             :      * relations are *not* considered catalog relations.
      94             :      *
      95             :      * It is OK to apply the old snapshot limit before acquiring the cleanup
      96             :      * lock because the worst that can happen is that we are not quite as
      97             :      * aggressive about the cleanup (by however many transaction IDs are
      98             :      * consumed between this point and acquiring the lock).  This allows us to
      99             :      * save significant overhead in the case where the page is found not to be
     100             :      * prunable.
     101             :      */
     102      665660 :     if (IsCatalogRelation(relation) ||
     103      166653 :         RelationIsAccessibleInLogicalDecoding(relation))
     104      332354 :         OldestXmin = RecentGlobalXmin;
     105             :     else
     106      166653 :         OldestXmin =
     107      166653 :             TransactionIdLimitedForOldSnapshots(RecentGlobalDataXmin,
     108             :                                                 relation);
     109             : 
     110      499007 :     Assert(TransactionIdIsValid(OldestXmin));
     111             : 
     112             :     /*
     113             :      * Let's see if we really need pruning.
     114             :      *
     115             :      * Forget it if page is not hinted to contain something prunable that's
     116             :      * older than OldestXmin.
     117             :      */
     118      499007 :     if (!PageIsPrunable(page, OldestXmin))
     119      427845 :         return;
     120             : 
     121             :     /*
     122             :      * We prune when a previous UPDATE failed to find enough space on the page
     123             :      * for a new tuple version, or when free space falls below the relation's
     124             :      * fill-factor target (but not less than 10%).
     125             :      *
     126             :      * Checking free space here is questionable since we aren't holding any
     127             :      * lock on the buffer; in the worst case we could get a bogus answer. It's
     128             :      * unlikely to be *seriously* wrong, though, since reading either pd_lower
     129             :      * or pd_upper is probably atomic.  Avoiding taking a lock seems more
     130             :      * important than sometimes getting a wrong answer in what is after all
     131             :      * just a heuristic estimate.
     132             :      */
     133       71162 :     minfree = RelationGetTargetPageFreeSpace(relation,
     134             :                                              HEAP_DEFAULT_FILLFACTOR);
     135       71162 :     minfree = Max(minfree, BLCKSZ / 10);
     136             : 
     137       71162 :     if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
     138             :     {
     139             :         /* OK, try to get exclusive buffer lock */
     140        2327 :         if (!ConditionalLockBufferForCleanup(buffer))
     141          19 :             return;
     142             : 
     143             :         /*
     144             :          * Now that we have buffer lock, get accurate information about the
     145             :          * page's free space, and recheck the heuristic about whether to
     146             :          * prune. (We needn't recheck PageIsPrunable, since no one else could
     147             :          * have pruned while we hold pin.)
     148             :          */
     149        2308 :         if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
     150             :         {
     151        2308 :             TransactionId ignore = InvalidTransactionId;    /* return value not
     152             :                                                              * needed */
     153             : 
     154             :             /* OK to prune */
     155        2308 :             (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore);
     156             :         }
     157             : 
     158             :         /* And release buffer lock */
     159        2308 :         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
     160             :     }
     161             : }
     162             : 
     163             : 
/*
 * Prune and repair fragmentation in the specified page.
 *
 * Caller must have pin and buffer cleanup lock on the page.
 *
 * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
 * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
 *
 * If report_stats is true then we send the number of reclaimed heap-only
 * tuples to pgstats.  (This must be FALSE during vacuum, since vacuum will
 * send its own new total to pgstats, and we don't want this delta applied
 * on top of that.)
 *
 * *latestRemovedXid is both input and output: on entry it seeds the running
 * maximum tracked while pruning, and on exit it holds the latest XID among
 * the tuples removed here (unchanged if nothing newer was removed).
 *
 * Returns the number of tuples deleted from the page and sets
 * latestRemovedXid.
 */
int
heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
				bool report_stats, TransactionId *latestRemovedXid)
{
	int			ndeleted = 0;
	Page		page = BufferGetPage(buffer);
	OffsetNumber offnum,
				maxoff;
	PruneState	prstate;

	/*
	 * Our strategy is to scan the page and make lists of items to change,
	 * then apply the changes within a critical section.  This keeps as much
	 * logic as possible out of the critical section, and also ensures that
	 * WAL replay will work the same as the normal case.
	 *
	 * First, initialize the new pd_prune_xid value to zero (indicating no
	 * prunable tuples).  If we find any tuples which may soon become
	 * prunable, we will save the lowest relevant XID in new_prune_xid. Also
	 * initialize the rest of our working state.
	 */
	prstate.new_prune_xid = InvalidTransactionId;
	prstate.latestRemovedXid = *latestRemovedXid;
	prstate.nredirected = prstate.ndead = prstate.nunused = 0;
	memset(prstate.marked, 0, sizeof(prstate.marked));

	/* Scan the page */
	maxoff = PageGetMaxOffsetNumber(page);
	for (offnum = FirstOffsetNumber;
		 offnum <= maxoff;
		 offnum = OffsetNumberNext(offnum))
	{
		ItemId		itemid;

		/* Ignore items already processed as part of an earlier chain */
		if (prstate.marked[offnum])
			continue;

		/* Nothing to do if slot is empty or already dead */
		itemid = PageGetItemId(page, offnum);
		if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
			continue;

		/* Process this item or chain of items */
		ndeleted += heap_prune_chain(relation, buffer, offnum,
									 OldestXmin,
									 &prstate);
	}

	/* Any error while applying the changes is critical */
	START_CRIT_SECTION();

	/* Have we found any prunable items? */
	if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
	{
		/*
		 * Apply the planned item changes, then repair page fragmentation, and
		 * update the page's hint bit about whether it has free line pointers.
		 */
		heap_page_prune_execute(buffer,
								prstate.redirected, prstate.nredirected,
								prstate.nowdead, prstate.ndead,
								prstate.nowunused, prstate.nunused);

		/*
		 * Update the page's pd_prune_xid field to either zero, or the lowest
		 * XID of any soon-prunable tuple.
		 */
		((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;

		/*
		 * Also clear the "page is full" flag, since there's no point in
		 * repeating the prune/defrag process until something else happens to
		 * the page.
		 */
		PageClearFull(page);

		MarkBufferDirty(buffer);

		/*
		 * Emit a WAL HEAP_CLEAN record showing what we did
		 */
		if (RelationNeedsWAL(relation))
		{
			XLogRecPtr	recptr;

			recptr = log_heap_clean(relation, buffer,
									prstate.redirected, prstate.nredirected,
									prstate.nowdead, prstate.ndead,
									prstate.nowunused, prstate.nunused,
									prstate.latestRemovedXid);

			PageSetLSN(BufferGetPage(buffer), recptr);
		}
	}
	else
	{
		/*
		 * If we didn't prune anything, but have found a new value for the
		 * pd_prune_xid field, update it and mark the buffer dirty. This is
		 * treated as a non-WAL-logged hint.
		 *
		 * Also clear the "page is full" flag if it is set, since there's no
		 * point in repeating the prune/defrag process until something else
		 * happens to the page.
		 */
		if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
			PageIsFull(page))
		{
			((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
			PageClearFull(page);
			MarkBufferDirtyHint(buffer, true);
		}
	}

	END_CRIT_SECTION();

	/*
	 * If requested, report the number of tuples reclaimed to pgstats. This is
	 * ndeleted minus ndead, because we don't want to count a now-DEAD root
	 * item as a deletion for this purpose.
	 */
	if (report_stats && ndeleted > prstate.ndead)
		pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);

	/* Hand the accumulated removal horizon back to the caller. */
	*latestRemovedXid = prstate.latestRemovedXid;

	/*
	 * XXX Should we update the FSM information of this page ?
	 *
	 * There are two schools of thought here. We may not want to update FSM
	 * information so that the page is not used for unrelated UPDATEs/INSERTs
	 * and any free space in this page will remain available for further
	 * UPDATEs in *this* page, thus improving chances for doing HOT updates.
	 *
	 * But for a large table and where a page does not receive further UPDATEs
	 * for a long time, we might waste this space by not updating the FSM
	 * information. The relation may get extended and fragmented further.
	 *
	 * One possibility is to leave "fillfactor" worth of space in this page
	 * and update FSM with the remaining space.
	 */

	return ndeleted;
}
     325             : 
     326             : 
     327             : /*
     328             :  * Prune specified item pointer or a HOT chain originating at that item.
     329             :  *
     330             :  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
     331             :  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
     332             :  * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
     333             :  * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
     334             :  * DEAD, the OldestXmin test is just too coarse to detect it.
     335             :  *
     336             :  * The root line pointer is redirected to the tuple immediately after the
     337             :  * latest DEAD tuple.  If all tuples in the chain are DEAD, the root line
     338             :  * pointer is marked LP_DEAD.  (This includes the case of a DEAD simple
     339             :  * tuple, which we treat as a chain of length 1.)
     340             :  *
     341             :  * OldestXmin is the cutoff XID used to identify dead tuples.
     342             :  *
     343             :  * We don't actually change the page here, except perhaps for hint-bit updates
     344             :  * caused by HeapTupleSatisfiesVacuum.  We just add entries to the arrays in
     345             :  * prstate showing the changes to be made.  Items to be redirected are added
     346             :  * to the redirected[] array (two entries per redirection); items to be set to
     347             :  * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
     348             :  * state are added to nowunused[].
     349             :  *
     350             :  * Returns the number of tuples (to be) deleted from the page.
     351             :  */
     352             : static int
     353      561696 : heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
     354             :                  TransactionId OldestXmin,
     355             :                  PruneState *prstate)
     356             : {
     357      561696 :     int         ndeleted = 0;
     358      561696 :     Page        dp = (Page) BufferGetPage(buffer);
     359      561696 :     TransactionId priorXmax = InvalidTransactionId;
     360             :     ItemId      rootlp;
     361             :     HeapTupleHeader htup;
     362      561696 :     OffsetNumber latestdead = InvalidOffsetNumber,
     363      561696 :                 maxoff = PageGetMaxOffsetNumber(dp),
     364             :                 offnum;
     365             :     OffsetNumber chainitems[MaxHeapTuplesPerPage];
     366      561696 :     int         nchain = 0,
     367             :                 i;
     368             :     HeapTupleData tup;
     369             : 
     370      561696 :     tup.t_tableOid = RelationGetRelid(relation);
     371             : 
     372      561696 :     rootlp = PageGetItemId(dp, rootoffnum);
     373             : 
     374             :     /*
     375             :      * If it's a heap-only tuple, then it is not the start of a HOT chain.
     376             :      */
     377      561696 :     if (ItemIdIsNormal(rootlp))
     378             :     {
     379      558489 :         htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
     380             : 
     381      558489 :         tup.t_data = htup;
     382      558489 :         tup.t_len = ItemIdGetLength(rootlp);
     383      558489 :         ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum);
     384             : 
     385      558489 :         if (HeapTupleHeaderIsHeapOnly(htup))
     386             :         {
     387             :             /*
     388             :              * If the tuple is DEAD and doesn't chain to anything else, mark
     389             :              * it unused immediately.  (If it does chain, we can only remove
     390             :              * it as part of pruning its chain.)
     391             :              *
     392             :              * We need this primarily to handle aborted HOT updates, that is,
     393             :              * XMIN_INVALID heap-only tuples.  Those might not be linked to by
     394             :              * any chain, since the parent tuple might be re-updated before
     395             :              * any pruning occurs.  So we have to be able to reap them
     396             :              * separately from chain-pruning.  (Note that
     397             :              * HeapTupleHeaderIsHotUpdated will never return true for an
     398             :              * XMIN_INVALID tuple, so this code will work even when there were
     399             :              * sequential updates within the aborted transaction.)
     400             :              *
     401             :              * Note that we might first arrive at a dead heap-only tuple
     402             :              * either here or while following a chain below.  Whichever path
     403             :              * gets there first will mark the tuple unused.
     404             :              */
     405        3966 :             if (HeapTupleSatisfiesVacuum(&tup, OldestXmin, buffer)
     406         430 :                 == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup))
     407             :             {
     408         285 :                 heap_prune_record_unused(prstate, rootoffnum);
     409         285 :                 HeapTupleHeaderAdvanceLatestRemovedXid(htup,
     410             :                                                        &prstate->latestRemovedXid);
     411         285 :                 ndeleted++;
     412             :             }
     413             : 
     414             :             /* Nothing more to do */
     415        3966 :             return ndeleted;
     416             :         }
     417             :     }
     418             : 
     419             :     /* Start from the root tuple */
     420      557730 :     offnum = rootoffnum;
     421             : 
     422             :     /* while not end of the chain */
     423             :     for (;;)
     424             :     {
     425             :         ItemId      lp;
     426             :         bool        tupdead,
     427             :                     recent_dead;
     428             : 
     429             :         /* Some sanity checks */
     430      565468 :         if (offnum < FirstOffsetNumber || offnum > maxoff)
     431             :             break;
     432             : 
     433             :         /* If item is already processed, stop --- it must not be same chain */
     434      565468 :         if (prstate->marked[offnum])
     435          83 :             break;
     436             : 
     437      565385 :         lp = PageGetItemId(dp, offnum);
     438             : 
     439             :         /* Unused item obviously isn't part of the chain */
     440      565385 :         if (!ItemIdIsUsed(lp))
     441           0 :             break;
     442             : 
     443             :         /*
     444             :          * If we are looking at the redirected root line pointer, jump to the
     445             :          * first normal tuple in the chain.  If we find a redirect somewhere
     446             :          * else, stop --- it must not be same chain.
     447             :          */
     448      565385 :         if (ItemIdIsRedirected(lp))
     449             :         {
     450        3207 :             if (nchain > 0)
     451           0 :                 break;          /* not at start of chain */
     452        3207 :             chainitems[nchain++] = offnum;
     453        3207 :             offnum = ItemIdGetRedirect(rootlp);
     454        3207 :             continue;
     455             :         }
     456             : 
     457             :         /*
     458             :          * Likewise, a dead item pointer can't be part of the chain. (We
     459             :          * already eliminated the case of dead root tuple outside this
     460             :          * function.)
     461             :          */
     462      562178 :         if (ItemIdIsDead(lp))
     463           0 :             break;
     464             : 
     465      562178 :         Assert(ItemIdIsNormal(lp));
     466      562178 :         htup = (HeapTupleHeader) PageGetItem(dp, lp);
     467             : 
     468      562178 :         tup.t_data = htup;
     469      562178 :         tup.t_len = ItemIdGetLength(lp);
     470      562178 :         ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum);
     471             : 
     472             :         /*
     473             :          * Check the tuple XMIN against prior XMAX, if any
     474             :          */
     475      566648 :         if (TransactionIdIsValid(priorXmax) &&
     476        4470 :             !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
     477           0 :             break;
     478             : 
     479             :         /*
     480             :          * OK, this tuple is indeed a member of the chain.
     481             :          */
     482      562178 :         chainitems[nchain++] = offnum;
     483             : 
     484             :         /*
     485             :          * Check tuple's visibility status.
     486             :          */
     487      562178 :         tupdead = recent_dead = false;
     488             : 
     489      562178 :         switch (HeapTupleSatisfiesVacuum(&tup, OldestXmin, buffer))
     490             :         {
     491             :             case HEAPTUPLE_DEAD:
     492      107106 :                 tupdead = true;
     493      107106 :                 break;
     494             : 
     495             :             case HEAPTUPLE_RECENTLY_DEAD:
     496       40672 :                 recent_dead = true;
     497             : 
     498             :                 /*
     499             :                  * This tuple may soon become DEAD.  Update the hint field so
     500             :                  * that the page is reconsidered for pruning in future.
     501             :                  */
     502       40672 :                 heap_prune_record_prunable(prstate,
     503       81344 :                                            HeapTupleHeaderGetUpdateXid(htup));
     504       40672 :                 break;
     505             : 
     506             :             case HEAPTUPLE_DELETE_IN_PROGRESS:
     507             : 
     508             :                 /*
     509             :                  * This tuple may soon become DEAD.  Update the hint field so
     510             :                  * that the page is reconsidered for pruning in future.
     511             :                  */
     512         441 :                 heap_prune_record_prunable(prstate,
     513         882 :                                            HeapTupleHeaderGetUpdateXid(htup));
     514         441 :                 break;
     515             : 
     516             :             case HEAPTUPLE_LIVE:
     517             :             case HEAPTUPLE_INSERT_IN_PROGRESS:
     518             : 
     519             :                 /*
     520             :                  * If we wanted to optimize for aborts, we might consider
     521             :                  * marking the page prunable when we see INSERT_IN_PROGRESS.
     522             :                  * But we don't.  See related decisions about when to mark the
     523             :                  * page prunable in heapam.c.
     524             :                  */
     525      413959 :                 break;
     526             : 
     527             :             default:
     528           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
     529             :                 break;
     530             :         }
     531             : 
     532             :         /*
     533             :          * Remember the last DEAD tuple seen.  We will advance past
     534             :          * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
     535             :          * but we can't advance past anything else.  (XXX is it really worth
     536             :          * continuing to scan beyond RECENTLY_DEAD?  The case where we will
     537             :          * find another DEAD tuple is a fairly unusual corner case.)
     538             :          */
     539      562178 :         if (tupdead)
     540             :         {
     541      107106 :             latestdead = offnum;
     542      107106 :             HeapTupleHeaderAdvanceLatestRemovedXid(htup,
     543             :                                                    &prstate->latestRemovedXid);
     544             :         }
     545      455072 :         else if (!recent_dead)
     546      414400 :             break;
     547             : 
     548             :         /*
     549             :          * If the tuple is not HOT-updated, then we are at the end of this
     550             :          * HOT-update chain.
     551             :          */
     552      147778 :         if (!HeapTupleHeaderIsHotUpdated(htup))
     553             :             break;
     554             : 
     555             :         /*
     556             :          * Advance to next chain member.
     557             :          */
     558        4531 :         Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
     559             :                BufferGetBlockNumber(buffer));
     560        4531 :         offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
     561        4531 :         priorXmax = HeapTupleHeaderGetUpdateXid(htup);
     562        7738 :     }
     563             : 
     564             :     /*
     565             :      * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
     566             :      * the DEAD tuples at the start of the chain are removed and the root line
     567             :      * pointer is appropriately redirected.
     568             :      */
     569      557730 :     if (OffsetNumberIsValid(latestdead))
     570             :     {
     571             :         /*
     572             :          * Mark as unused each intermediate item that we are able to remove
     573             :          * from the chain.
     574             :          *
     575             :          * When the previous item is the last dead tuple seen, we are at the
     576             :          * right candidate for redirection.
     577             :          */
     578      107674 :         for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
     579             :         {
     580        3106 :             heap_prune_record_unused(prstate, chainitems[i]);
     581        3106 :             ndeleted++;
     582             :         }
     583             : 
     584             :         /*
     585             :          * If the root entry had been a normal tuple, we are deleting it, so
     586             :          * count it in the result.  But changing a redirect (even to DEAD
     587             :          * state) doesn't count.
     588             :          */
     589      104568 :         if (ItemIdIsNormal(rootlp))
     590      104000 :             ndeleted++;
     591             : 
     592             :         /*
     593             :          * If the DEAD tuple is at the end of the chain, the entire chain is
     594             :          * dead and the root line pointer can be marked dead.  Otherwise just
     595             :          * redirect the root to the correct chain member.
     596             :          */
     597      104568 :         if (i >= nchain)
     598      103243 :             heap_prune_record_dead(prstate, rootoffnum);
     599             :         else
     600        1325 :             heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
     601             :     }
     602      453162 :     else if (nchain < 2 && ItemIdIsRedirected(rootlp))
     603             :     {
     604             :         /*
     605             :          * We found a redirect item that doesn't point to a valid follow-on
     606             :          * item.  This can happen if the loop in heap_page_prune caused us to
     607             :          * visit the dead successor of a redirect item before visiting the
     608             :          * redirect item.  We can clean up by setting the redirect item to
     609             :          * DEAD state.
     610             :          */
     611          22 :         heap_prune_record_dead(prstate, rootoffnum);
     612             :     }
     613             : 
     614      557730 :     return ndeleted;
     615             : }
     616             : 
     617             : /* Record lowest soon-prunable XID */
     618             : static void
     619       41113 : heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
     620             : {
     621             :     /*
     622             :      * This should exactly match the PageSetPrunable macro.  We can't store
     623             :      * directly into the page header yet, so we update working state.
     624             :      */
     625       41113 :     Assert(TransactionIdIsNormal(xid));
     626       81471 :     if (!TransactionIdIsValid(prstate->new_prune_xid) ||
     627       40358 :         TransactionIdPrecedes(xid, prstate->new_prune_xid))
     628         848 :         prstate->new_prune_xid = xid;
     629       41113 : }
     630             : 
     631             : /* Record item pointer to be redirected */
     632             : static void
     633        1325 : heap_prune_record_redirect(PruneState *prstate,
     634             :                            OffsetNumber offnum, OffsetNumber rdoffnum)
     635             : {
     636        1325 :     Assert(prstate->nredirected < MaxHeapTuplesPerPage);
     637        1325 :     prstate->redirected[prstate->nredirected * 2] = offnum;
     638        1325 :     prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
     639        1325 :     prstate->nredirected++;
     640        1325 :     Assert(!prstate->marked[offnum]);
     641        1325 :     prstate->marked[offnum] = true;
     642        1325 :     Assert(!prstate->marked[rdoffnum]);
     643        1325 :     prstate->marked[rdoffnum] = true;
     644        1325 : }
     645             : 
     646             : /* Record item pointer to be marked dead */
     647             : static void
     648      103265 : heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
     649             : {
     650      103265 :     Assert(prstate->ndead < MaxHeapTuplesPerPage);
     651      103265 :     prstate->nowdead[prstate->ndead] = offnum;
     652      103265 :     prstate->ndead++;
     653      103265 :     Assert(!prstate->marked[offnum]);
     654      103265 :     prstate->marked[offnum] = true;
     655      103265 : }
     656             : 
     657             : /* Record item pointer to be marked unused */
     658             : static void
     659        3391 : heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
     660             : {
     661        3391 :     Assert(prstate->nunused < MaxHeapTuplesPerPage);
     662        3391 :     prstate->nowunused[prstate->nunused] = offnum;
     663        3391 :     prstate->nunused++;
     664        3391 :     Assert(!prstate->marked[offnum]);
     665        3391 :     prstate->marked[offnum] = true;
     666        3391 : }
     667             : 
     668             : 
     669             : /*
     670             :  * Perform the actual page changes needed by heap_page_prune.
     671             :  * It is expected that the caller has suitable pin and lock on the
     672             :  * buffer, and is inside a critical section.
     673             :  *
     674             :  * This is split out because it is also used by heap_xlog_clean()
     675             :  * to replay the WAL record when needed after a crash.  Note that the
     676             :  * arguments are identical to those of log_heap_clean().
     677             :  */
     678             : void
     679        3029 : heap_page_prune_execute(Buffer buffer,
     680             :                         OffsetNumber *redirected, int nredirected,
     681             :                         OffsetNumber *nowdead, int ndead,
     682             :                         OffsetNumber *nowunused, int nunused)
     683             : {
     684        3029 :     Page        page = (Page) BufferGetPage(buffer);
     685             :     OffsetNumber *offnum;
     686             :     int         i;
     687             : 
     688             :     /* Update all redirected line pointers */
     689        3029 :     offnum = redirected;
     690        4354 :     for (i = 0; i < nredirected; i++)
     691             :     {
     692        1325 :         OffsetNumber fromoff = *offnum++;
     693        1325 :         OffsetNumber tooff = *offnum++;
     694        1325 :         ItemId      fromlp = PageGetItemId(page, fromoff);
     695             : 
     696        1325 :         ItemIdSetRedirect(fromlp, tooff);
     697             :     }
     698             : 
     699             :     /* Update all now-dead line pointers */
     700        3029 :     offnum = nowdead;
     701      106294 :     for (i = 0; i < ndead; i++)
     702             :     {
     703      103265 :         OffsetNumber off = *offnum++;
     704      103265 :         ItemId      lp = PageGetItemId(page, off);
     705             : 
     706      103265 :         ItemIdSetDead(lp);
     707             :     }
     708             : 
     709             :     /* Update all now-unused line pointers */
     710        3029 :     offnum = nowunused;
     711        6420 :     for (i = 0; i < nunused; i++)
     712             :     {
     713        3391 :         OffsetNumber off = *offnum++;
     714        3391 :         ItemId      lp = PageGetItemId(page, off);
     715             : 
     716        3391 :         ItemIdSetUnused(lp);
     717             :     }
     718             : 
     719             :     /*
     720             :      * Finally, repair any fragmentation, and update the page's hint bit about
     721             :      * whether it has free pointers.
     722             :      */
     723        3029 :     PageRepairFragmentation(page);
     724        3029 : }
     725             : 
     726             : 
/*
 * For all items in this page, find their respective root line pointers.
 * If item k is part of a HOT-chain with root at item j, then we set
 * root_offsets[k - 1] = j.
 *
 * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
 * We zero out all unused entries.  (Zero is InvalidOffsetNumber, so a zero
 * entry means "no root recorded for this item".)
 *
 * The function must be called with at least share lock on the buffer, to
 * prevent concurrent prune operations.
 *
 * Note: The information collected here is valid only as long as the caller
 * holds a pin on the buffer. Once pin is released, a tuple might be pruned
 * and reused by a completely unrelated tuple.
 */
void
heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
{
    OffsetNumber offnum,
                maxoff;

    MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));

    maxoff = PageGetMaxOffsetNumber(page);
    for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
    {
        ItemId      lp = PageGetItemId(page, offnum);
        HeapTupleHeader htup;
        OffsetNumber nextoffnum;
        TransactionId priorXmax;

        /* skip unused and dead items */
        if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
            continue;

        if (ItemIdIsNormal(lp))
        {
            htup = (HeapTupleHeader) PageGetItem(page, lp);

            /*
             * Check if this tuple is part of a HOT-chain rooted at some other
             * tuple. If so, skip it for now; we'll process it when we find
             * its root.
             */
            if (HeapTupleHeaderIsHeapOnly(htup))
                continue;

            /*
             * This is either a plain tuple or the root of a HOT-chain.
             * Remember it in the mapping.
             */
            root_offsets[offnum - 1] = offnum;

            /* If it's not the start of a HOT-chain, we're done with it */
            if (!HeapTupleHeaderIsHotUpdated(htup))
                continue;

            /* Set up to scan the HOT-chain */
            nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
            priorXmax = HeapTupleHeaderGetUpdateXid(htup);
        }
        else
        {
            /* Must be a redirect item. We do not set its root_offsets entry */
            Assert(ItemIdIsRedirected(lp));
            /* Set up to scan the HOT-chain */
            nextoffnum = ItemIdGetRedirect(lp);
            priorXmax = InvalidTransactionId;
        }

        /*
         * Now follow the HOT-chain and collect other tuples in the chain.
         *
         * Note: Even though this is a nested loop, the complexity of the
         * function is O(N) because a tuple in the page should be visited not
         * more than twice, once in the outer loop and once in HOT-chain
         * chases.
         */
        for (;;)
        {
            lp = PageGetItemId(page, nextoffnum);

            /* Check for broken chains */
            if (!ItemIdIsNormal(lp))
                break;

            htup = (HeapTupleHeader) PageGetItem(page, lp);

            /*
             * The successor is only a genuine chain member if its xmin
             * matches the updater's xmax; otherwise the slot was recycled
             * for an unrelated tuple and the chain is broken here.
             * (priorXmax is invalid on the first hop from a redirect item,
             * so that hop is accepted unconditionally.)
             */
            if (TransactionIdIsValid(priorXmax) &&
                !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
                break;

            /* Remember the root line pointer for this item */
            root_offsets[nextoffnum - 1] = offnum;

            /* Advance to next chain member, if any */
            if (!HeapTupleHeaderIsHotUpdated(htup))
                break;

            nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
            priorXmax = HeapTupleHeaderGetUpdateXid(htup);
        }
    }
}

Generated by: LCOV version 1.11