LCOV - code coverage report
Current view: top level - src/backend/access/heap - pruneheap.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 207 218 95.0 %
Date: 2017-09-29 13:40:31 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * pruneheap.c
       4             :  *    heap page pruning and HOT-chain management code
       5             :  *
       6             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/heap/pruneheap.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/heapam.h"
      18             : #include "access/heapam_xlog.h"
      19             : #include "access/transam.h"
      20             : #include "access/htup_details.h"
      21             : #include "access/xlog.h"
      22             : #include "catalog/catalog.h"
      23             : #include "miscadmin.h"
      24             : #include "pgstat.h"
      25             : #include "storage/bufmgr.h"
      26             : #include "utils/snapmgr.h"
      27             : #include "utils/rel.h"
      28             : #include "utils/tqual.h"
      29             : 
      30             : /* Working data for heap_page_prune and subroutines */
      31             : typedef struct
      32             : {
      33             :     TransactionId new_prune_xid;    /* new prune hint value for page */
      34             :     TransactionId latestRemovedXid; /* latest xid to be removed by this prune */
      35             :     int         nredirected;    /* numbers of entries in arrays below */
      36             :     int         ndead;
      37             :     int         nunused;
      38             :     /* arrays that accumulate indexes of items to be changed */
      39             :     OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
      40             :     OffsetNumber nowdead[MaxHeapTuplesPerPage];
      41             :     OffsetNumber nowunused[MaxHeapTuplesPerPage];
      42             :     /* marked[i] is TRUE if item i is entered in one of the above arrays */
      43             :     bool        marked[MaxHeapTuplesPerPage + 1];
      44             : } PruneState;
      45             : 
      46             : /* Local functions */
      47             : static int heap_prune_chain(Relation relation, Buffer buffer,
      48             :                  OffsetNumber rootoffnum,
      49             :                  TransactionId OldestXmin,
      50             :                  PruneState *prstate);
      51             : static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
      52             : static void heap_prune_record_redirect(PruneState *prstate,
      53             :                            OffsetNumber offnum, OffsetNumber rdoffnum);
      54             : static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
      55             : static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
      56             : 
      57             : 
      58             : /*
      59             :  * Optionally prune and repair fragmentation in the specified page.
      60             :  *
      61             :  * This is an opportunistic function.  It will perform housekeeping
      62             :  * only if the page heuristically looks like a candidate for pruning and we
      63             :  * can acquire buffer cleanup lock without blocking.
      64             :  *
      65             :  * Note: this is called quite often.  It's important that it fall out quickly
      66             :  * if there's not any use in pruning.
      67             :  *
      68             :  * Caller must have pin on the buffer, and must *not* have a lock on it.
      69             :  *
      70             :  * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
      71             :  * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
      72             :  */
      73             : void
      74      499213 : heap_page_prune_opt(Relation relation, Buffer buffer)
      75             : {
      76      499213 :     Page        page = BufferGetPage(buffer);
      77             :     Size        minfree;
      78             :     TransactionId OldestXmin;
      79             : 
      80             :     /*
      81             :      * We can't write WAL in recovery mode, so there's no point trying to
      82             :      * clean the page. The master will likely issue a cleaning WAL record soon
      83             :      * anyway, so this is no particular loss.
      84             :      */
      85      499213 :     if (RecoveryInProgress())
      86           0 :         return;
      87             : 
      88             :     /*
      89             :      * Use the appropriate xmin horizon for this relation. If it's a proper
      90             :      * catalog relation or a user defined, additional, catalog relation, we
      91             :      * need to use the horizon that includes slots, otherwise the data-only
      92             :      * horizon can be used. Note that the toast relation of user defined
      93             :      * relations are *not* considered catalog relations.
      94             :      *
      95             :      * It is OK to apply the old snapshot limit before acquiring the cleanup
      96             :      * lock because the worst that can happen is that we are not quite as
      97             :      * aggressive about the cleanup (by however many transaction IDs are
      98             :      * consumed between this point and acquiring the lock).  This allows us to
      99             :      * save significant overhead in the case where the page is found not to be
     100             :      * prunable.
     101             :      */
     102      666243 :     if (IsCatalogRelation(relation) ||
     103      167030 :         RelationIsAccessibleInLogicalDecoding(relation))
     104      332183 :         OldestXmin = RecentGlobalXmin;
     105             :     else
     106      167030 :         OldestXmin =
     107      167030 :             TransactionIdLimitedForOldSnapshots(RecentGlobalDataXmin,
     108             :                                                 relation);
     109             : 
     110      499213 :     Assert(TransactionIdIsValid(OldestXmin));
     111             : 
     112             :     /*
     113             :      * Let's see if we really need pruning.
     114             :      *
     115             :      * Forget it if page is not hinted to contain something prunable that's
     116             :      * older than OldestXmin.
     117             :      */
     118      499213 :     if (!PageIsPrunable(page, OldestXmin))
     119      424145 :         return;
     120             : 
     121             :     /*
     122             :      * We prune when a previous UPDATE failed to find enough space on the page
     123             :      * for a new tuple version, or when free space falls below the relation's
     124             :      * fill-factor target (but not less than 10%).
     125             :      *
     126             :      * Checking free space here is questionable since we aren't holding any
     127             :      * lock on the buffer; in the worst case we could get a bogus answer. It's
     128             :      * unlikely to be *seriously* wrong, though, since reading either pd_lower
     129             :      * or pd_upper is probably atomic.  Avoiding taking a lock seems more
     130             :      * important than sometimes getting a wrong answer in what is after all
     131             :      * just a heuristic estimate.
     132             :      */
     133       75068 :     minfree = RelationGetTargetPageFreeSpace(relation,
     134             :                                              HEAP_DEFAULT_FILLFACTOR);
     135       75068 :     minfree = Max(minfree, BLCKSZ / 10);
     136             : 
     137       75068 :     if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
     138             :     {
     139             :         /* OK, try to get exclusive buffer lock */
     140        2346 :         if (!ConditionalLockBufferForCleanup(buffer))
     141          42 :             return;
     142             : 
     143             :         /*
     144             :          * Now that we have buffer lock, get accurate information about the
     145             :          * page's free space, and recheck the heuristic about whether to
     146             :          * prune. (We needn't recheck PageIsPrunable, since no one else could
     147             :          * have pruned while we hold pin.)
     148             :          */
     149        2304 :         if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
     150             :         {
     151        2304 :             TransactionId ignore = InvalidTransactionId;    /* return value not
     152             :                                                              * needed */
     153             : 
     154             :             /* OK to prune */
     155        2304 :             (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore);
     156             :         }
     157             : 
     158             :         /* And release buffer lock */
     159        2304 :         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
     160             :     }
     161             : }
     162             : 
     163             : 
     164             : /*
     165             :  * Prune and repair fragmentation in the specified page.
     166             :  *
     167             :  * Caller must have pin and buffer cleanup lock on the page.
     168             :  *
     169             :  * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
     170             :  * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
     171             :  *
     172             :  * If report_stats is true then we send the number of reclaimed heap-only
     173             :  * tuples to pgstats.  (This must be FALSE during vacuum, since vacuum will
     174             :  * send its own new total to pgstats, and we don't want this delta applied
     175             :  * on top of that.)
     176             :  *
     177             :  * Returns the number of tuples deleted from the page and sets
     178             :  * latestRemovedXid.
     179             :  */
     180             : int
     181        6824 : heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
     182             :                 bool report_stats, TransactionId *latestRemovedXid)
     183             : {
     184        6824 :     int         ndeleted = 0;
     185        6824 :     Page        page = BufferGetPage(buffer);
     186             :     OffsetNumber offnum,
     187             :                 maxoff;
     188             :     PruneState  prstate;
     189             : 
     190             :     /*
     191             :      * Our strategy is to scan the page and make lists of items to change,
     192             :      * then apply the changes within a critical section.  This keeps as much
     193             :      * logic as possible out of the critical section, and also ensures that
     194             :      * WAL replay will work the same as the normal case.
     195             :      *
     196             :      * First, initialize the new pd_prune_xid value to zero (indicating no
     197             :      * prunable tuples).  If we find any tuples which may soon become
     198             :      * prunable, we will save the lowest relevant XID in new_prune_xid. Also
     199             :      * initialize the rest of our working state.
     200             :      */
     201        6824 :     prstate.new_prune_xid = InvalidTransactionId;
     202        6824 :     prstate.latestRemovedXid = *latestRemovedXid;
     203        6824 :     prstate.nredirected = prstate.ndead = prstate.nunused = 0;
     204        6824 :     memset(prstate.marked, 0, sizeof(prstate.marked));
     205             : 
     206             :     /* Scan the page */
     207        6824 :     maxoff = PageGetMaxOffsetNumber(page);
     208      651230 :     for (offnum = FirstOffsetNumber;
     209             :          offnum <= maxoff;
     210      637582 :          offnum = OffsetNumberNext(offnum))
     211             :     {
     212             :         ItemId      itemid;
     213             : 
     214             :         /* Ignore items already processed as part of an earlier chain */
     215      637582 :         if (prstate.marked[offnum])
     216        4160 :             continue;
     217             : 
     218             :         /* Nothing to do if slot is empty or already dead */
     219      633422 :         itemid = PageGetItemId(page, offnum);
     220      633422 :         if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
     221      100490 :             continue;
     222             : 
     223             :         /* Process this item or chain of items */
     224      532932 :         ndeleted += heap_prune_chain(relation, buffer, offnum,
     225             :                                      OldestXmin,
     226             :                                      &prstate);
     227             :     }
     228             : 
     229             :     /* Any error while applying the changes is critical */
     230        6824 :     START_CRIT_SECTION();
     231             : 
     232             :     /* Have we found any prunable items? */
     233        6824 :     if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
     234             :     {
     235             :         /*
     236             :          * Apply the planned item changes, then repair page fragmentation, and
     237             :          * update the page's hint bit about whether it has free line pointers.
     238             :          */
     239        2900 :         heap_page_prune_execute(buffer,
     240             :                                 prstate.redirected, prstate.nredirected,
     241             :                                 prstate.nowdead, prstate.ndead,
     242             :                                 prstate.nowunused, prstate.nunused);
     243             : 
     244             :         /*
     245             :          * Update the page's pd_prune_xid field to either zero, or the lowest
     246             :          * XID of any soon-prunable tuple.
     247             :          */
     248        2900 :         ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
     249             : 
     250             :         /*
     251             :          * Also clear the "page is full" flag, since there's no point in
     252             :          * repeating the prune/defrag process until something else happens to
     253             :          * the page.
     254             :          */
     255        2900 :         PageClearFull(page);
     256             : 
     257        2900 :         MarkBufferDirty(buffer);
     258             : 
     259             :         /*
     260             :          * Emit a WAL HEAP_CLEAN record showing what we did
     261             :          */
     262        5800 :         if (RelationNeedsWAL(relation))
     263             :         {
     264             :             XLogRecPtr  recptr;
     265             : 
     266        2900 :             recptr = log_heap_clean(relation, buffer,
     267             :                                     prstate.redirected, prstate.nredirected,
     268             :                                     prstate.nowdead, prstate.ndead,
     269             :                                     prstate.nowunused, prstate.nunused,
     270             :                                     prstate.latestRemovedXid);
     271             : 
     272        2900 :             PageSetLSN(BufferGetPage(buffer), recptr);
     273             :         }
     274             :     }
     275             :     else
     276             :     {
     277             :         /*
     278             :          * If we didn't prune anything, but have found a new value for the
     279             :          * pd_prune_xid field, update it and mark the buffer dirty. This is
     280             :          * treated as a non-WAL-logged hint.
     281             :          *
     282             :          * Also clear the "page is full" flag if it is set, since there's no
     283             :          * point in repeating the prune/defrag process until something else
     284             :          * happens to the page.
     285             :          */
     286        7833 :         if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
     287        3909 :             PageIsFull(page))
     288             :         {
     289          15 :             ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
     290          15 :             PageClearFull(page);
     291          15 :             MarkBufferDirtyHint(buffer, true);
     292             :         }
     293             :     }
     294             : 
     295        6824 :     END_CRIT_SECTION();
     296             : 
     297             :     /*
     298             :      * If requested, report the number of tuples reclaimed to pgstats. This is
     299             :      * ndeleted minus ndead, because we don't want to count a now-DEAD root
     300             :      * item as a deletion for this purpose.
     301             :      */
     302        6824 :     if (report_stats && ndeleted > prstate.ndead)
     303         785 :         pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
     304             : 
     305        6824 :     *latestRemovedXid = prstate.latestRemovedXid;
     306             : 
     307             :     /*
     308             :      * XXX Should we update the FSM information of this page ?
     309             :      *
     310             :      * There are two schools of thought here. We may not want to update FSM
     311             :      * information so that the page is not used for unrelated UPDATEs/INSERTs
     312             :      * and any free space in this page will remain available for further
     313             :      * UPDATEs in *this* page, thus improving chances for doing HOT updates.
     314             :      *
     315             :      * But for a large table and where a page does not receive further UPDATEs
     316             :      * for a long time, we might waste this space by not updating the FSM
     317             :      * information. The relation may get extended and fragmented further.
     318             :      *
     319             :      * One possibility is to leave "fillfactor" worth of space in this page
     320             :      * and update FSM with the remaining space.
     321             :      */
     322             : 
     323        6824 :     return ndeleted;
     324             : }
     325             : 
     326             : 
     327             : /*
     328             :  * Prune specified item pointer or a HOT chain originating at that item.
     329             :  *
     330             :  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
     331             :  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
     332             :  * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
     333             :  * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
     334             :  * DEAD, the OldestXmin test is just too coarse to detect it.
     335             :  *
     336             :  * The root line pointer is redirected to the tuple immediately after the
     337             :  * latest DEAD tuple.  If all tuples in the chain are DEAD, the root line
     338             :  * pointer is marked LP_DEAD.  (This includes the case of a DEAD simple
     339             :  * tuple, which we treat as a chain of length 1.)
     340             :  *
     341             :  * OldestXmin is the cutoff XID used to identify dead tuples.
     342             :  *
     343             :  * We don't actually change the page here, except perhaps for hint-bit updates
     344             :  * caused by HeapTupleSatisfiesVacuum.  We just add entries to the arrays in
     345             :  * prstate showing the changes to be made.  Items to be redirected are added
     346             :  * to the redirected[] array (two entries per redirection); items to be set to
     347             :  * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
     348             :  * state are added to nowunused[].
     349             :  *
     350             :  * Returns the number of tuples (to be) deleted from the page.
     351             :  */
     352             : static int
     353      532932 : heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
     354             :                  TransactionId OldestXmin,
     355             :                  PruneState *prstate)
     356             : {
     357      532932 :     int         ndeleted = 0;
     358      532932 :     Page        dp = (Page) BufferGetPage(buffer);
     359      532932 :     TransactionId priorXmax = InvalidTransactionId;
     360             :     ItemId      rootlp;
     361             :     HeapTupleHeader htup;
     362      532932 :     OffsetNumber latestdead = InvalidOffsetNumber,
     363      532932 :                 maxoff = PageGetMaxOffsetNumber(dp),
     364             :                 offnum;
     365             :     OffsetNumber chainitems[MaxHeapTuplesPerPage];
     366      532932 :     int         nchain = 0,
     367             :                 i;
     368             :     HeapTupleData tup;
     369             : 
     370      532932 :     tup.t_tableOid = RelationGetRelid(relation);
     371             : 
     372      532932 :     rootlp = PageGetItemId(dp, rootoffnum);
     373             : 
     374             :     /*
     375             :      * If it's a heap-only tuple, then it is not the start of a HOT chain.
     376             :      */
     377      532932 :     if (ItemIdIsNormal(rootlp))
     378             :     {
     379      528556 :         htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
     380             : 
     381      528556 :         tup.t_data = htup;
     382      528556 :         tup.t_len = ItemIdGetLength(rootlp);
     383      528556 :         ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum);
     384             : 
     385      528556 :         if (HeapTupleHeaderIsHeapOnly(htup))
     386             :         {
     387             :             /*
     388             :              * If the tuple is DEAD and doesn't chain to anything else, mark
     389             :              * it unused immediately.  (If it does chain, we can only remove
     390             :              * it as part of pruning its chain.)
     391             :              *
     392             :              * We need this primarily to handle aborted HOT updates, that is,
     393             :              * XMIN_INVALID heap-only tuples.  Those might not be linked to by
     394             :              * any chain, since the parent tuple might be re-updated before
     395             :              * any pruning occurs.  So we have to be able to reap them
     396             :              * separately from chain-pruning.  (Note that
     397             :              * HeapTupleHeaderIsHotUpdated will never return true for an
     398             :              * XMIN_INVALID tuple, so this code will work even when there were
     399             :              * sequential updates within the aborted transaction.)
     400             :              *
     401             :              * Note that we might first arrive at a dead heap-only tuple
     402             :              * either here or while following a chain below.  Whichever path
     403             :              * gets there first will mark the tuple unused.
     404             :              */
     405        4885 :             if (HeapTupleSatisfiesVacuum(&tup, OldestXmin, buffer)
     406         450 :                 == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup))
     407             :             {
     408         308 :                 heap_prune_record_unused(prstate, rootoffnum);
     409         308 :                 HeapTupleHeaderAdvanceLatestRemovedXid(htup,
     410             :                                                        &prstate->latestRemovedXid);
     411         308 :                 ndeleted++;
     412             :             }
     413             : 
     414             :             /* Nothing more to do */
     415        4885 :             return ndeleted;
     416             :         }
     417             :     }
     418             : 
     419             :     /* Start from the root tuple */
     420      528047 :     offnum = rootoffnum;
     421             : 
     422             :     /* while not end of the chain */
     423             :     for (;;)
     424             :     {
     425             :         ItemId      lp;
     426             :         bool        tupdead,
     427             :                     recent_dead;
     428             : 
     429             :         /* Some sanity checks */
     430      536624 :         if (offnum < FirstOffsetNumber || offnum > maxoff)
     431             :             break;
     432             : 
     433             :         /* If item is already processed, stop --- it must not be same chain */
     434      536624 :         if (prstate->marked[offnum])
     435         120 :             break;
     436             : 
     437      536504 :         lp = PageGetItemId(dp, offnum);
     438             : 
     439             :         /* Unused item obviously isn't part of the chain */
     440      536504 :         if (!ItemIdIsUsed(lp))
     441           0 :             break;
     442             : 
     443             :         /*
     444             :          * If we are looking at the redirected root line pointer, jump to the
     445             :          * first normal tuple in the chain.  If we find a redirect somewhere
     446             :          * else, stop --- it must not be same chain.
     447             :          */
     448      536504 :         if (ItemIdIsRedirected(lp))
     449             :         {
     450        4376 :             if (nchain > 0)
     451           0 :                 break;          /* not at start of chain */
     452        4376 :             chainitems[nchain++] = offnum;
     453        4376 :             offnum = ItemIdGetRedirect(rootlp);
     454        4376 :             continue;
     455             :         }
     456             : 
     457             :         /*
     458             :          * Likewise, a dead item pointer can't be part of the chain. (We
     459             :          * already eliminated the case of dead root tuple outside this
     460             :          * function.)
     461             :          */
     462      532128 :         if (ItemIdIsDead(lp))
     463           0 :             break;
     464             : 
     465      532128 :         Assert(ItemIdIsNormal(lp));
     466      532128 :         htup = (HeapTupleHeader) PageGetItem(dp, lp);
     467             : 
     468      532128 :         tup.t_data = htup;
     469      532128 :         tup.t_len = ItemIdGetLength(lp);
     470      532128 :         ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum);
     471             : 
     472             :         /*
     473             :          * Check the tuple XMIN against prior XMAX, if any
     474             :          */
     475      536239 :         if (TransactionIdIsValid(priorXmax) &&
     476        4111 :             !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
     477           0 :             break;
     478             : 
     479             :         /*
     480             :          * OK, this tuple is indeed a member of the chain.
     481             :          */
     482      532128 :         chainitems[nchain++] = offnum;
     483             : 
     484             :         /*
     485             :          * Check tuple's visibility status.
     486             :          */
     487      532128 :         tupdead = recent_dead = false;
     488             : 
     489      532128 :         switch (HeapTupleSatisfiesVacuum(&tup, OldestXmin, buffer))
     490             :         {
     491             :             case HEAPTUPLE_DEAD:
     492       97553 :                 tupdead = true;
     493       97553 :                 break;
     494             : 
     495             :             case HEAPTUPLE_RECENTLY_DEAD:
     496       33103 :                 recent_dead = true;
     497             : 
     498             :                 /*
     499             :                  * This tuple may soon become DEAD.  Update the hint field so
     500             :                  * that the page is reconsidered for pruning in future.
     501             :                  */
     502       33103 :                 heap_prune_record_prunable(prstate,
     503       66206 :                                            HeapTupleHeaderGetUpdateXid(htup));
     504       33103 :                 break;
     505             : 
     506             :             case HEAPTUPLE_DELETE_IN_PROGRESS:
     507             : 
     508             :                 /*
     509             :                  * This tuple may soon become DEAD.  Update the hint field so
     510             :                  * that the page is reconsidered for pruning in future.
     511             :                  */
     512         516 :                 heap_prune_record_prunable(prstate,
     513        1032 :                                            HeapTupleHeaderGetUpdateXid(htup));
     514         516 :                 break;
     515             : 
     516             :             case HEAPTUPLE_LIVE:
     517             :             case HEAPTUPLE_INSERT_IN_PROGRESS:
     518             : 
     519             :                 /*
     520             :                  * If we wanted to optimize for aborts, we might consider
     521             :                  * marking the page prunable when we see INSERT_IN_PROGRESS.
     522             :                  * But we don't.  See related decisions about when to mark the
     523             :                  * page prunable in heapam.c.
     524             :                  */
     525      400956 :                 break;
     526             : 
     527             :             default:
     528           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
     529             :                 break;
     530             :         }
     531             : 
     532             :         /*
     533             :          * Remember the last DEAD tuple seen.  We will advance past
     534             :          * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
     535             :          * but we can't advance past anything else.  (XXX is it really worth
     536             :          * continuing to scan beyond RECENTLY_DEAD?  The case where we will
     537             :          * find another DEAD tuple is a fairly unusual corner case.)
     538             :          */
     539      532128 :         if (tupdead)
     540             :         {
     541       97553 :             latestdead = offnum;
     542       97553 :             HeapTupleHeaderAdvanceLatestRemovedXid(htup,
     543             :                                                    &prstate->latestRemovedXid);
     544             :         }
     545      434575 :         else if (!recent_dead)
     546      401472 :             break;
     547             : 
     548             :         /*
     549             :          * If the tuple is not HOT-updated, then we are at the end of this
     550             :          * HOT-update chain.
     551             :          */
     552      130656 :         if (!HeapTupleHeaderIsHotUpdated(htup))
     553             :             break;
     554             : 
     555             :         /*
     556             :          * Advance to next chain member.
     557             :          */
     558        4201 :         Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
     559             :                BufferGetBlockNumber(buffer));
     560        4201 :         offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
     561        4201 :         priorXmax = HeapTupleHeaderGetUpdateXid(htup);
     562        8577 :     }
     563             : 
     564             :     /*
     565             :      * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
     566             :      * the DEAD tuples at the start of the chain are removed and the root line
     567             :      * pointer is appropriately redirected.
     568             :      */
     569      528047 :     if (OffsetNumberIsValid(latestdead))
     570             :     {
     571             :         /*
     572             :          * Mark as unused each intermediate item that we are able to remove
     573             :          * from the chain.
     574             :          *
     575             :          * When the previous item is the last dead tuple seen, we are at the
     576             :          * right candidate for redirection.
     577             :          */
     578       98209 :         for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
     579             :         {
     580        2990 :             heap_prune_record_unused(prstate, chainitems[i]);
     581        2990 :             ndeleted++;
     582             :         }
     583             : 
     584             :         /*
     585             :          * If the root entry had been a normal tuple, we are deleting it, so
     586             :          * count it in the result.  But changing a redirect (even to DEAD
     587             :          * state) doesn't count.
     588             :          */
     589       95219 :         if (ItemIdIsNormal(rootlp))
     590       94563 :             ndeleted++;
     591             : 
     592             :         /*
     593             :          * If the DEAD tuple is at the end of the chain, the entire chain is
     594             :          * dead and the root line pointer can be marked dead.  Otherwise just
     595             :          * redirect the root to the correct chain member.
     596             :          */
     597       95219 :         if (i >= nchain)
     598       93820 :             heap_prune_record_dead(prstate, rootoffnum);
     599             :         else
     600        1399 :             heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
     601             :     }
     602      432828 :     else if (nchain < 2 && ItemIdIsRedirected(rootlp))
     603             :     {
     604             :         /*
     605             :          * We found a redirect item that doesn't point to a valid follow-on
     606             :          * item.  This can happen if the loop in heap_page_prune caused us to
     607             :          * visit the dead successor of a redirect item before visiting the
     608             :          * redirect item.  We can clean up by setting the redirect item to
     609             :          * DEAD state.
     610             :          */
     611          30 :         heap_prune_record_dead(prstate, rootoffnum);
     612             :     }
     613             : 
     614      528047 :     return ndeleted;
     615             : }
     616             : 
     617             : /* Record lowest soon-prunable XID */
     618             : static void
     619       33619 : heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
     620             : {
     621             :     /*
     622             :      * This should exactly match the PageSetPrunable macro.  We can't store
     623             :      * directly into the page header yet, so we update working state.
     624             :      */
     625       33619 :     Assert(TransactionIdIsNormal(xid));
     626       66570 :     if (!TransactionIdIsValid(prstate->new_prune_xid) ||
     627       32951 :         TransactionIdPrecedes(xid, prstate->new_prune_xid))
     628         732 :         prstate->new_prune_xid = xid;
     629       33619 : }
     630             : 
     631             : /* Record item pointer to be redirected */
     632             : static void
     633        1399 : heap_prune_record_redirect(PruneState *prstate,
     634             :                            OffsetNumber offnum, OffsetNumber rdoffnum)
     635             : {
     636        1399 :     Assert(prstate->nredirected < MaxHeapTuplesPerPage);
     637        1399 :     prstate->redirected[prstate->nredirected * 2] = offnum;
     638        1399 :     prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
     639        1399 :     prstate->nredirected++;
     640        1399 :     Assert(!prstate->marked[offnum]);
     641        1399 :     prstate->marked[offnum] = true;
     642        1399 :     Assert(!prstate->marked[rdoffnum]);
     643        1399 :     prstate->marked[rdoffnum] = true;
     644        1399 : }
     645             : 
     646             : /* Record item pointer to be marked dead */
     647             : static void
     648       93850 : heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
     649             : {
     650       93850 :     Assert(prstate->ndead < MaxHeapTuplesPerPage);
     651       93850 :     prstate->nowdead[prstate->ndead] = offnum;
     652       93850 :     prstate->ndead++;
     653       93850 :     Assert(!prstate->marked[offnum]);
     654       93850 :     prstate->marked[offnum] = true;
     655       93850 : }
     656             : 
     657             : /* Record item pointer to be marked unused */
     658             : static void
     659        3298 : heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
     660             : {
     661        3298 :     Assert(prstate->nunused < MaxHeapTuplesPerPage);
     662        3298 :     prstate->nowunused[prstate->nunused] = offnum;
     663        3298 :     prstate->nunused++;
     664        3298 :     Assert(!prstate->marked[offnum]);
     665        3298 :     prstate->marked[offnum] = true;
     666        3298 : }
     667             : 
     668             : 
     669             : /*
     670             :  * Perform the actual page changes needed by heap_page_prune.
     671             :  * It is expected that the caller has suitable pin and lock on the
     672             :  * buffer, and is inside a critical section.
     673             :  *
     674             :  * This is split out because it is also used by heap_xlog_clean()
     675             :  * to replay the WAL record when needed after a crash.  Note that the
     676             :  * arguments are identical to those of log_heap_clean().
     677             :  */
     678             : void
     679        2900 : heap_page_prune_execute(Buffer buffer,
     680             :                         OffsetNumber *redirected, int nredirected,
     681             :                         OffsetNumber *nowdead, int ndead,
     682             :                         OffsetNumber *nowunused, int nunused)
     683             : {
     684        2900 :     Page        page = (Page) BufferGetPage(buffer);
     685             :     OffsetNumber *offnum;
     686             :     int         i;
     687             : 
     688             :     /* Update all redirected line pointers */
     689        2900 :     offnum = redirected;
     690        4299 :     for (i = 0; i < nredirected; i++)
     691             :     {
     692        1399 :         OffsetNumber fromoff = *offnum++;
     693        1399 :         OffsetNumber tooff = *offnum++;
     694        1399 :         ItemId      fromlp = PageGetItemId(page, fromoff);
     695             : 
     696        1399 :         ItemIdSetRedirect(fromlp, tooff);
     697             :     }
     698             : 
     699             :     /* Update all now-dead line pointers */
     700        2900 :     offnum = nowdead;
     701       96750 :     for (i = 0; i < ndead; i++)
     702             :     {
     703       93850 :         OffsetNumber off = *offnum++;
     704       93850 :         ItemId      lp = PageGetItemId(page, off);
     705             : 
     706       93850 :         ItemIdSetDead(lp);
     707             :     }
     708             : 
     709             :     /* Update all now-unused line pointers */
     710        2900 :     offnum = nowunused;
     711        6198 :     for (i = 0; i < nunused; i++)
     712             :     {
     713        3298 :         OffsetNumber off = *offnum++;
     714        3298 :         ItemId      lp = PageGetItemId(page, off);
     715             : 
     716        3298 :         ItemIdSetUnused(lp);
     717             :     }
     718             : 
     719             :     /*
     720             :      * Finally, repair any fragmentation, and update the page's hint bit about
     721             :      * whether it has free pointers.
     722             :      */
     723        2900 :     PageRepairFragmentation(page);
     724        2900 : }
     725             : 
     726             : 
     727             : /*
     728             :  * For all items in this page, find their respective root line pointers.
     729             :  * If item k is part of a HOT-chain with root at item j, then we set
     730             :  * root_offsets[k - 1] = j.
     731             :  *
     732             :  * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
     733             :  * We zero out all unused entries.
     734             :  *
     735             :  * The function must be called with at least share lock on the buffer, to
     736             :  * prevent concurrent prune operations.
     737             :  *
     738             :  * Note: The information collected here is valid only as long as the caller
     739             :  * holds a pin on the buffer. Once pin is released, a tuple might be pruned
     740             :  * and reused by a completely unrelated tuple.
     741             :  */
     742             : void
     743       11204 : heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
     744             : {
     745             :     OffsetNumber offnum,
     746             :                 maxoff;
     747             : 
     748       11204 :     MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));
     749             : 
     750       11204 :     maxoff = PageGetMaxOffsetNumber(page);
     751      703494 :     for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
     752             :     {
     753      692290 :         ItemId      lp = PageGetItemId(page, offnum);
     754             :         HeapTupleHeader htup;
     755             :         OffsetNumber nextoffnum;
     756             :         TransactionId priorXmax;
     757             : 
     758             :         /* skip unused and dead items */
     759      692290 :         if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
     760         140 :             continue;
     761             : 
     762      692150 :         if (ItemIdIsNormal(lp))
     763             :         {
     764      692150 :             htup = (HeapTupleHeader) PageGetItem(page, lp);
     765             : 
     766             :             /*
     767             :              * Check if this tuple is part of a HOT-chain rooted at some other
     768             :              * tuple. If so, skip it for now; we'll process it when we find
     769             :              * its root.
     770             :              */
     771      692150 :             if (HeapTupleHeaderIsHeapOnly(htup))
     772          59 :                 continue;
     773             : 
     774             :             /*
     775             :              * This is either a plain tuple or the root of a HOT-chain.
     776             :              * Remember it in the mapping.
     777             :              */
     778      692091 :             root_offsets[offnum - 1] = offnum;
     779             : 
     780             :             /* If it's not the start of a HOT-chain, we're done with it */
     781      692091 :             if (!HeapTupleHeaderIsHotUpdated(htup))
     782      692045 :                 continue;
     783             : 
     784             :             /* Set up to scan the HOT-chain */
     785          46 :             nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
     786          46 :             priorXmax = HeapTupleHeaderGetUpdateXid(htup);
     787             :         }
     788             :         else
     789             :         {
     790             :             /* Must be a redirect item. We do not set its root_offsets entry */
     791           0 :             Assert(ItemIdIsRedirected(lp));
     792             :             /* Set up to scan the HOT-chain */
     793           0 :             nextoffnum = ItemIdGetRedirect(lp);
     794           0 :             priorXmax = InvalidTransactionId;
     795             :         }
     796             : 
     797             :         /*
     798             :          * Now follow the HOT-chain and collect other tuples in the chain.
     799             :          *
     800             :          * Note: Even though this is a nested loop, the complexity of the
     801             :          * function is O(N) because a tuple in the page should be visited not
     802             :          * more than twice, once in the outer loop and once in HOT-chain
     803             :          * chases.
     804             :          */
     805             :         for (;;)
     806             :         {
     807          59 :             lp = PageGetItemId(page, nextoffnum);
     808             : 
     809             :             /* Check for broken chains */
     810          59 :             if (!ItemIdIsNormal(lp))
     811           0 :                 break;
     812             : 
     813          59 :             htup = (HeapTupleHeader) PageGetItem(page, lp);
     814             : 
     815         118 :             if (TransactionIdIsValid(priorXmax) &&
     816          59 :                 !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
     817           0 :                 break;
     818             : 
     819             :             /* Remember the root line pointer for this item */
     820          59 :             root_offsets[nextoffnum - 1] = offnum;
     821             : 
     822             :             /* Advance to next chain member, if any */
     823          59 :             if (!HeapTupleHeaderIsHotUpdated(htup))
     824             :                 break;
     825             : 
     826          13 :             nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
     827          13 :             priorXmax = HeapTupleHeaderGetUpdateXid(htup);
     828          13 :         }
     829             :     }
     830       11204 : }

Generated by: LCOV version 1.11