LCOV - code coverage report
Current view: top level - src/backend/commands - vacuumlazy.c (source / functions)
Test:         PostgreSQL
Date:         2017-09-29 15:12:54

                 Hit    Total    Coverage
Lines:           486      634      76.7 %
Functions:        15       16      93.8 %

Legend: Lines: hit / not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * vacuumlazy.c
       4             :  *    Concurrent ("lazy") vacuuming.
       5             :  *
       6             :  *
       7             :  * The major space usage for LAZY VACUUM is storage for the array of dead tuple
       8             :  * TIDs.  We want to ensure we can vacuum even the very largest relations with
       9             :  * finite memory space usage.  To do that, we set upper bounds on the number of
      10             :  * tuples we will keep track of at once.
      11             :  *
      12             :  * We are willing to use at most maintenance_work_mem (or perhaps
      13             :  * autovacuum_work_mem) memory space to keep track of dead tuples.  We
      14             :  * initially allocate an array of TIDs of that size, with an upper limit that
      15             :  * depends on table size (this limit ensures we don't allocate a huge area
      16             :  * uselessly for vacuuming small tables).  If the array threatens to overflow,
      17             :  * we suspend the heap scan phase and perform a pass of index cleanup and page
      18             :  * compaction, then resume the heap scan with an empty TID array.
      19             :  *
      20             :  * If we're processing a table with no indexes, we can just vacuum each page
      21             :  * as we go; there's no need to save up multiple tuples to minimize the number
      22             :  * of index scans performed.  So we don't use maintenance_work_mem memory for
      23             :  * the TID array, just enough to hold as many heap tuples as fit on one page.
      24             :  *
      25             :  *
      26             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      27             :  * Portions Copyright (c) 1994, Regents of the University of California
      28             :  *
      29             :  *
      30             :  * IDENTIFICATION
      31             :  *    src/backend/commands/vacuumlazy.c
      32             :  *
      33             :  *-------------------------------------------------------------------------
      34             :  */
      35             : #include "postgres.h"
      36             : 
      37             : #include <math.h>
      38             : 
      39             : #include "access/genam.h"
      40             : #include "access/heapam.h"
      41             : #include "access/heapam_xlog.h"
      42             : #include "access/htup_details.h"
      43             : #include "access/multixact.h"
      44             : #include "access/transam.h"
      45             : #include "access/visibilitymap.h"
      46             : #include "access/xlog.h"
      47             : #include "catalog/catalog.h"
      48             : #include "catalog/storage.h"
      49             : #include "commands/dbcommands.h"
      50             : #include "commands/progress.h"
      51             : #include "commands/vacuum.h"
      52             : #include "miscadmin.h"
      53             : #include "pgstat.h"
      54             : #include "portability/instr_time.h"
      55             : #include "postmaster/autovacuum.h"
      56             : #include "storage/bufmgr.h"
      57             : #include "storage/freespace.h"
      58             : #include "storage/lmgr.h"
      59             : #include "utils/lsyscache.h"
      60             : #include "utils/memutils.h"
      61             : #include "utils/pg_rusage.h"
      62             : #include "utils/timestamp.h"
      63             : #include "utils/tqual.h"
      64             : 
      65             : 
      66             : /*
      67             :  * Space/time tradeoff parameters: do these need to be user-tunable?
      68             :  *
      69             :  * To consider truncating the relation, we want there to be at least
      70             :  * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
      71             :  * is less) potentially-freeable pages.
      72             :  */
      73             : #define REL_TRUNCATE_MINIMUM    1000
      74             : #define REL_TRUNCATE_FRACTION   16
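
A rough sketch of how these two thresholds are meant to combine (the
possibly_freeable computation follows the rel_pages and nonempty_pages fields
of LVRelStats and the should_attempt_truncation() prototype shown further
down; the real test may carry additional guards):

    /* sketch of the decision, not the instrumented code itself */
    BlockNumber possibly_freeable;

    possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
    return possibly_freeable > 0 &&
        (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
         possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION);

For a 160-page table the effective threshold is min(1000, 160/16) = 10
freeable pages; for a 1,000,000-page table it is min(1000, 62500) = 1000.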
      75             : 
      76             : /*
      77             :  * Timing parameters for truncate locking heuristics.
      78             :  *
      79             :  * These were not exposed as user tunable GUC values because it didn't seem
      80             :  * that the potential for improvement was great enough to merit the cost of
      81             :  * supporting them.
      82             :  */
      83             : #define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL     20  /* ms */
      84             : #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL      50  /* ms */
      85             : #define VACUUM_TRUNCATE_LOCK_TIMEOUT            5000    /* ms */
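
Roughly, these intervals drive two heuristics during truncation (the details
live in lazy_truncate_heap() and count_nondeletable_pages(), which are only
prototyped in this section, so treat this as an approximation): the vacuum
retries ConditionalLockRelation(onerel, AccessExclusiveLock) in
VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL steps and gives up after roughly
VACUUM_TRUNCATE_LOCK_TIMEOUT in total, and while counting removable pages it
checks about every VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL whether another
session is waiting on the lock, e.g.:

    /* sketch: note a waiter so the backwards scan can be suspended */
    if (LockHasWaitersRelation(onerel, AccessExclusiveLock))
        vacrelstats->lock_waiter_detected = true;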
      86             : 
      87             : /*
      88             :  * Guesstimation of number of dead tuples per page.  This is used to
      89             :  * provide an upper limit to memory allocated when vacuuming small
      90             :  * tables.
      91             :  */
      92             : #define LAZY_ALLOC_TUPLES       MaxHeapTuplesPerPage
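
A rough sketch of how this per-page cap combines with the memory budget
described in the header comment (the actual arithmetic is in
lazy_space_alloc(), only prototyped in this section; vac_work_mem stands for
whichever of autovacuum_work_mem or maintenance_work_mem applies, in kB):

    long        maxtuples;

    if (vacrelstats->hasindex)
    {
        /* as many TIDs as the memory budget allows ... */
        maxtuples = (vac_work_mem * 1024L) / sizeof(ItemPointerData);
        /* ... but never more than one page's worth per heap page */
        if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
            maxtuples = relblocks * LAZY_ALLOC_TUPLES;
    }
    else
        maxtuples = MaxHeapTuplesPerPage;   /* one-pass case: one page only */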
      93             : 
      94             : /*
      95             :  * Before we consider skipping a page that's marked as clean in
      96             :  * visibility map, we must've seen at least this many clean pages.
      97             :  */
      98             : #define SKIP_PAGES_THRESHOLD    ((BlockNumber) 32)
      99             : 
     100             : /*
     101             :  * Size of the prefetch window for lazy vacuum backwards truncation scan.
     102             :  * Needs to be a power of 2.
     103             :  */
     104             : #define PREFETCH_SIZE           ((BlockNumber) 32)
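
The power-of-2 requirement lets the backwards truncation scan round a block
number down to the start of its prefetch window with a simple mask, roughly
as in count_nondeletable_pages() (prototyped below; the surrounding loop is
omitted here):

    BlockNumber prefetchStart;
    BlockNumber pblkno;

    /* round blkno down to the start of its PREFETCH_SIZE-aligned window */
    prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
    for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
        PrefetchBuffer(onerel, MAIN_FORKNUM, pblkno);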
     105             : 
     106             : typedef struct LVRelStats
     107             : {
     108             :     /* hasindex = true means two-pass strategy; false means one-pass */
     109             :     bool        hasindex;
     110             :     /* Overall statistics about rel */
     111             :     BlockNumber old_rel_pages;  /* previous value of pg_class.relpages */
     112             :     BlockNumber rel_pages;      /* total number of pages */
     113             :     BlockNumber scanned_pages;  /* number of pages we examined */
     114             :     BlockNumber pinskipped_pages;   /* # of pages we skipped due to a pin */
     115             :     BlockNumber frozenskipped_pages;    /* # of frozen pages we skipped */
     116             :     BlockNumber tupcount_pages; /* pages whose tuples we counted */
     117             :     double      scanned_tuples; /* counts only tuples on tupcount_pages */
     118             :     double      old_rel_tuples; /* previous value of pg_class.reltuples */
     119             :     double      new_rel_tuples; /* new estimated total # of tuples */
     120             :     double      new_dead_tuples;    /* new estimated total # of dead tuples */
     121             :     BlockNumber pages_removed;
     122             :     double      tuples_deleted;
     123             :     BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
     124             :     /* List of TIDs of tuples we intend to delete */
     125             :     /* NB: this list is ordered by TID address */
     126             :     int         num_dead_tuples;    /* current # of entries */
     127             :     int         max_dead_tuples;    /* # slots allocated in array */
     128             :     ItemPointer dead_tuples;    /* array of ItemPointerData */
     129             :     int         num_index_scans;
     130             :     TransactionId latestRemovedXid;
     131             :     bool        lock_waiter_detected;
     132             : } LVRelStats;
     133             : 
     134             : 
     135             : /* A few variables that don't seem worth passing around as parameters */
     136             : static int  elevel = -1;
     137             : 
     138             : static TransactionId OldestXmin;
     139             : static TransactionId FreezeLimit;
     140             : static MultiXactId MultiXactCutoff;
     141             : 
     142             : static BufferAccessStrategy vac_strategy;
     143             : 
     144             : 
     145             : /* non-export function prototypes */
     146             : static void lazy_scan_heap(Relation onerel, int options,
     147             :                LVRelStats *vacrelstats, Relation *Irel, int nindexes,
     148             :                bool aggressive);
     149             : static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
     150             : static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
     151             : static void lazy_vacuum_index(Relation indrel,
     152             :                   IndexBulkDeleteResult **stats,
     153             :                   LVRelStats *vacrelstats);
     154             : static void lazy_cleanup_index(Relation indrel,
     155             :                    IndexBulkDeleteResult *stats,
     156             :                    LVRelStats *vacrelstats);
     157             : static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
     158             :                  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
     159             : static bool should_attempt_truncation(LVRelStats *vacrelstats);
     160             : static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
     161             : static BlockNumber count_nondeletable_pages(Relation onerel,
     162             :                          LVRelStats *vacrelstats);
     163             : static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
     164             : static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
     165             :                        ItemPointer itemptr);
     166             : static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
     167             : static int  vac_cmp_itemptr(const void *left, const void *right);
     168             : static bool heap_page_is_all_visible(Relation rel, Buffer buf,
     169             :                          TransactionId *visibility_cutoff_xid, bool *all_frozen);
     170             : 
     171             : 
     172             : /*
     173             :  *  lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
     174             :  *
     175             :  *      This routine vacuums a single heap, cleans out its indexes, and
     176             :  *      updates its relpages and reltuples statistics.
     177             :  *
     178             :  *      At entry, we have already established a transaction and opened
     179             :  *      and locked the relation.
     180             :  */
     181             : void
     182         390 : lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
     183             :                 BufferAccessStrategy bstrategy)
     184             : {
     185             :     LVRelStats *vacrelstats;
     186             :     Relation   *Irel;
     187             :     int         nindexes;
     188             :     PGRUsage    ru0;
     189         390 :     TimestampTz starttime = 0;
     190             :     long        secs;
     191             :     int         usecs;
     192             :     double      read_rate,
     193             :                 write_rate;
     194             :     bool        aggressive;     /* should we scan all unfrozen pages? */
     195             :     bool        scanned_all_unfrozen;   /* actually scanned all such pages? */
     196             :     TransactionId xidFullScanLimit;
     197             :     MultiXactId mxactFullScanLimit;
     198             :     BlockNumber new_rel_pages;
     199             :     double      new_rel_tuples;
     200             :     BlockNumber new_rel_allvisible;
     201             :     double      new_live_tuples;
     202             :     TransactionId new_frozen_xid;
     203             :     MultiXactId new_min_multi;
     204             : 
     205         390 :     Assert(params != NULL);
     206             : 
     207             :     /* measure elapsed time iff autovacuum logging requires it */
     208         390 :     if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
     209             :     {
     210          13 :         pg_rusage_init(&ru0);
     211          13 :         starttime = GetCurrentTimestamp();
     212             :     }
     213             : 
     214         390 :     if (options & VACOPT_VERBOSE)
     215           0 :         elevel = INFO;
     216             :     else
     217         390 :         elevel = DEBUG2;
     218             : 
     219         390 :     pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
     220             :                                   RelationGetRelid(onerel));
     221             : 
     222         390 :     vac_strategy = bstrategy;
     223             : 
     224         390 :     vacuum_set_xid_limits(onerel,
     225             :                           params->freeze_min_age,
     226             :                           params->freeze_table_age,
     227             :                           params->multixact_freeze_min_age,
     228             :                           params->multixact_freeze_table_age,
     229             :                           &OldestXmin, &FreezeLimit, &xidFullScanLimit,
     230             :                           &MultiXactCutoff, &mxactFullScanLimit);
     231             : 
     232             :     /*
     233             :      * We request an aggressive scan if the table's frozen Xid is now older
     234             :      * than or equal to the requested Xid full-table scan limit; or if the
     235             :      * table's minimum MultiXactId is older than or equal to the requested
     236             :      * mxid full-table scan limit; or if DISABLE_PAGE_SKIPPING was specified.
     237             :      */
     238         390 :     aggressive = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
     239             :                                                xidFullScanLimit);
     240         390 :     aggressive |= MultiXactIdPrecedesOrEquals(onerel->rd_rel->relminmxid,
     241             :                                               mxactFullScanLimit);
     242         390 :     if (options & VACOPT_DISABLE_PAGE_SKIPPING)
     243           1 :         aggressive = true;
     244             : 
     245         390 :     vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
     246             : 
     247         390 :     vacrelstats->old_rel_pages = onerel->rd_rel->relpages;
     248         390 :     vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
     249         390 :     vacrelstats->num_index_scans = 0;
     250         390 :     vacrelstats->pages_removed = 0;
     251         390 :     vacrelstats->lock_waiter_detected = false;
     252             : 
     253             :     /* Open all indexes of the relation */
     254         390 :     vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &Irel);
     255         390 :     vacrelstats->hasindex = (nindexes > 0);
     256             : 
     257             :     /* Do the vacuuming */
     258         390 :     lazy_scan_heap(onerel, options, vacrelstats, Irel, nindexes, aggressive);
     259             : 
     260             :     /* Done with indexes */
     261         390 :     vac_close_indexes(nindexes, Irel, NoLock);
     262             : 
     263             :     /*
      264             :      * Compute whether we actually scanned all the unfrozen pages. If we did,
     265             :      * we can adjust relfrozenxid and relminmxid.
     266             :      *
     267             :      * NB: We need to check this before truncating the relation, because that
     268             :      * will change ->rel_pages.
     269             :      */
     270         780 :     if ((vacrelstats->scanned_pages + vacrelstats->frozenskipped_pages)
     271         390 :         < vacrelstats->rel_pages)
     272             :     {
     273           2 :         Assert(!aggressive);
     274           2 :         scanned_all_unfrozen = false;
     275             :     }
     276             :     else
     277         388 :         scanned_all_unfrozen = true;
     278             : 
     279             :     /*
     280             :      * Optionally truncate the relation.
     281             :      */
     282         390 :     if (should_attempt_truncation(vacrelstats))
     283          12 :         lazy_truncate_heap(onerel, vacrelstats);
     284             : 
     285             :     /* Report that we are now doing final cleanup */
     286         390 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
     287             :                                  PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
     288             : 
     289             :     /* Vacuum the Free Space Map */
     290         390 :     FreeSpaceMapVacuum(onerel);
     291             : 
     292             :     /*
     293             :      * Update statistics in pg_class.
     294             :      *
     295             :      * A corner case here is that if we scanned no pages at all because every
     296             :      * page is all-visible, we should not update relpages/reltuples, because
     297             :      * we have no new information to contribute.  In particular this keeps us
     298             :      * from replacing relpages=reltuples=0 (which means "unknown tuple
     299             :      * density") with nonzero relpages and reltuples=0 (which means "zero
     300             :      * tuple density") unless there's some actual evidence for the latter.
     301             :      *
     302             :      * It's important that we use tupcount_pages and not scanned_pages for the
     303             :      * check described above; scanned_pages counts pages where we could not
     304             :      * get cleanup lock, and which were processed only for frozenxid purposes.
     305             :      *
     306             :      * We do update relallvisible even in the corner case, since if the table
     307             :      * is all-visible we'd definitely like to know that.  But clamp the value
     308             :      * to be not more than what we're setting relpages to.
     309             :      *
     310             :      * Also, don't change relfrozenxid/relminmxid if we skipped any pages,
     311             :      * since then we don't know for certain that all tuples have a newer xmin.
     312             :      */
     313         390 :     new_rel_pages = vacrelstats->rel_pages;
     314         390 :     new_rel_tuples = vacrelstats->new_rel_tuples;
     315         390 :     if (vacrelstats->tupcount_pages == 0 && new_rel_pages > 0)
     316             :     {
     317           0 :         new_rel_pages = vacrelstats->old_rel_pages;
     318           0 :         new_rel_tuples = vacrelstats->old_rel_tuples;
     319             :     }
     320             : 
     321         390 :     visibilitymap_count(onerel, &new_rel_allvisible, NULL);
     322         390 :     if (new_rel_allvisible > new_rel_pages)
     323           0 :         new_rel_allvisible = new_rel_pages;
     324             : 
     325         390 :     new_frozen_xid = scanned_all_unfrozen ? FreezeLimit : InvalidTransactionId;
     326         390 :     new_min_multi = scanned_all_unfrozen ? MultiXactCutoff : InvalidMultiXactId;
     327             : 
     328         390 :     vac_update_relstats(onerel,
     329             :                         new_rel_pages,
     330             :                         new_rel_tuples,
     331             :                         new_rel_allvisible,
     332         390 :                         vacrelstats->hasindex,
     333             :                         new_frozen_xid,
     334             :                         new_min_multi,
     335             :                         false);
     336             : 
     337             :     /* report results to the stats collector, too */
     338         390 :     new_live_tuples = new_rel_tuples - vacrelstats->new_dead_tuples;
     339         390 :     if (new_live_tuples < 0)
     340           0 :         new_live_tuples = 0;    /* just in case */
     341             : 
     342         780 :     pgstat_report_vacuum(RelationGetRelid(onerel),
     343         390 :                          onerel->rd_rel->relisshared,
     344             :                          new_live_tuples,
     345         390 :                          vacrelstats->new_dead_tuples);
     346         390 :     pgstat_progress_end_command();
     347             : 
     348             :     /* and log the action if appropriate */
     349         390 :     if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
     350             :     {
     351          13 :         TimestampTz endtime = GetCurrentTimestamp();
     352             : 
     353          13 :         if (params->log_min_duration == 0 ||
     354           0 :             TimestampDifferenceExceeds(starttime, endtime,
     355             :                                        params->log_min_duration))
     356             :         {
     357             :             StringInfoData buf;
     358             : 
     359          13 :             TimestampDifference(starttime, endtime, &secs, &usecs);
     360             : 
     361          13 :             read_rate = 0;
     362          13 :             write_rate = 0;
     363          13 :             if ((secs > 0) || (usecs > 0))
     364             :             {
     365          13 :                 read_rate = (double) BLCKSZ * VacuumPageMiss / (1024 * 1024) /
     366             :                     (secs + usecs / 1000000.0);
     367          13 :                 write_rate = (double) BLCKSZ * VacuumPageDirty / (1024 * 1024) /
     368             :                     (secs + usecs / 1000000.0);
     369             :             }
     370             : 
     371             :             /*
     372             :              * This is pretty messy, but we split it up so that we can skip
     373             :              * emitting individual parts of the message when not applicable.
     374             :              */
     375          13 :             initStringInfo(&buf);
     376          26 :             appendStringInfo(&buf, _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n"),
     377             :                              get_database_name(MyDatabaseId),
     378          13 :                              get_namespace_name(RelationGetNamespace(onerel)),
     379          13 :                              RelationGetRelationName(onerel),
     380             :                              vacrelstats->num_index_scans);
     381          13 :             appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped frozen\n"),
     382             :                              vacrelstats->pages_removed,
     383             :                              vacrelstats->rel_pages,
     384             :                              vacrelstats->pinskipped_pages,
     385             :                              vacrelstats->frozenskipped_pages);
     386          13 :             appendStringInfo(&buf,
     387             :                              _("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable, oldest xmin: %u\n"),
     388             :                              vacrelstats->tuples_deleted,
     389             :                              vacrelstats->new_rel_tuples,
     390             :                              vacrelstats->new_dead_tuples,
     391             :                              OldestXmin);
     392          13 :             appendStringInfo(&buf,
     393             :                              _("buffer usage: %d hits, %d misses, %d dirtied\n"),
     394             :                              VacuumPageHit,
     395             :                              VacuumPageMiss,
     396             :                              VacuumPageDirty);
     397          13 :             appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
     398             :                              read_rate, write_rate);
     399          13 :             appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
     400             : 
     401          13 :             ereport(LOG,
     402             :                     (errmsg_internal("%s", buf.data)));
     403          13 :             pfree(buf.data);
     404             :         }
     405             :     }
     406         390 : }
     407             : 
     408             : /*
     409             :  * For Hot Standby we need to know the highest transaction id that will
     410             :  * be removed by any change. VACUUM proceeds in a number of passes so
     411             :  * we need to consider how each pass operates. The first phase runs
     412             :  * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it
     413             :  * progresses - these will have a latestRemovedXid on each record.
     414             :  * In some cases this removes all of the tuples to be removed, though
     415             :  * often we have dead tuples with index pointers so we must remember them
     416             :  * for removal in phase 3. Index records for those rows are removed
     417             :  * in phase 2 and index blocks do not have MVCC information attached.
     418             :  * So before we can allow removal of any index tuples we need to issue
     419             :  * a WAL record containing the latestRemovedXid of rows that will be
     420             :  * removed in phase three. This allows recovery queries to block at the
      421             :  * removed in phase 3. This allows recovery queries to block at the
      422             :  * correct place, i.e. before phase 2, rather than during phase 3,
     423             :  */
     424             : static void
     425          66 : vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
     426             : {
     427             :     /*
     428             :      * Skip this for relations for which no WAL is to be written, or if we're
     429             :      * not trying to support archive recovery.
     430             :      */
     431          66 :     if (!RelationNeedsWAL(rel) || !XLogIsNeeded())
     432          66 :         return;
     433             : 
     434             :     /*
     435             :      * No need to write the record at all unless it contains a valid value
     436             :      */
     437          66 :     if (TransactionIdIsValid(vacrelstats->latestRemovedXid))
     438          56 :         (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid);
     439             : }
     440             : 
     441             : /*
     442             :  *  lazy_scan_heap() -- scan an open heap relation
     443             :  *
     444             :  *      This routine prunes each page in the heap, which will among other
     445             :  *      things truncate dead tuples to dead line pointers, defragment the
     446             :  *      page, and set commit status bits (see heap_page_prune).  It also builds
     447             :  *      lists of dead tuples and pages with free space, calculates statistics
     448             :  *      on the number of live tuples in the heap, and marks pages as
     449             :  *      all-visible if appropriate.  When done, or when we run low on space for
     450             :  *      dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap
     451             :  *      to reclaim dead line pointers.
     452             :  *
     453             :  *      If there are no indexes then we can reclaim line pointers on the fly;
     454             :  *      dead line pointers need only be retained until all index pointers that
     455             :  *      reference them have been killed.
     456             :  */
     457             : static void
     458         390 : lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
     459             :                Relation *Irel, int nindexes, bool aggressive)
     460             : {
     461             :     BlockNumber nblocks,
     462             :                 blkno;
     463             :     HeapTupleData tuple;
     464             :     char       *relname;
     465             :     BlockNumber empty_pages,
     466             :                 vacuumed_pages;
     467             :     double      num_tuples,
     468             :                 tups_vacuumed,
     469             :                 nkeep,
     470             :                 nunused;
     471             :     IndexBulkDeleteResult **indstats;
     472             :     int         i;
     473             :     PGRUsage    ru0;
     474         390 :     Buffer      vmbuffer = InvalidBuffer;
     475             :     BlockNumber next_unskippable_block;
     476             :     bool        skipping_blocks;
     477             :     xl_heap_freeze_tuple *frozen;
     478             :     StringInfoData buf;
     479         390 :     const int   initprog_index[] = {
     480             :         PROGRESS_VACUUM_PHASE,
     481             :         PROGRESS_VACUUM_TOTAL_HEAP_BLKS,
     482             :         PROGRESS_VACUUM_MAX_DEAD_TUPLES
     483             :     };
     484             :     int64       initprog_val[3];
     485             : 
     486         390 :     pg_rusage_init(&ru0);
     487             : 
     488         390 :     relname = RelationGetRelationName(onerel);
     489         390 :     ereport(elevel,
     490             :             (errmsg("vacuuming \"%s.%s\"",
     491             :                     get_namespace_name(RelationGetNamespace(onerel)),
     492             :                     relname)));
     493             : 
     494         390 :     empty_pages = vacuumed_pages = 0;
     495         390 :     num_tuples = tups_vacuumed = nkeep = nunused = 0;
     496             : 
     497         390 :     indstats = (IndexBulkDeleteResult **)
     498         390 :         palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
     499             : 
     500         390 :     nblocks = RelationGetNumberOfBlocks(onerel);
     501         390 :     vacrelstats->rel_pages = nblocks;
     502         390 :     vacrelstats->scanned_pages = 0;
     503         390 :     vacrelstats->tupcount_pages = 0;
     504         390 :     vacrelstats->nonempty_pages = 0;
     505         390 :     vacrelstats->latestRemovedXid = InvalidTransactionId;
     506             : 
     507         390 :     lazy_space_alloc(vacrelstats, nblocks);
     508         390 :     frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage);
     509             : 
     510             :     /* Report that we're scanning the heap, advertising total # of blocks */
     511         390 :     initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
     512         390 :     initprog_val[1] = nblocks;
     513         390 :     initprog_val[2] = vacrelstats->max_dead_tuples;
     514         390 :     pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
     515             : 
     516             :     /*
     517             :      * Except when aggressive is set, we want to skip pages that are
     518             :      * all-visible according to the visibility map, but only when we can skip
     519             :      * at least SKIP_PAGES_THRESHOLD consecutive pages.  Since we're reading
     520             :      * sequentially, the OS should be doing readahead for us, so there's no
     521             :      * gain in skipping a page now and then; that's likely to disable
     522             :      * readahead and so be counterproductive. Also, skipping even a single
     523             :      * page means that we can't update relfrozenxid, so we only want to do it
     524             :      * if we can skip a goodly number of pages.
     525             :      *
     526             :      * When aggressive is set, we can't skip pages just because they are
     527             :      * all-visible, but we can still skip pages that are all-frozen, since
     528             :      * such pages do not need freezing and do not affect the value that we can
     529             :      * safely set for relfrozenxid or relminmxid.
     530             :      *
     531             :      * Before entering the main loop, establish the invariant that
     532             :      * next_unskippable_block is the next block number >= blkno that we can't
     533             :      * skip based on the visibility map, either all-visible for a regular scan
     534             :      * or all-frozen for an aggressive scan.  We set it to nblocks if there's
     535             :      * no such block.  We also set up the skipping_blocks flag correctly at
     536             :      * this stage.
     537             :      *
     538             :      * Note: The value returned by visibilitymap_get_status could be slightly
     539             :      * out-of-date, since we make this test before reading the corresponding
     540             :      * heap page or locking the buffer.  This is OK.  If we mistakenly think
     541             :      * that the page is all-visible or all-frozen when in fact the flag's just
     542             :      * been cleared, we might fail to vacuum the page.  It's easy to see that
     543             :      * skipping a page when aggressive is not set is not a very big deal; we
     544             :      * might leave some dead tuples lying around, but the next vacuum will
     545             :      * find them.  But even when aggressive *is* set, it's still OK if we miss
     546             :      * a page whose all-frozen marking has just been cleared.  Any new XIDs
     547             :      * just added to that page are necessarily newer than the GlobalXmin we
     548             :      * computed, so they'll have no effect on the value to which we can safely
     549             :      * set relfrozenxid.  A similar argument applies for MXIDs and relminmxid.
     550             :      *
     551             :      * We will scan the table's last page, at least to the extent of
     552             :      * determining whether it has tuples or not, even if it should be skipped
     553             :      * according to the above rules; except when we've already determined that
     554             :      * it's not worth trying to truncate the table.  This avoids having
     555             :      * lazy_truncate_heap() take access-exclusive lock on the table to attempt
     556             :      * a truncation that just fails immediately because there are tuples in
     557             :      * the last page.  This is worth avoiding mainly because such a lock must
     558             :      * be replayed on any hot standby, where it can be disruptive.
     559             :      */
     560         390 :     next_unskippable_block = 0;
     561         390 :     if ((options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
     562             :     {
     563        1509 :         while (next_unskippable_block < nblocks)
     564             :         {
     565             :             uint8       vmstatus;
     566             : 
     567         960 :             vmstatus = visibilitymap_get_status(onerel, next_unskippable_block,
     568             :                                                 &vmbuffer);
     569         960 :             if (aggressive)
     570             :             {
     571          50 :                 if ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0)
     572          50 :                     break;
     573             :             }
     574             :             else
     575             :             {
     576         910 :                 if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0)
     577         179 :                     break;
     578             :             }
     579         731 :             vacuum_delay_point();
     580         731 :             next_unskippable_block++;
     581             :         }
     582             :     }
     583             : 
     584         390 :     if (next_unskippable_block >= SKIP_PAGES_THRESHOLD)
     585           5 :         skipping_blocks = true;
     586             :     else
     587         385 :         skipping_blocks = false;
     588             : 
     589        5764 :     for (blkno = 0; blkno < nblocks; blkno++)
     590             :     {
     591             :         Buffer      buf;
     592             :         Page        page;
     593             :         OffsetNumber offnum,
     594             :                     maxoff;
     595             :         bool        tupgone,
     596             :                     hastup;
     597             :         int         prev_dead_count;
     598             :         int         nfrozen;
     599             :         Size        freespace;
     600        5374 :         bool        all_visible_according_to_vm = false;
     601             :         bool        all_visible;
     602        5374 :         bool        all_frozen = true;  /* provided all_visible is also true */
     603             :         bool        has_dead_tuples;
     604        5374 :         TransactionId visibility_cutoff_xid = InvalidTransactionId;
     605             : 
     606             :         /* see note above about forcing scanning of last page */
     607             : #define FORCE_CHECK_PAGE() \
     608             :         (blkno == nblocks - 1 && should_attempt_truncation(vacrelstats))
     609             : 
     610        5374 :         pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
     611             : 
     612        5374 :         if (blkno == next_unskippable_block)
     613             :         {
     614             :             /* Time to advance next_unskippable_block */
     615        4455 :             next_unskippable_block++;
     616        4455 :             if ((options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
     617             :             {
     618        9096 :                 while (next_unskippable_block < nblocks)
     619             :                 {
     620             :                     uint8       vmskipflags;
     621             : 
     622        4413 :                     vmskipflags = visibilitymap_get_status(onerel,
     623             :                                                            next_unskippable_block,
     624             :                                                            &vmbuffer);
     625        4413 :                     if (aggressive)
     626             :                     {
     627         345 :                         if ((vmskipflags & VISIBILITYMAP_ALL_FROZEN) == 0)
     628         345 :                             break;
     629             :                     }
     630             :                     else
     631             :                     {
     632        4068 :                         if ((vmskipflags & VISIBILITYMAP_ALL_VISIBLE) == 0)
     633        3880 :                             break;
     634             :                     }
     635         188 :                     vacuum_delay_point();
     636         188 :                     next_unskippable_block++;
     637             :                 }
     638             :             }
     639             : 
     640             :             /*
     641             :              * We know we can't skip the current block.  But set up
     642             :              * skipping_blocks to do the right thing at the following blocks.
     643             :              */
     644        4455 :             if (next_unskippable_block - blkno > SKIP_PAGES_THRESHOLD)
     645           1 :                 skipping_blocks = true;
     646             :             else
     647        4454 :                 skipping_blocks = false;
     648             : 
     649             :             /*
     650             :              * Normally, the fact that we can't skip this block must mean that
     651             :              * it's not all-visible.  But in an aggressive vacuum we know only
     652             :              * that it's not all-frozen, so it might still be all-visible.
     653             :              */
     654        4455 :             if (aggressive && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
     655           0 :                 all_visible_according_to_vm = true;
     656             :         }
     657             :         else
     658             :         {
     659             :             /*
     660             :              * The current block is potentially skippable; if we've seen a
     661             :              * long enough run of skippable blocks to justify skipping it, and
     662             :              * we're not forced to check it, then go ahead and skip.
     663             :              * Otherwise, the page must be at least all-visible if not
     664             :              * all-frozen, so we can set all_visible_according_to_vm = true.
     665             :              */
     666         919 :             if (skipping_blocks && !FORCE_CHECK_PAGE())
     667             :             {
     668             :                 /*
     669             :                  * Tricky, tricky.  If this is in aggressive vacuum, the page
     670             :                  * must have been all-frozen at the time we checked whether it
     671             :                  * was skippable, but it might not be any more.  We must be
     672             :                  * careful to count it as a skipped all-frozen page in that
     673             :                  * case, or else we'll think we can't update relfrozenxid and
     674             :                  * relminmxid.  If it's not an aggressive vacuum, we don't
     675             :                  * know whether it was all-frozen, so we have to recheck; but
     676             :                  * in this case an approximate answer is OK.
     677             :                  */
     678         635 :                 if (aggressive || VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
     679         232 :                     vacrelstats->frozenskipped_pages++;
     680        1270 :                 continue;
     681             :             }
     682         284 :             all_visible_according_to_vm = true;
     683             :         }
     684             : 
     685        4739 :         vacuum_delay_point();
     686             : 
     687             :         /*
     688             :          * If we are close to overrunning the available space for dead-tuple
     689             :          * TIDs, pause and do a cycle of vacuuming before we tackle this page.
     690             :          */
     691        4739 :         if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
     692           0 :             vacrelstats->num_dead_tuples > 0)
     693             :         {
     694           0 :             const int   hvp_index[] = {
     695             :                 PROGRESS_VACUUM_PHASE,
     696             :                 PROGRESS_VACUUM_NUM_INDEX_VACUUMS
     697             :             };
     698             :             int64       hvp_val[2];
     699             : 
     700             :             /*
     701             :              * Before beginning index vacuuming, we release any pin we may
     702             :              * hold on the visibility map page.  This isn't necessary for
     703             :              * correctness, but we do it anyway to avoid holding the pin
     704             :              * across a lengthy, unrelated operation.
     705             :              */
     706           0 :             if (BufferIsValid(vmbuffer))
     707             :             {
     708           0 :                 ReleaseBuffer(vmbuffer);
     709           0 :                 vmbuffer = InvalidBuffer;
     710             :             }
     711             : 
     712             :             /* Log cleanup info before we touch indexes */
     713           0 :             vacuum_log_cleanup_info(onerel, vacrelstats);
     714             : 
     715             :             /* Report that we are now vacuuming indexes */
     716           0 :             pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
     717             :                                          PROGRESS_VACUUM_PHASE_VACUUM_INDEX);
     718             : 
     719             :             /* Remove index entries */
     720           0 :             for (i = 0; i < nindexes; i++)
     721           0 :                 lazy_vacuum_index(Irel[i],
     722           0 :                                   &indstats[i],
     723             :                                   vacrelstats);
     724             : 
     725             :             /*
     726             :              * Report that we are now vacuuming the heap.  We also increase
     727             :              * the number of index scans here; note that by using
     728             :              * pgstat_progress_update_multi_param we can update both
     729             :              * parameters atomically.
     730             :              */
     731           0 :             hvp_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_HEAP;
     732           0 :             hvp_val[1] = vacrelstats->num_index_scans + 1;
     733           0 :             pgstat_progress_update_multi_param(2, hvp_index, hvp_val);
     734             : 
     735             :             /* Remove tuples from heap */
     736           0 :             lazy_vacuum_heap(onerel, vacrelstats);
     737             : 
     738             :             /*
     739             :              * Forget the now-vacuumed tuples, and press on, but be careful
     740             :              * not to reset latestRemovedXid since we want that value to be
     741             :              * valid.
     742             :              */
     743           0 :             vacrelstats->num_dead_tuples = 0;
     744           0 :             vacrelstats->num_index_scans++;
     745             : 
     746             :             /* Report that we are once again scanning the heap */
     747           0 :             pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
     748             :                                          PROGRESS_VACUUM_PHASE_SCAN_HEAP);
     749             :         }
     750             : 
     751             :         /*
     752             :          * Pin the visibility map page in case we need to mark the page
     753             :          * all-visible.  In most cases this will be very cheap, because we'll
     754             :          * already have the correct page pinned anyway.  However, it's
     755             :          * possible that (a) next_unskippable_block is covered by a different
     756             :          * VM page than the current block or (b) we released our pin and did a
     757             :          * cycle of index vacuuming.
     758             :          *
     759             :          */
     760        4739 :         visibilitymap_pin(onerel, blkno, &vmbuffer);
     761             : 
     762        4739 :         buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
     763             :                                  RBM_NORMAL, vac_strategy);
     764             : 
     765             :         /* We need buffer cleanup lock so that we can prune HOT chains. */
     766        4739 :         if (!ConditionalLockBufferForCleanup(buf))
     767             :         {
     768             :             /*
     769             :              * If we're not performing an aggressive scan to guard against XID
     770             :              * wraparound, and we don't want to forcibly check the page, then
     771             :              * it's OK to skip vacuuming pages we get a lock conflict on. They
     772             :              * will be dealt with in some future vacuum.
     773             :              */
     774           0 :             if (!aggressive && !FORCE_CHECK_PAGE())
     775             :             {
     776           0 :                 ReleaseBuffer(buf);
     777           0 :                 vacrelstats->pinskipped_pages++;
     778           0 :                 continue;
     779             :             }
     780             : 
     781             :             /*
     782             :              * Read the page with share lock to see if any xids on it need to
     783             :              * be frozen.  If not we just skip the page, after updating our
     784             :              * scan statistics.  If there are some, we wait for cleanup lock.
     785             :              *
     786             :              * We could defer the lock request further by remembering the page
     787             :              * and coming back to it later, or we could even register
     788             :              * ourselves for multiple buffers and then service whichever one
     789             :              * is received first.  For now, this seems good enough.
     790             :              *
     791             :              * If we get here with aggressive false, then we're just forcibly
     792             :              * checking the page, and so we don't want to insist on getting
     793             :              * the lock; we only need to know if the page contains tuples, so
     794             :              * that we can update nonempty_pages correctly.  It's convenient
     795             :              * to use lazy_check_needs_freeze() for both situations, though.
     796             :              */
     797           0 :             LockBuffer(buf, BUFFER_LOCK_SHARE);
     798           0 :             if (!lazy_check_needs_freeze(buf, &hastup))
     799             :             {
     800           0 :                 UnlockReleaseBuffer(buf);
     801           0 :                 vacrelstats->scanned_pages++;
     802           0 :                 vacrelstats->pinskipped_pages++;
     803           0 :                 if (hastup)
     804           0 :                     vacrelstats->nonempty_pages = blkno + 1;
     805           0 :                 continue;
     806             :             }
     807           0 :             if (!aggressive)
     808             :             {
     809             :                 /*
     810             :                  * Here, we must not advance scanned_pages; that would amount
     811             :                  * to claiming that the page contains no freezable tuples.
     812             :                  */
     813           0 :                 UnlockReleaseBuffer(buf);
     814           0 :                 vacrelstats->pinskipped_pages++;
     815           0 :                 if (hastup)
     816           0 :                     vacrelstats->nonempty_pages = blkno + 1;
     817           0 :                 continue;
     818             :             }
     819           0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     820           0 :             LockBufferForCleanup(buf);
     821             :             /* drop through to normal processing */
     822             :         }
     823             : 
     824        4739 :         vacrelstats->scanned_pages++;
     825        4739 :         vacrelstats->tupcount_pages++;
     826             : 
     827        4739 :         page = BufferGetPage(buf);
     828             : 
     829        4739 :         if (PageIsNew(page))
     830             :         {
     831             :             /*
     832             :              * An all-zeroes page could be left over if a backend extends the
     833             :              * relation but crashes before initializing the page. Reclaim such
     834             :              * pages for use.
     835             :              *
     836             :              * We have to be careful here because we could be looking at a
     837             :              * page that someone has just added to the relation and not yet
     838             :              * been able to initialize (see RelationGetBufferForTuple). To
     839             :              * protect against that, release the buffer lock, grab the
     840             :              * relation extension lock momentarily, and re-lock the buffer. If
     841             :              * the page is still uninitialized by then, it must be left over
     842             :              * from a crashed backend, and we can initialize it.
     843             :              *
     844             :              * We don't really need the relation lock when this is a new or
     845             :              * temp relation, but it's probably not worth the code space to
     846             :              * check that, since this surely isn't a critical path.
     847             :              *
     848             :              * Note: the comparable code in vacuum.c need not worry because
     849             :              * it's got exclusive lock on the whole relation.
     850             :              */
     851           0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     852           0 :             LockRelationForExtension(onerel, ExclusiveLock);
     853           0 :             UnlockRelationForExtension(onerel, ExclusiveLock);
     854           0 :             LockBufferForCleanup(buf);
     855           0 :             if (PageIsNew(page))
     856             :             {
     857           0 :                 ereport(WARNING,
     858             :                         (errmsg("relation \"%s\" page %u is uninitialized --- fixing",
     859             :                                 relname, blkno)));
     860           0 :                 PageInit(page, BufferGetPageSize(buf), 0);
     861           0 :                 empty_pages++;
     862             :             }
     863           0 :             freespace = PageGetHeapFreeSpace(page);
     864           0 :             MarkBufferDirty(buf);
     865           0 :             UnlockReleaseBuffer(buf);
     866             : 
     867           0 :             RecordPageWithFreeSpace(onerel, blkno, freespace);
     868           0 :             continue;
     869             :         }
     870             : 
     871        4739 :         if (PageIsEmpty(page))
     872             :         {
     873           0 :             empty_pages++;
     874           0 :             freespace = PageGetHeapFreeSpace(page);
     875             : 
     876             :             /* empty pages are always all-visible and all-frozen */
     877           0 :             if (!PageIsAllVisible(page))
     878             :             {
     879           0 :                 START_CRIT_SECTION();
     880             : 
     881             :                 /* mark buffer dirty before writing a WAL record */
     882           0 :                 MarkBufferDirty(buf);
     883             : 
     884             :                 /*
     885             :                  * It's possible that another backend has extended the heap,
     886             :                  * initialized the page, and then failed to WAL-log the page
     887             :                  * due to an ERROR.  Since heap extension is not WAL-logged,
     888             :                  * recovery might try to replay our record setting the page
     889             :                  * all-visible and find that the page isn't initialized, which
     890             :                  * will cause a PANIC.  To prevent that, check whether the
     891             :                  * page has been previously WAL-logged, and if not, do that
     892             :                  * now.
     893             :                  */
     894           0 :                 if (RelationNeedsWAL(onerel) &&
     895           0 :                     PageGetLSN(page) == InvalidXLogRecPtr)
     896           0 :                     log_newpage_buffer(buf, true);
     897             : 
     898           0 :                 PageSetAllVisible(page);
     899           0 :                 visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
     900             :                                   vmbuffer, InvalidTransactionId,
     901             :                                   VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
     902           0 :                 END_CRIT_SECTION();
     903             :             }
     904             : 
     905           0 :             UnlockReleaseBuffer(buf);
     906           0 :             RecordPageWithFreeSpace(onerel, blkno, freespace);
     907           0 :             continue;
     908             :         }
     909             : 
     910             :         /*
     911             :          * Prune all HOT-update chains in this page.
     912             :          *
     913             :          * We count tuples removed by the pruning step as removed by VACUUM.
     914             :          */
     915        4739 :         tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
     916             :                                          &vacrelstats->latestRemovedXid);
     917             : 
     918             :         /*
     919             :          * Now scan the page to collect vacuumable items and check for tuples
     920             :          * requiring freezing.
     921             :          */
     922        4739 :         all_visible = true;
     923        4739 :         has_dead_tuples = false;
     924        4739 :         nfrozen = 0;
     925        4739 :         hastup = false;
     926        4739 :         prev_dead_count = vacrelstats->num_dead_tuples;
     927        4739 :         maxoff = PageGetMaxOffsetNumber(page);
     928             : 
     929             :         /*
     930             :          * Note: If you change anything in the loop below, also look at
     931             :          * heap_page_is_all_visible to see if that needs to be changed.
     932             :          */
     933      480836 :         for (offnum = FirstOffsetNumber;
     934             :              offnum <= maxoff;
     935      471358 :              offnum = OffsetNumberNext(offnum))
     936             :         {
     937             :             ItemId      itemid;
     938             : 
     939      471358 :             itemid = PageGetItemId(page, offnum);
     940             : 
     941             :             /* Unused items require no processing, but we count 'em */
     942      471358 :             if (!ItemIdIsUsed(itemid))
     943             :             {
     944        6840 :                 nunused += 1;
     945        6840 :                 continue;
     946             :             }
     947             : 
     948             :             /* Redirect items mustn't be touched */
     949      464518 :             if (ItemIdIsRedirected(itemid))
     950             :             {
     951         303 :                 hastup = true;  /* this page won't be truncatable */
     952         303 :                 continue;
     953             :             }
     954             : 
     955      464215 :             ItemPointerSet(&(tuple.t_self), blkno, offnum);
     956             : 
     957             :             /*
     958             :              * DEAD item pointers are to be vacuumed normally; but we don't
     959             :              * count them in tups_vacuumed, else we'd be double-counting (at
     960             :              * least in the common case where heap_page_prune() just freed up
     961             :              * a non-HOT tuple).
     962             :              */
     963      464215 :             if (ItemIdIsDead(itemid))
     964             :             {
     965       82231 :                 lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
     966       82231 :                 all_visible = false;
     967       82231 :                 continue;
     968             :             }
     969             : 
     970      381984 :             Assert(ItemIdIsNormal(itemid));
     971             : 
     972      381984 :             tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
     973      381984 :             tuple.t_len = ItemIdGetLength(itemid);
     974      381984 :             tuple.t_tableOid = RelationGetRelid(onerel);
     975             : 
     976      381984 :             tupgone = false;
     977             : 
     978      381984 :             switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
     979             :             {
     980             :                 case HEAPTUPLE_DEAD:
     981             : 
     982             :                     /*
     983             :                      * Ordinarily, DEAD tuples would have been removed by
     984             :                      * heap_page_prune(), but it's possible that the tuple
     985             :                      * state changed since heap_page_prune() looked.  In
     986             :                      * particular an INSERT_IN_PROGRESS tuple could have
     987             :                      * changed to DEAD if the inserter aborted.  So this
     988             :                      * cannot be considered an error condition.
     989             :                      *
     990             :                      * If the tuple is HOT-updated then it must only be
     991             :                      * removed by a prune operation; so we keep it just as if
     992             :                      * it were RECENTLY_DEAD.  Also, if it's a heap-only
     993             :                      * tuple, we choose to keep it, because it'll be a lot
     994             :                      * cheaper to get rid of it in the next pruning pass than
     995             :                      * to treat it like an indexed tuple.
     996             :                      */
     997           0 :                     if (HeapTupleIsHotUpdated(&tuple) ||
     998           0 :                         HeapTupleIsHeapOnly(&tuple))
     999           0 :                         nkeep += 1;
    1000             :                     else
    1001           0 :                         tupgone = true; /* we can delete the tuple */
    1002           0 :                     all_visible = false;
    1003           0 :                     break;
    1004             :                 case HEAPTUPLE_LIVE:
    1005             :                     /* Tuple is good --- but let's do some validity checks */
    1006      378183 :                     if (onerel->rd_rel->relhasoids &&
    1007       65326 :                         !OidIsValid(HeapTupleGetOid(&tuple)))
    1008           0 :                         elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
    1009             :                              relname, blkno, offnum);
    1010             : 
    1011             :                     /*
    1012             :                      * Is the tuple definitely visible to all transactions?
    1013             :                      *
    1014             :                      * NB: Like with per-tuple hint bits, we can't set the
    1015             :                      * PD_ALL_VISIBLE flag if the inserter committed
    1016             :                      * asynchronously. See SetHintBits for more info. Check
    1017             :                      * that the tuple is hinted xmin-committed because of
    1018             :                      * that.
    1019             :                      */
    1020      345520 :                     if (all_visible)
    1021             :                     {
    1022             :                         TransactionId xmin;
    1023             : 
    1024      301978 :                         if (!HeapTupleHeaderXminCommitted(tuple.t_data))
    1025             :                         {
    1026           0 :                             all_visible = false;
    1027           0 :                             break;
    1028             :                         }
    1029             : 
    1030             :                         /*
    1031             :                          * The inserter definitely committed. But is it old
    1032             :                          * enough that everyone sees it as committed?
    1033             :                          */
    1034      301978 :                         xmin = HeapTupleHeaderGetXmin(tuple.t_data);
    1035      301978 :                         if (!TransactionIdPrecedes(xmin, OldestXmin))
    1036             :                         {
    1037         121 :                             all_visible = false;
    1038         121 :                             break;
    1039             :                         }
    1040             : 
    1041             :                         /* Track newest xmin on page. */
    1042      301857 :                         if (TransactionIdFollows(xmin, visibility_cutoff_xid))
    1043        5208 :                             visibility_cutoff_xid = xmin;
    1044             :                     }
    1045      345399 :                     break;
    1046             :                 case HEAPTUPLE_RECENTLY_DEAD:
    1047             : 
    1048             :                     /*
     1049             :                      * If the tuple was recently deleted then we must not
     1050             :                      * remove it from the relation.
    1051             :                      */
    1052       36461 :                     nkeep += 1;
    1053       36461 :                     all_visible = false;
    1054       36461 :                     break;
    1055             :                 case HEAPTUPLE_INSERT_IN_PROGRESS:
    1056             :                     /* This is an expected case during concurrent vacuum */
    1057           3 :                     all_visible = false;
    1058           3 :                     break;
    1059             :                 case HEAPTUPLE_DELETE_IN_PROGRESS:
    1060             :                     /* This is an expected case during concurrent vacuum */
    1061           0 :                     all_visible = false;
    1062           0 :                     break;
    1063             :                 default:
    1064           0 :                     elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1065             :                     break;
    1066             :             }
    1067             : 
    1068      381984 :             if (tupgone)
    1069             :             {
    1070           0 :                 lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
    1071           0 :                 HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
    1072             :                                                        &vacrelstats->latestRemovedXid);
    1073           0 :                 tups_vacuumed += 1;
    1074           0 :                 has_dead_tuples = true;
    1075             :             }
    1076             :             else
    1077             :             {
    1078             :                 bool        tuple_totally_frozen;
    1079             : 
    1080      381984 :                 num_tuples += 1;
    1081      381984 :                 hastup = true;
    1082             : 
    1083             :                 /*
    1084             :                  * Each non-removable tuple must be checked to see if it needs
    1085             :                  * freezing.  Note we already have exclusive buffer lock.
    1086             :                  */
    1087      763968 :                 if (heap_prepare_freeze_tuple(tuple.t_data, FreezeLimit,
    1088      381984 :                                               MultiXactCutoff, &frozen[nfrozen],
    1089             :                                               &tuple_totally_frozen))
    1090       15291 :                     frozen[nfrozen++].offset = offnum;
    1091             : 
    1092      381984 :                 if (!tuple_totally_frozen)
    1093      342024 :                     all_frozen = false;
    1094             :             }
    1095             :         }                       /* scan along page */
    1096             : 
    1097             :         /*
    1098             :          * If we froze any tuples, mark the buffer dirty, and write a WAL
    1099             :          * record recording the changes.  We must log the changes to be
    1100             :          * crash-safe against future truncation of CLOG.
    1101             :          */
    1102        4739 :         if (nfrozen > 0)
    1103             :         {
    1104         250 :             START_CRIT_SECTION();
    1105             : 
    1106         250 :             MarkBufferDirty(buf);
    1107             : 
    1108             :             /* execute collected freezes */
    1109       15541 :             for (i = 0; i < nfrozen; i++)
    1110             :             {
    1111             :                 ItemId      itemid;
    1112             :                 HeapTupleHeader htup;
    1113             : 
    1114       15291 :                 itemid = PageGetItemId(page, frozen[i].offset);
    1115       15291 :                 htup = (HeapTupleHeader) PageGetItem(page, itemid);
    1116             : 
    1117       15291 :                 heap_execute_freeze_tuple(htup, &frozen[i]);
    1118             :             }
    1119             : 
    1120             :             /* Now WAL-log freezing if necessary */
    1121         250 :             if (RelationNeedsWAL(onerel))
    1122             :             {
    1123             :                 XLogRecPtr  recptr;
    1124             : 
    1125         250 :                 recptr = log_heap_freeze(onerel, buf, FreezeLimit,
    1126             :                                          frozen, nfrozen);
    1127         250 :                 PageSetLSN(page, recptr);
    1128             :             }
    1129             : 
    1130         250 :             END_CRIT_SECTION();
    1131             :         }
    1132             : 
    1133             :         /*
    1134             :          * If there are no indexes then we can vacuum the page right now
    1135             :          * instead of doing a second scan.
    1136             :          */
    1137        5141 :         if (nindexes == 0 &&
    1138         402 :             vacrelstats->num_dead_tuples > 0)
    1139             :         {
    1140             :             /* Remove tuples from heap */
    1141          12 :             lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
    1142          12 :             has_dead_tuples = false;
    1143             : 
    1144             :             /*
    1145             :              * Forget the now-vacuumed tuples, and press on, but be careful
    1146             :              * not to reset latestRemovedXid since we want that value to be
    1147             :              * valid.
    1148             :              */
    1149          12 :             vacrelstats->num_dead_tuples = 0;
    1150          12 :             vacuumed_pages++;
    1151             :         }
    1152             : 
    1153        4739 :         freespace = PageGetHeapFreeSpace(page);
    1154             : 
    1155             :         /* mark page all-visible, if appropriate */
    1156        4739 :         if (all_visible && !all_visible_according_to_vm)
    1157        2919 :         {
    1158        2919 :             uint8       flags = VISIBILITYMAP_ALL_VISIBLE;
    1159             : 
    1160        2919 :             if (all_frozen)
    1161         369 :                 flags |= VISIBILITYMAP_ALL_FROZEN;
    1162             : 
    1163             :             /*
     1164             :              * It should never be the case that the visibility map bit is set
     1165             :              * while the page-level bit is clear, but the reverse is allowed
     1166             :              * (if checksums are not enabled).  Regardless, set both bits
    1167             :              * so that we get back in sync.
    1168             :              *
    1169             :              * NB: If the heap page is all-visible but the VM bit is not set,
    1170             :              * we don't need to dirty the heap page.  However, if checksums
    1171             :              * are enabled, we do need to make sure that the heap page is
    1172             :              * dirtied before passing it to visibilitymap_set(), because it
    1173             :              * may be logged.  Given that this situation should only happen in
    1174             :              * rare cases after a crash, it is not worth optimizing.
    1175             :              */
    1176        2919 :             PageSetAllVisible(page);
    1177        2919 :             MarkBufferDirty(buf);
    1178        2919 :             visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
    1179             :                               vmbuffer, visibility_cutoff_xid, flags);
    1180             :         }
    1181             : 
    1182             :         /*
    1183             :          * As of PostgreSQL 9.2, the visibility map bit should never be set if
    1184             :          * the page-level bit is clear.  However, it's possible that the bit
    1185             :          * got cleared after we checked it and before we took the buffer
    1186             :          * content lock, so we must recheck before jumping to the conclusion
    1187             :          * that something bad has happened.
    1188             :          */
    1189        1820 :         else if (all_visible_according_to_vm && !PageIsAllVisible(page)
    1190           0 :                  && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer))
    1191             :         {
    1192           0 :             elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
    1193             :                  relname, blkno);
    1194           0 :             visibilitymap_clear(onerel, blkno, vmbuffer,
    1195             :                                 VISIBILITYMAP_VALID_BITS);
    1196             :         }
    1197             : 
    1198             :         /*
    1199             :          * It's possible for the value returned by GetOldestXmin() to move
    1200             :          * backwards, so it's not wrong for us to see tuples that appear to
    1201             :          * not be visible to everyone yet, while PD_ALL_VISIBLE is already
    1202             :          * set. The real safe xmin value never moves backwards, but
    1203             :          * GetOldestXmin() is conservative and sometimes returns a value
    1204             :          * that's unnecessarily small, so if we see that contradiction it just
    1205             :          * means that the tuples that we think are not visible to everyone yet
    1206             :          * actually are, and the PD_ALL_VISIBLE flag is correct.
    1207             :          *
    1208             :          * There should never be dead tuples on a page with PD_ALL_VISIBLE
    1209             :          * set, however.
    1210             :          */
    1211        1820 :         else if (PageIsAllVisible(page) && has_dead_tuples)
    1212             :         {
    1213           0 :             elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u",
    1214             :                  relname, blkno);
    1215           0 :             PageClearAllVisible(page);
    1216           0 :             MarkBufferDirty(buf);
    1217           0 :             visibilitymap_clear(onerel, blkno, vmbuffer,
    1218             :                                 VISIBILITYMAP_VALID_BITS);
    1219             :         }
    1220             : 
    1221             :         /*
     1222             :          * If the all-visible page turns out to be all-frozen but not marked
     1223             :          * as such, we should so mark it.  Note that all_frozen is only valid
    1224             :          * if all_visible is true, so we must check both.
    1225             :          */
    1226        2065 :         else if (all_visible_according_to_vm && all_visible && all_frozen &&
    1227         245 :                  !VM_ALL_FROZEN(onerel, blkno, &vmbuffer))
    1228             :         {
    1229             :             /*
    1230             :              * We can pass InvalidTransactionId as the cutoff XID here,
    1231             :              * because setting the all-frozen bit doesn't cause recovery
    1232             :              * conflicts.
    1233             :              */
    1234           0 :             visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
    1235             :                               vmbuffer, InvalidTransactionId,
    1236             :                               VISIBILITYMAP_ALL_FROZEN);
    1237             :         }
    1238             : 
    1239        4739 :         UnlockReleaseBuffer(buf);
    1240             : 
    1241             :         /* Remember the location of the last page with nonremovable tuples */
    1242        4739 :         if (hastup)
    1243        4149 :             vacrelstats->nonempty_pages = blkno + 1;
    1244             : 
    1245             :         /*
    1246             :          * If we remembered any tuples for deletion, then the page will be
    1247             :          * visited again by lazy_vacuum_heap, which will compute and record
    1248             :          * its post-compaction free space.  If not, then we're done with this
    1249             :          * page, so remember its free space as-is.  (This path will always be
    1250             :          * taken if there are no indexes.)
    1251             :          */
    1252        4739 :         if (vacrelstats->num_dead_tuples == prev_dead_count)
    1253        3629 :             RecordPageWithFreeSpace(onerel, blkno, freespace);
    1254             :     }
    1255             : 
    1256             :     /* report that everything is scanned and vacuumed */
    1257         390 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
    1258             : 
    1259         390 :     pfree(frozen);
    1260             : 
    1261             :     /* save stats for use later */
    1262         390 :     vacrelstats->scanned_tuples = num_tuples;
    1263         390 :     vacrelstats->tuples_deleted = tups_vacuumed;
    1264         390 :     vacrelstats->new_dead_tuples = nkeep;
    1265             : 
    1266             :     /* now we can compute the new value for pg_class.reltuples */
    1267         390 :     vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel, false,
    1268             :                                                          nblocks,
    1269             :                                                          vacrelstats->tupcount_pages,
    1270             :                                                          num_tuples);
    1271             : 
    1272             :     /*
    1273             :      * Release any remaining pin on visibility map page.
    1274             :      */
    1275         390 :     if (BufferIsValid(vmbuffer))
    1276             :     {
    1277         245 :         ReleaseBuffer(vmbuffer);
    1278         245 :         vmbuffer = InvalidBuffer;
    1279             :     }
    1280             : 
    1281             :     /* If any tuples need to be deleted, perform final vacuum cycle */
    1282             :     /* XXX put a threshold on min number of tuples here? */
    1283         390 :     if (vacrelstats->num_dead_tuples > 0)
    1284             :     {
    1285          66 :         const int   hvp_index[] = {
    1286             :             PROGRESS_VACUUM_PHASE,
    1287             :             PROGRESS_VACUUM_NUM_INDEX_VACUUMS
    1288             :         };
    1289             :         int64       hvp_val[2];
    1290             : 
    1291             :         /* Log cleanup info before we touch indexes */
    1292          66 :         vacuum_log_cleanup_info(onerel, vacrelstats);
    1293             : 
    1294             :         /* Report that we are now vacuuming indexes */
    1295          66 :         pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    1296             :                                      PROGRESS_VACUUM_PHASE_VACUUM_INDEX);
    1297             : 
    1298             :         /* Remove index entries */
    1299         195 :         for (i = 0; i < nindexes; i++)
    1300         258 :             lazy_vacuum_index(Irel[i],
    1301         129 :                               &indstats[i],
    1302             :                               vacrelstats);
    1303             : 
    1304             :         /* Report that we are now vacuuming the heap */
    1305          66 :         hvp_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_HEAP;
    1306          66 :         hvp_val[1] = vacrelstats->num_index_scans + 1;
    1307          66 :         pgstat_progress_update_multi_param(2, hvp_index, hvp_val);
    1308             : 
    1309             :         /* Remove tuples from heap */
    1310          66 :         pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    1311             :                                      PROGRESS_VACUUM_PHASE_VACUUM_HEAP);
    1312          66 :         lazy_vacuum_heap(onerel, vacrelstats);
    1313          66 :         vacrelstats->num_index_scans++;
    1314             :     }
    1315             : 
    1316             :     /* report all blocks vacuumed; and that we're cleaning up */
    1317         390 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
    1318         390 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    1319             :                                  PROGRESS_VACUUM_PHASE_INDEX_CLEANUP);
    1320             : 
    1321             :     /* Do post-vacuum cleanup and statistics update for each index */
    1322         835 :     for (i = 0; i < nindexes; i++)
    1323         445 :         lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);
    1324             : 
    1325             :     /* If no indexes, make log report that lazy_vacuum_heap would've made */
    1326         390 :     if (vacuumed_pages)
    1327           8 :         ereport(elevel,
    1328             :                 (errmsg("\"%s\": removed %.0f row versions in %u pages",
    1329             :                         RelationGetRelationName(onerel),
    1330             :                         tups_vacuumed, vacuumed_pages)));
    1331             : 
    1332             :     /*
    1333             :      * This is pretty messy, but we split it up so that we can skip emitting
    1334             :      * individual parts of the message when not applicable.
    1335             :      */
    1336         390 :     initStringInfo(&buf);
    1337         390 :     appendStringInfo(&buf,
    1338             :                      _("%.0f dead row versions cannot be removed yet, oldest xmin: %u\n"),
    1339             :                      nkeep, OldestXmin);
    1340         390 :     appendStringInfo(&buf, _("There were %.0f unused item pointers.\n"),
    1341             :                      nunused);
    1342         390 :     appendStringInfo(&buf, ngettext("Skipped %u page due to buffer pins, ",
    1343             :                                     "Skipped %u pages due to buffer pins, ",
    1344             :                                     vacrelstats->pinskipped_pages),
    1345             :                      vacrelstats->pinskipped_pages);
    1346         390 :     appendStringInfo(&buf, ngettext("%u frozen page.\n",
    1347             :                                     "%u frozen pages.\n",
    1348             :                                     vacrelstats->frozenskipped_pages),
    1349             :                      vacrelstats->frozenskipped_pages);
    1350         390 :     appendStringInfo(&buf, ngettext("%u page is entirely empty.\n",
    1351             :                                     "%u pages are entirely empty.\n",
    1352             :                                     empty_pages),
    1353             :                      empty_pages);
    1354         390 :     appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0));
    1355             : 
    1356         390 :     ereport(elevel,
    1357             :             (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
    1358             :                     RelationGetRelationName(onerel),
    1359             :                     tups_vacuumed, num_tuples,
    1360             :                     vacrelstats->scanned_pages, nblocks),
    1361             :              errdetail_internal("%s", buf.data)));
    1362         390 :     pfree(buf.data);
    1363         390 : }
    1364             : 
    1365             : 
    1366             : /*
    1367             :  *  lazy_vacuum_heap() -- second pass over the heap
    1368             :  *
    1369             :  *      This routine marks dead tuples as unused and compacts out free
    1370             :  *      space on their pages.  Pages not having dead tuples recorded from
    1371             :  *      lazy_scan_heap are not visited at all.
    1372             :  *
    1373             :  * Note: the reason for doing this as a second pass is we cannot remove
    1374             :  * the tuples until we've removed their index entries, and we want to
    1375             :  * process index entry removal in batches as large as possible.
    1376             :  */
    1377             : static void
    1378          66 : lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
    1379             : {
    1380             :     int         tupindex;
    1381             :     int         npages;
    1382             :     PGRUsage    ru0;
    1383          66 :     Buffer      vmbuffer = InvalidBuffer;
    1384             : 
    1385          66 :     pg_rusage_init(&ru0);
    1386          66 :     npages = 0;
    1387             : 
    1388          66 :     tupindex = 0;
    1389        1242 :     while (tupindex < vacrelstats->num_dead_tuples)
    1390             :     {
    1391             :         BlockNumber tblk;
    1392             :         Buffer      buf;
    1393             :         Page        page;
    1394             :         Size        freespace;
    1395             : 
    1396        1110 :         vacuum_delay_point();
    1397             : 
    1398        1110 :         tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
    1399        1110 :         buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
    1400             :                                  vac_strategy);
    1401        1110 :         if (!ConditionalLockBufferForCleanup(buf))
    1402             :         {
    1403           0 :             ReleaseBuffer(buf);
    1404           0 :             ++tupindex;
    1405           0 :             continue;
    1406             :         }
    1407        1110 :         tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats,
    1408             :                                     &vmbuffer);
    1409             : 
    1410             :         /* Now that we've compacted the page, record its available space */
    1411        1110 :         page = BufferGetPage(buf);
    1412        1110 :         freespace = PageGetHeapFreeSpace(page);
    1413             : 
    1414        1110 :         UnlockReleaseBuffer(buf);
    1415        1110 :         RecordPageWithFreeSpace(onerel, tblk, freespace);
    1416        1110 :         npages++;
    1417             :     }
    1418             : 
    1419          66 :     if (BufferIsValid(vmbuffer))
    1420             :     {
    1421          65 :         ReleaseBuffer(vmbuffer);
    1422          65 :         vmbuffer = InvalidBuffer;
    1423             :     }
    1424             : 
    1425          66 :     ereport(elevel,
    1426             :             (errmsg("\"%s\": removed %d row versions in %d pages",
    1427             :                     RelationGetRelationName(onerel),
    1428             :                     tupindex, npages),
    1429             :              errdetail_internal("%s", pg_rusage_show(&ru0))));
    1430          66 : }
    1431             : 
    1432             : /*
    1433             :  *  lazy_vacuum_page() -- free dead tuples on a page
    1434             :  *                   and repair its fragmentation.
    1435             :  *
    1436             :  * Caller must hold pin and buffer cleanup lock on the buffer.
    1437             :  *
    1438             :  * tupindex is the index in vacrelstats->dead_tuples of the first dead
    1439             :  * tuple for this page.  We assume the rest follow sequentially.
    1440             :  * The return value is the first tupindex after the tuples of this page.
    1441             :  */
    1442             : static int
    1443        1122 : lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
    1444             :                  int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
    1445             : {
    1446        1122 :     Page        page = BufferGetPage(buffer);
    1447             :     OffsetNumber unused[MaxOffsetNumber];
    1448        1122 :     int         uncnt = 0;
    1449             :     TransactionId visibility_cutoff_xid;
    1450             :     bool        all_frozen;
    1451             : 
    1452        1122 :     pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
    1453             : 
    1454        1122 :     START_CRIT_SECTION();
    1455             : 
    1456       83353 :     for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
    1457             :     {
    1458             :         BlockNumber tblk;
    1459             :         OffsetNumber toff;
    1460             :         ItemId      itemid;
    1461             : 
    1462       83275 :         tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
    1463       83275 :         if (tblk != blkno)
    1464        1044 :             break;              /* past end of tuples for this block */
    1465       82231 :         toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
    1466       82231 :         itemid = PageGetItemId(page, toff);
    1467       82231 :         ItemIdSetUnused(itemid);
    1468       82231 :         unused[uncnt++] = toff;
    1469             :     }
    1470             : 
    1471        1122 :     PageRepairFragmentation(page);
    1472             : 
    1473             :     /*
    1474             :      * Mark buffer dirty before we write WAL.
    1475             :      */
    1476        1122 :     MarkBufferDirty(buffer);
    1477             : 
    1478             :     /* XLOG stuff */
    1479        1122 :     if (RelationNeedsWAL(onerel))
    1480             :     {
    1481             :         XLogRecPtr  recptr;
    1482             : 
    1483        1122 :         recptr = log_heap_clean(onerel, buffer,
    1484             :                                 NULL, 0, NULL, 0,
    1485             :                                 unused, uncnt,
    1486             :                                 vacrelstats->latestRemovedXid);
    1487        1122 :         PageSetLSN(page, recptr);
    1488             :     }
    1489             : 
    1490             :     /*
    1491             :      * End critical section, so we safely can do visibility tests (which
    1492             :      * possibly need to perform IO and allocate memory!). If we crash now the
    1493             :      * page (including the corresponding vm bit) might not be marked all
    1494             :      * visible, but that's fine. A later vacuum will fix that.
    1495             :      */
    1496        1122 :     END_CRIT_SECTION();
    1497             : 
    1498             :     /*
    1499             :      * Now that we have removed the dead tuples from the page, once again
    1500             :      * check if the page has become all-visible.  The page is already marked
    1501             :      * dirty, exclusively locked, and, if needed, a full page image has been
    1502             :      * emitted in the log_heap_clean() above.
    1503             :      */
    1504        1122 :     if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid,
    1505             :                                  &all_frozen))
    1506        1106 :         PageSetAllVisible(page);
    1507             : 
    1508             :     /*
    1509             :      * All the changes to the heap page have been done. If the all-visible
    1510             :      * flag is now set, also set the VM all-visible bit (and, if possible, the
    1511             :      * all-frozen bit) unless this has already been done previously.
    1512             :      */
    1513        1122 :     if (PageIsAllVisible(page))
    1514             :     {
    1515        1106 :         uint8       vm_status = visibilitymap_get_status(onerel, blkno, vmbuffer);
    1516        1106 :         uint8       flags = 0;
    1517             : 
     1518             :         /* Decide which VM bits (all-visible, all-frozen) still need to be set */
    1519        1106 :         if ((vm_status & VISIBILITYMAP_ALL_VISIBLE) == 0)
    1520        1106 :             flags |= VISIBILITYMAP_ALL_VISIBLE;
    1521        1106 :         if ((vm_status & VISIBILITYMAP_ALL_FROZEN) == 0 && all_frozen)
    1522         650 :             flags |= VISIBILITYMAP_ALL_FROZEN;
    1523             : 
    1524        1106 :         Assert(BufferIsValid(*vmbuffer));
    1525        1106 :         if (flags != 0)
    1526        1106 :             visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr,
    1527             :                               *vmbuffer, visibility_cutoff_xid, flags);
    1528             :     }
    1529             : 
    1530        1122 :     return tupindex;
    1531             : }
    1532             : 
    1533             : /*
    1534             :  *  lazy_check_needs_freeze() -- scan page to see if any tuples
    1535             :  *                   need to be cleaned to avoid wraparound
    1536             :  *
     1537             :  * Returns true if the page needs to be vacuumed using a cleanup lock.
     1538             :  * Also returns a flag indicating whether the page contains any tuples at all.
    1539             :  */
    1540             : static bool
    1541           0 : lazy_check_needs_freeze(Buffer buf, bool *hastup)
    1542             : {
    1543           0 :     Page        page = BufferGetPage(buf);
    1544             :     OffsetNumber offnum,
    1545             :                 maxoff;
    1546             :     HeapTupleHeader tupleheader;
    1547             : 
    1548           0 :     *hastup = false;
    1549             : 
    1550             :     /* If we hit an uninitialized page, we want to force vacuuming it. */
    1551           0 :     if (PageIsNew(page))
    1552           0 :         return true;
    1553             : 
    1554             :     /* Quick out for ordinary empty page. */
    1555           0 :     if (PageIsEmpty(page))
    1556           0 :         return false;
    1557             : 
    1558           0 :     maxoff = PageGetMaxOffsetNumber(page);
    1559           0 :     for (offnum = FirstOffsetNumber;
    1560             :          offnum <= maxoff;
    1561           0 :          offnum = OffsetNumberNext(offnum))
    1562             :     {
    1563             :         ItemId      itemid;
    1564             : 
    1565           0 :         itemid = PageGetItemId(page, offnum);
    1566             : 
    1567             :         /* this should match hastup test in count_nondeletable_pages() */
    1568           0 :         if (ItemIdIsUsed(itemid))
    1569           0 :             *hastup = true;
    1570             : 
    1571             :         /* dead and redirect items never need freezing */
    1572           0 :         if (!ItemIdIsNormal(itemid))
    1573           0 :             continue;
    1574             : 
    1575           0 :         tupleheader = (HeapTupleHeader) PageGetItem(page, itemid);
    1576             : 
    1577           0 :         if (heap_tuple_needs_freeze(tupleheader, FreezeLimit,
    1578             :                                     MultiXactCutoff, buf))
    1579           0 :             return true;
    1580             :     }                           /* scan along page */
    1581             : 
    1582           0 :     return false;
    1583             : }
    1584             : 
    1585             : 
    1586             : /*
    1587             :  *  lazy_vacuum_index() -- vacuum one index relation.
    1588             :  *
    1589             :  *      Delete all the index entries pointing to tuples listed in
    1590             :  *      vacrelstats->dead_tuples, and update running statistics.
    1591             :  */
    1592             : static void
    1593         129 : lazy_vacuum_index(Relation indrel,
    1594             :                   IndexBulkDeleteResult **stats,
    1595             :                   LVRelStats *vacrelstats)
    1596             : {
    1597             :     IndexVacuumInfo ivinfo;
    1598             :     PGRUsage    ru0;
    1599             : 
    1600         129 :     pg_rusage_init(&ru0);
    1601             : 
    1602         129 :     ivinfo.index = indrel;
    1603         129 :     ivinfo.analyze_only = false;
    1604         129 :     ivinfo.estimated_count = true;
    1605         129 :     ivinfo.message_level = elevel;
    1606         129 :     ivinfo.num_heap_tuples = vacrelstats->old_rel_tuples;
    1607         129 :     ivinfo.strategy = vac_strategy;
    1608             : 
    1609             :     /* Do bulk deletion */
    1610         129 :     *stats = index_bulk_delete(&ivinfo, *stats,
    1611             :                                lazy_tid_reaped, (void *) vacrelstats);
    1612             : 
    1613         129 :     ereport(elevel,
    1614             :             (errmsg("scanned index \"%s\" to remove %d row versions",
    1615             :                     RelationGetRelationName(indrel),
    1616             :                     vacrelstats->num_dead_tuples),
    1617             :              errdetail_internal("%s", pg_rusage_show(&ru0))));
    1618         129 : }
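/*
 * A brief sketch of the callback contract used by index_bulk_delete() above,
 * assuming the IndexBulkDeleteCallback typedef from access/genam.h:
 *
 *     typedef bool (*IndexBulkDeleteCallback) (ItemPointer itemptr,
 *                                              void *state);
 *
 * The index AM invokes the callback once per index entry; lazy_tid_reaped
 * (the static function passed above, defined elsewhere in this file) returns
 * true when the entry's heap TID appears in vacrelstats->dead_tuples, which
 * tells the AM to delete that index entry.
 */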
    1619             : 
    1620             : /*
    1621             :  *  lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
    1622             :  */
    1623             : static void
    1624         445 : lazy_cleanup_index(Relation indrel,
    1625             :                    IndexBulkDeleteResult *stats,
    1626             :                    LVRelStats *vacrelstats)
    1627             : {
    1628             :     IndexVacuumInfo ivinfo;
    1629             :     PGRUsage    ru0;
    1630             : 
    1631         445 :     pg_rusage_init(&ru0);
    1632             : 
    1633         445 :     ivinfo.index = indrel;
    1634         445 :     ivinfo.analyze_only = false;
    1635         445 :     ivinfo.estimated_count = (vacrelstats->tupcount_pages < vacrelstats->rel_pages);
    1636         445 :     ivinfo.message_level = elevel;
    1637         445 :     ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
    1638         445 :     ivinfo.strategy = vac_strategy;
    1639             : 
    1640         445 :     stats = index_vacuum_cleanup(&ivinfo, stats);
    1641             : 
    1642         445 :     if (!stats)
    1643         449 :         return;
    1644             : 
    1645             :     /*
    1646             :      * Now update statistics in pg_class, but only if the index says the count
    1647             :      * is accurate.
    1648             :      */
    1649         441 :     if (!stats->estimated_count)
    1650         437 :         vac_update_relstats(indrel,
    1651             :                             stats->num_pages,
    1652             :                             stats->num_index_tuples,
    1653             :                             0,
    1654             :                             false,
    1655             :                             InvalidTransactionId,
    1656             :                             InvalidMultiXactId,
    1657             :                             false);
    1658             : 
    1659         441 :     ereport(elevel,
    1660             :             (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
    1661             :                     RelationGetRelationName(indrel),
    1662             :                     stats->num_index_tuples,
    1663             :                     stats->num_pages),
    1664             :              errdetail("%.0f index row versions were removed.\n"
    1665             :                        "%u index pages have been deleted, %u are currently reusable.\n"
    1666             :                        "%s.",
    1667             :                        stats->tuples_removed,
    1668             :                        stats->pages_deleted, stats->pages_free,
    1669             :                        pg_rusage_show(&ru0))));
    1670             : 
    1671         441 :     pfree(stats);
    1672             : }
    1673             : 
    1674             : /*
    1675             :  * should_attempt_truncation - should we attempt to truncate the heap?
    1676             :  *
    1677             :  * Don't even think about it unless we have a shot at releasing a goodly
    1678             :  * number of pages.  Otherwise, the time taken isn't worth it.
    1679             :  *
    1680             :  * Also don't attempt it if we are doing early pruning/vacuuming, because a
    1681             :  * scan which cannot find a truncated heap page cannot determine that the
    1682             :  * snapshot is too old to read that page.  We might be able to get away with
    1683             :  * truncating all except one of the pages, setting its LSN to (at least) the
    1684             :  * maximum of the truncated range if we also treated an index leaf tuple
    1685             :  * pointing to a missing heap page as something to trigger the "snapshot too
    1686             :  * old" error, but that seems fragile and seems like it deserves its own patch
    1687             :  * if we consider it.
    1688             :  *
    1689             :  * This is split out so that we can test whether truncation is going to be
    1690             :  * called for before we actually do it.  If you change the logic here, be
    1691             :  * careful to depend only on fields that lazy_scan_heap updates on-the-fly.
    1692             :  */
    1693             : static bool
    1694         392 : should_attempt_truncation(LVRelStats *vacrelstats)
    1695             : {
    1696             :     BlockNumber possibly_freeable;
    1697             : 
    1698         392 :     possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
    1699         392 :     if (possibly_freeable > 0 &&
    1700          16 :         (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
    1701          30 :          possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION) &&
    1702          14 :         old_snapshot_threshold < 0)
    1703          14 :         return true;
    1704             :     else
    1705         378 :         return false;
    1706             : }
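/*
 * A worked example of the test above, assuming the definitions earlier in
 * this file of REL_TRUNCATE_MINIMUM (1000 pages) and REL_TRUNCATE_FRACTION
 * (16): for an 8000-page relation, truncation is attempted once at least
 * 8000 / 16 = 500 trailing pages appear freeable, while for a very large
 * relation the flat 1000-page minimum suffices on its own, since the two
 * conditions are OR'ed together.  In all cases, enabling "snapshot too old"
 * support (old_snapshot_threshold >= 0) suppresses truncation entirely.
 */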
    1707             : 
    1708             : /*
    1709             :  * lazy_truncate_heap - try to truncate off any empty pages at the end
    1710             :  */
    1711             : static void
    1712          12 : lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
    1713             : {
    1714          12 :     BlockNumber old_rel_pages = vacrelstats->rel_pages;
    1715             :     BlockNumber new_rel_pages;
    1716             :     PGRUsage    ru0;
    1717             :     int         lock_retry;
    1718             : 
    1719          12 :     pg_rusage_init(&ru0);
    1720             : 
    1721             :     /* Report that we are now truncating */
    1722          12 :     pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
    1723             :                                  PROGRESS_VACUUM_PHASE_TRUNCATE);
    1724             : 
    1725             :     /*
    1726             :      * Loop until no more truncating can be done.
    1727             :      */
    1728             :     do
    1729             :     {
    1730             :         /*
    1731             :          * We need full exclusive lock on the relation in order to do
    1732             :          * truncation. If we can't get it, give up rather than waiting --- we
    1733             :          * don't want to block other backends, and we don't want to deadlock
    1734             :          * (which is quite possible considering we already hold a lower-grade
    1735             :          * lock).
    1736             :          */
    1737          12 :         vacrelstats->lock_waiter_detected = false;
    1738          12 :         lock_retry = 0;
    1739             :         while (true)
    1740             :         {
    1741          12 :             if (ConditionalLockRelation(onerel, AccessExclusiveLock))
    1742          12 :                 break;
    1743             : 
    1744             :             /*
    1745             :              * Check for interrupts while trying to (re-)acquire the exclusive
    1746             :              * lock.
    1747             :              */
    1748           0 :             CHECK_FOR_INTERRUPTS();
    1749             : 
    1750           0 :             if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT /
    1751             :                                 VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL))
    1752             :             {
    1753             :                 /*
    1754             :                  * We failed to establish the lock in the specified number of
    1755             :                  * retries. This means we give up truncating.
    1756             :                  */
    1757           0 :                 vacrelstats->lock_waiter_detected = true;
    1758           0 :                 ereport(elevel,
    1759             :                         (errmsg("\"%s\": stopping truncate due to conflicting lock request",
    1760             :                                 RelationGetRelationName(onerel))));
    1761           0 :                 return;
    1762             :             }
    1763             : 
    1764           0 :             pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L);
    1765           0 :         }
    1766             : 
    1767             :         /*
    1768             :          * Now that we have exclusive lock, look to see if the rel has grown
    1769             :          * whilst we were vacuuming with non-exclusive lock.  If so, give up;
    1770             :          * the newly added pages presumably contain non-deletable tuples.
    1771             :          */
    1772          12 :         new_rel_pages = RelationGetNumberOfBlocks(onerel);
    1773          12 :         if (new_rel_pages != old_rel_pages)
    1774             :         {
    1775             :             /*
    1776             :              * Note: we intentionally don't update vacrelstats->rel_pages with
    1777             :              * the new rel size here.  If we did, it would amount to assuming
    1778             :              * that the new pages are empty, which is unlikely. Leaving the
    1779             :              * numbers alone amounts to assuming that the new pages have the
    1780             :              * same tuple density as existing ones, which is less unlikely.
    1781             :              */
    1782           0 :             UnlockRelation(onerel, AccessExclusiveLock);
    1783           0 :             return;
    1784             :         }
    1785             : 
    1786             :         /*
    1787             :          * Scan backwards from the end to verify that the end pages actually
    1788             :          * contain no tuples.  This is *necessary*, not optional, because
    1789             :          * other backends could have added tuples to these pages whilst we
    1790             :          * were vacuuming.
    1791             :          */
    1792          12 :         new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
    1793             : 
    1794          12 :         if (new_rel_pages >= old_rel_pages)
    1795             :         {
    1796             :             /* can't do anything after all */
    1797           0 :             UnlockRelation(onerel, AccessExclusiveLock);
    1798           0 :             return;
    1799             :         }
    1800             : 
    1801             :         /*
    1802             :          * Okay to truncate.
    1803             :          */
    1804          12 :         RelationTruncate(onerel, new_rel_pages);
    1805             : 
    1806             :         /*
    1807             :          * We can release the exclusive lock as soon as we have truncated.
    1808             :          * Other backends can't safely access the relation until they have
    1809             :          * processed the smgr invalidation that smgrtruncate sent out ... but
    1810             :          * that should happen as part of standard invalidation processing once
    1811             :          * they acquire lock on the relation.
    1812             :          */
    1813          12 :         UnlockRelation(onerel, AccessExclusiveLock);
    1814             : 
    1815             :         /*
    1816             :          * Update statistics.  Here, it *is* correct to adjust rel_pages
    1817             :          * without also touching reltuples, since the tuple count wasn't
    1818             :          * changed by the truncation.
    1819             :          */
    1820          12 :         vacrelstats->pages_removed += old_rel_pages - new_rel_pages;
    1821          12 :         vacrelstats->rel_pages = new_rel_pages;
    1822             : 
    1823          12 :         ereport(elevel,
    1824             :                 (errmsg("\"%s\": truncated %u to %u pages",
    1825             :                         RelationGetRelationName(onerel),
    1826             :                         old_rel_pages, new_rel_pages),
    1827             :                  errdetail_internal("%s",
    1828             :                                     pg_rusage_show(&ru0))));
    1829          12 :         old_rel_pages = new_rel_pages;
    1830          12 :     } while (new_rel_pages > vacrelstats->nonempty_pages &&
    1831          12 :              vacrelstats->lock_waiter_detected);
    1832             : }
    1833             : 
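[Editor's note] The do-while loop that ends above retries the truncation pass only when count_nondeletable_pages() backed off because another backend queued up behind our AccessExclusiveLock, and there is still a truncatable tail beyond nonempty_pages. A minimal standalone sketch of that control flow is below; recount_tail() and the page numbers are illustrative stand-ins, not backend APIs.

#include <stdbool.h>
#include <stdio.h>

typedef unsigned int BlockNumber;

static bool lock_waiter_detected;

/* Stand-in for count_nondeletable_pages(): returns the new relation end. */
static BlockNumber
recount_tail(BlockNumber old_pages, BlockNumber nonempty)
{
    static bool first_pass = true;

    if (first_pass)
    {
        first_pass = false;
        lock_waiter_detected = true;   /* pretend a waiter queued up */
        return old_pages - 2;          /* gave up partway down the tail */
    }
    lock_waiter_detected = false;
    return nonempty;                   /* whole tail verified empty */
}

int
main(void)
{
    BlockNumber old_pages = 100;   /* relation size when we got the lock */
    BlockNumber nonempty = 90;     /* first page of the believed-empty tail */
    BlockNumber new_pages;

    do
    {
        new_pages = recount_tail(old_pages, nonempty);
        if (new_pages >= old_pages)
            break;                 /* nothing truncatable after all */
        printf("truncate %u -> %u pages\n", old_pages, new_pages);
        old_pages = new_pages;
        /* Retry only if we backed off for a waiter and tail remains. */
    } while (new_pages > nonempty && lock_waiter_detected);

    return 0;
}

With these illustrative numbers the sketch truncates twice (100 -> 98, then 98 -> 90), mirroring how the real loop can take more than one bite out of the tail when it keeps yielding to lock waiters.
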
    1834             : /*
    1835             :  * Rescan end pages to verify that they are (still) empty of tuples.
    1836             :  *
    1837             :  * Returns number of nondeletable pages (last nonempty page + 1).
    1838             :  */
    1839             : static BlockNumber
    1840          12 : count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
    1841             : {
    1842             :     BlockNumber blkno;
    1843             :     BlockNumber prefetchedUntil;
    1844             :     instr_time  starttime;
    1845             : 
    1846             :     /* Initialize the starttime if we check for conflicting lock requests */
    1847          12 :     INSTR_TIME_SET_CURRENT(starttime);
    1848             : 
    1849             :     /*
    1850             :      * Start checking blocks at what we believe relation end to be and move
    1851             :      * backwards.  (Strange coding of loop control is needed because blkno is
    1852             :      * unsigned.)  To make the scan faster, we prefetch a few blocks at a time
    1853             :      * in forward direction, so that OS-level readahead can kick in.
    1854             :      */
    1855          12 :     blkno = vacrelstats->rel_pages;
    1856             :     StaticAssertStmt((PREFETCH_SIZE & (PREFETCH_SIZE - 1)) == 0,
    1857             :                      "prefetch size must be power of 2");
    1858          12 :     prefetchedUntil = InvalidBlockNumber;
    1859         102 :     while (blkno > vacrelstats->nonempty_pages)
    1860             :     {
    1861             :         Buffer      buf;
    1862             :         Page        page;
    1863             :         OffsetNumber offnum,
    1864             :                     maxoff;
    1865             :         bool        hastup;
    1866             : 
    1867             :         /*
    1868             :          * Check if another process requests a lock on our relation. We are
    1869             :          * holding an AccessExclusiveLock here, so they will be waiting. We
    1870             :          * only do this once per VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL, and we
    1871             :          * only check if that interval has elapsed once every 32 blocks to
    1872             :          * keep the number of system calls and actual shared lock table
    1873             :          * lookups to a minimum.
    1874             :          */
    1875          78 :         if ((blkno % 32) == 0)
    1876             :         {
    1877             :             instr_time  currenttime;
    1878             :             instr_time  elapsed;
    1879             : 
    1880           2 :             INSTR_TIME_SET_CURRENT(currenttime);
    1881           2 :             elapsed = currenttime;
    1882           2 :             INSTR_TIME_SUBTRACT(elapsed, starttime);
    1883           2 :             if ((INSTR_TIME_GET_MICROSEC(elapsed) / 1000)
    1884             :                 >= VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL)
    1885             :             {
    1886           0 :                 if (LockHasWaitersRelation(onerel, AccessExclusiveLock))
    1887             :                 {
    1888           0 :                     ereport(elevel,
    1889             :                             (errmsg("\"%s\": suspending truncate due to conflicting lock request",
    1890             :                                     RelationGetRelationName(onerel))));
    1891             : 
    1892           0 :                     vacrelstats->lock_waiter_detected = true;
    1893           0 :                     return blkno;
    1894             :                 }
    1895           0 :                 starttime = currenttime;
    1896             :             }
    1897             :         }
    1898             : 
    1899             :         /*
    1900             :          * We don't insert a vacuum delay point here, because we have an
    1901             :          * exclusive lock on the table which we want to hold for as short a
    1902             :          * time as possible.  We still need to check for interrupts however.
    1903             :          */
    1904          78 :         CHECK_FOR_INTERRUPTS();
    1905             : 
    1906          78 :         blkno--;
    1907             : 
    1908             :         /* If we haven't prefetched this lot yet, do so now. */
    1909          78 :         if (prefetchedUntil > blkno)
    1910             :         {
    1911             :             BlockNumber prefetchStart;
    1912             :             BlockNumber pblkno;
    1913             : 
    1914          14 :             prefetchStart = blkno & ~(PREFETCH_SIZE - 1);
    1915         210 :             for (pblkno = prefetchStart; pblkno <= blkno; pblkno++)
    1916             :             {
    1917         196 :                 PrefetchBuffer(onerel, MAIN_FORKNUM, pblkno);
    1918         196 :                 CHECK_FOR_INTERRUPTS();
    1919             :             }
    1920          14 :             prefetchedUntil = prefetchStart;
    1921             :         }
    1922             : 
    1923          78 :         buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
    1924             :                                  RBM_NORMAL, vac_strategy);
    1925             : 
    1926             :         /* In this phase we only need shared access to the buffer */
    1927          78 :         LockBuffer(buf, BUFFER_LOCK_SHARE);
    1928             : 
    1929          78 :         page = BufferGetPage(buf);
    1930             : 
    1931          78 :         if (PageIsNew(page) || PageIsEmpty(page))
    1932             :         {
    1933             :             /* PageIsNew probably shouldn't happen... */
    1934           0 :             UnlockReleaseBuffer(buf);
    1935           0 :             continue;
    1936             :         }
    1937             : 
    1938          78 :         hastup = false;
    1939          78 :         maxoff = PageGetMaxOffsetNumber(page);
    1940        2473 :         for (offnum = FirstOffsetNumber;
    1941             :              offnum <= maxoff;
    1942        2317 :              offnum = OffsetNumberNext(offnum))
    1943             :         {
    1944             :             ItemId      itemid;
    1945             : 
    1946        2317 :             itemid = PageGetItemId(page, offnum);
    1947             : 
    1948             :             /*
    1949             :              * Note: any non-unused item should be taken as a reason to keep
    1950             :              * this page.  We formerly thought that DEAD tuples could be
    1951             :              * thrown away, but that's not so, because we'd not have cleaned
    1952             :              * out their index entries.
    1953             :              */
    1954        2317 :             if (ItemIdIsUsed(itemid))
    1955             :             {
    1956           0 :                 hastup = true;
    1957           0 :                 break;          /* can stop scanning */
    1958             :             }
    1959             :         }                       /* scan along page */
    1960             : 
    1961          78 :         UnlockReleaseBuffer(buf);
    1962             : 
    1963             :         /* Done scanning if we found a tuple here */
    1964          78 :         if (hastup)
    1965           0 :             return blkno + 1;
    1966             :     }
    1967             : 
    1968             :     /*
    1969             :      * If we fall out of the loop, all the previously-thought-to-be-empty
    1970             :      * pages still are; we need not bother to look at the last known-nonempty
    1971             :      * page.
    1972             :      */
    1973          12 :     return vacrelstats->nonempty_pages;
    1974             : }
    1975             : 
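[Editor's note] count_nondeletable_pages() reads blocks from high to low, but issues prefetch requests low to high within each power-of-two aligned window so that OS readahead still sees a forward pattern. The standalone sketch below shows only the window arithmetic; the window size of 32 is an assumption standing in for PREFETCH_SIZE, which is defined earlier in this file.

#include <stdio.h>

typedef unsigned int BlockNumber;

/* Illustrative window size; must be a power of two, like PREFETCH_SIZE. */
#define PREFETCH_SIZE ((BlockNumber) 32)

int
main(void)
{
    BlockNumber rel_pages = 70;        /* pretend relation size */
    BlockNumber nonempty = 0;          /* scan all the way down */
    BlockNumber blkno = rel_pages;
    BlockNumber prefetchedUntil = (BlockNumber) 0xFFFFFFFF;

    while (blkno > nonempty)
    {
        blkno--;
        if (prefetchedUntil > blkno)
        {
            /* Round down to the window start, then "prefetch" forward
             * from there up to the block we are about to read. */
            BlockNumber start = blkno & ~(PREFETCH_SIZE - 1);

            printf("prefetch blocks %u..%u (reading backwards from %u)\n",
                   start, blkno, blkno);
            prefetchedUntil = start;
        }
        /* ... read and inspect block blkno here ... */
    }
    return 0;
}

For a 70-block tail this issues three forward prefetch runs (64..69, 32..63, 0..31) even though the actual reads proceed strictly backwards.
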
    1976             : /*
    1977             :  * lazy_space_alloc - space allocation decisions for lazy vacuum
    1978             :  *
    1979             :  * See the comments at the head of this file for rationale.
    1980             :  */
    1981             : static void
    1982         390 : lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
    1983             : {
    1984             :     long        maxtuples;
    1985         793 :     int         vac_work_mem = IsAutoVacuumWorkerProcess() &&
    1986          13 :     autovacuum_work_mem != -1 ?
    1987         390 :     autovacuum_work_mem : maintenance_work_mem;
    1988             : 
    1989         390 :     if (vacrelstats->hasindex)
    1990             :     {
    1991         296 :         maxtuples = (vac_work_mem * 1024L) / sizeof(ItemPointerData);
    1992         296 :         maxtuples = Min(maxtuples, INT_MAX);
    1993         296 :         maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData));
    1994             : 
    1995             :         /* curious coding here to ensure the multiplication can't overflow */
    1996         296 :         if ((BlockNumber) (maxtuples / LAZY_ALLOC_TUPLES) > relblocks)
    1997         296 :             maxtuples = relblocks * LAZY_ALLOC_TUPLES;
    1998             : 
    1999             :         /* stay sane if small maintenance_work_mem */
    2000         296 :         maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
    2001             :     }
    2002             :     else
    2003             :     {
    2004          94 :         maxtuples = MaxHeapTuplesPerPage;
    2005             :     }
    2006             : 
    2007         390 :     vacrelstats->num_dead_tuples = 0;
    2008         390 :     vacrelstats->max_dead_tuples = (int) maxtuples;
    2009         390 :     vacrelstats->dead_tuples = (ItemPointer)
    2010         390 :         palloc(maxtuples * sizeof(ItemPointerData));
    2011         390 : }
    2012             : 
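[Editor's note] lazy_space_alloc() sizes the dead-TID array from the lesser of what the memory budget allows and what the table could ever need (LAZY_ALLOC_TUPLES per block), with a floor of one page's worth of tuples. The standalone sketch below walks through that arithmetic; the constants (6-byte ItemPointerData, 291 tuples per 8 kB page, ~1 GB MaxAllocSize) are typical stock-build values assumed for illustration.

#include <stdio.h>
#include <limits.h>

#define ITEM_POINTER_SIZE        6L          /* sizeof(ItemPointerData), assumed */
#define MAX_HEAP_TUPLES_PER_PAGE 291L        /* 8 kB pages, assumed */
#define LAZY_ALLOC_TUPLES        MAX_HEAP_TUPLES_PER_PAGE
#define MAX_ALLOC_SIZE           0x3fffffffL /* MaxAllocSize, ~1 GB */

static long Min(long a, long b) { return a < b ? a : b; }
static long Max(long a, long b) { return a > b ? a : b; }

int
main(void)
{
    long vac_work_mem = 65536;     /* maintenance_work_mem in kB (64 MB) */
    long relblocks = 1000;         /* table size in blocks */
    long maxtuples;

    /* Memory budget in TID slots, clamped to palloc-able sizes. */
    maxtuples = (vac_work_mem * 1024L) / ITEM_POINTER_SIZE;
    maxtuples = Min(maxtuples, INT_MAX);
    maxtuples = Min(maxtuples, MAX_ALLOC_SIZE / ITEM_POINTER_SIZE);

    /* Don't allocate more than the table could possibly need. */
    if (maxtuples / LAZY_ALLOC_TUPLES > relblocks)
        maxtuples = relblocks * LAZY_ALLOC_TUPLES;

    /* Floor: always room for at least one page's worth of tuples. */
    maxtuples = Max(maxtuples, MAX_HEAP_TUPLES_PER_PAGE);

    printf("dead-TID slots: %ld (%ld kB)\n",
           maxtuples, maxtuples * ITEM_POINTER_SIZE / 1024);
    return 0;
}

With these numbers the 64 MB budget would allow about 11 million TIDs, but the 1000-block cap wins: 291,000 slots, roughly 1.7 MB, which is why small tables never eat the whole maintenance_work_mem.
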
    2013             : /*
    2014             :  * lazy_record_dead_tuple - remember one deletable tuple
    2015             :  */
    2016             : static void
    2017       82231 : lazy_record_dead_tuple(LVRelStats *vacrelstats,
    2018             :                        ItemPointer itemptr)
    2019             : {
    2020             :     /*
    2021             :      * The array shouldn't overflow under normal behavior, but perhaps it
    2022             :      * could if we are given a really small maintenance_work_mem. In that
    2023             :      * case, just forget the last few tuples (we'll get 'em next time).
    2024             :      */
    2025       82231 :     if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
    2026             :     {
    2027       82231 :         vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
    2028       82231 :         vacrelstats->num_dead_tuples++;
    2029       82231 :         pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
    2030       82231 :                                      vacrelstats->num_dead_tuples);
    2031             :     }
    2032       82231 : }
    2033             : 
    2034             : /*
    2035             :  *  lazy_tid_reaped() -- is a particular tid deletable?
    2036             :  *
    2037             :  *      This has the right signature to be an IndexBulkDeleteCallback.
    2038             :  *
    2039             :  *      Assumes dead_tuples array is in sorted order.
    2040             :  */
    2041             : static bool
    2042      373464 : lazy_tid_reaped(ItemPointer itemptr, void *state)
    2043             : {
    2044      373464 :     LVRelStats *vacrelstats = (LVRelStats *) state;
    2045             :     ItemPointer res;
    2046             : 
    2047      746928 :     res = (ItemPointer) bsearch((void *) itemptr,
    2048      373464 :                                 (void *) vacrelstats->dead_tuples,
    2049      373464 :                                 vacrelstats->num_dead_tuples,
    2050             :                                 sizeof(ItemPointerData),
    2051             :                                 vac_cmp_itemptr);
    2052             : 
    2053      373464 :     return (res != NULL);
    2054             : }
    2055             : 
    2056             : /*
    2057             :  * Comparator routines for use with qsort() and bsearch().
    2058             :  */
    2059             : static int
    2060     4078155 : vac_cmp_itemptr(const void *left, const void *right)
    2061             : {
    2062             :     BlockNumber lblk,
    2063             :                 rblk;
    2064             :     OffsetNumber loff,
    2065             :                 roff;
    2066             : 
    2067     4078155 :     lblk = ItemPointerGetBlockNumber((ItemPointer) left);
    2068     4078155 :     rblk = ItemPointerGetBlockNumber((ItemPointer) right);
    2069             : 
    2070     4078155 :     if (lblk < rblk)
    2071     1370599 :         return -1;
    2072     2707556 :     if (lblk > rblk)
    2073     1297363 :         return 1;
    2074             : 
    2075     1410193 :     loff = ItemPointerGetOffsetNumber((ItemPointer) left);
    2076     1410193 :     roff = ItemPointerGetOffsetNumber((ItemPointer) right);
    2077             : 
    2078     1410193 :     if (loff < roff)
    2079      653374 :         return -1;
    2080      756819 :     if (loff > roff)
    2081      576486 :         return 1;
    2082             : 
    2083      180333 :     return 0;
    2084             : }
    2085             : 
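[Editor's note] lazy_tid_reaped() is the callback the index AMs invoke for every index entry during bulk delete; it answers "is this heap TID in the dead list?" with a plain bsearch(), which works because the heap pass records dead TIDs in physical order and vac_cmp_itemptr() compares by block first, then offset. The standalone sketch below shows the same pattern with a plain struct in place of ItemPointerData; all names in it are illustrative.

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

/* Stand-in for ItemPointerData: heap block number + line pointer offset. */
typedef struct
{
    unsigned int   block;
    unsigned short offset;
} Tid;

/* Order by block first, then by offset within the block. */
static int
tid_cmp(const void *left, const void *right)
{
    const Tid *l = (const Tid *) left;
    const Tid *r = (const Tid *) right;

    if (l->block < r->block)
        return -1;
    if (l->block > r->block)
        return 1;
    if (l->offset < r->offset)
        return -1;
    if (l->offset > r->offset)
        return 1;
    return 0;
}

/* Analogue of lazy_tid_reaped(): membership test over the sorted array. */
static bool
tid_reaped(const Tid *tid, const Tid *dead, size_t ndead)
{
    return bsearch(tid, dead, ndead, sizeof(Tid), tid_cmp) != NULL;
}

int
main(void)
{
    /* Dead TIDs as the heap pass would record them: already sorted. */
    Tid dead[] = {{3, 2}, {3, 7}, {5, 1}, {9, 4}};
    Tid probe1 = {5, 1};
    Tid probe2 = {5, 2};

    printf("(5,1) dead? %d\n", tid_reaped(&probe1, dead, 4));   /* 1 */
    printf("(5,2) dead? %d\n", tid_reaped(&probe2, dead, 4));   /* 0 */
    return 0;
}

Keeping the array in physical TID order costs nothing (the heap is scanned in that order anyway) and turns each index-entry check into an O(log n) lookup instead of a linear scan.
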
    2086             : /*
    2087             :  * Check if every tuple in the given page is visible to all current and future
    2088             :  * transactions. Also return the visibility_cutoff_xid which is the highest
    2089             :  * xmin amongst the visible tuples.  Set *all_frozen to true if every tuple
    2090             :  * on this page is frozen.
    2091             :  */
    2092             : static bool
    2093        1122 : heap_page_is_all_visible(Relation rel, Buffer buf,
    2094             :                          TransactionId *visibility_cutoff_xid,
    2095             :                          bool *all_frozen)
    2096             : {
    2097        1122 :     Page        page = BufferGetPage(buf);
    2098        1122 :     BlockNumber blockno = BufferGetBlockNumber(buf);
    2099             :     OffsetNumber offnum,
    2100             :                 maxoff;
    2101        1122 :     bool        all_visible = true;
    2102             : 
    2103        1122 :     *visibility_cutoff_xid = InvalidTransactionId;
    2104        1122 :     *all_frozen = true;
    2105             : 
    2106             :     /*
    2107             :      * This is a stripped down version of the line pointer scan in
    2108             :      * lazy_scan_heap(). So if you change anything here, also check that code.
    2109             :      */
    2110        1122 :     maxoff = PageGetMaxOffsetNumber(page);
    2111      109748 :     for (offnum = FirstOffsetNumber;
    2112      107519 :          offnum <= maxoff && all_visible;
    2113      107504 :          offnum = OffsetNumberNext(offnum))
    2114             :     {
    2115             :         ItemId      itemid;
    2116             :         HeapTupleData tuple;
    2117             : 
    2118      107504 :         itemid = PageGetItemId(page, offnum);
    2119             : 
    2120             :         /* Unused or redirect line pointers are of no interest */
    2121      107504 :         if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
    2122       88091 :             continue;
    2123             : 
    2124       19413 :         ItemPointerSet(&(tuple.t_self), blockno, offnum);
    2125             : 
    2126             :         /*
    2127             :          * Dead line pointers can have index pointers pointing to them. So
    2128             :          * they can't be treated as visible
    2129             :          */
    2130       19413 :         if (ItemIdIsDead(itemid))
    2131             :         {
    2132           0 :             all_visible = false;
    2133           0 :             *all_frozen = false;
    2134           0 :             break;
    2135             :         }
    2136             : 
    2137       19413 :         Assert(ItemIdIsNormal(itemid));
    2138             : 
    2139       19413 :         tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
    2140       19413 :         tuple.t_len = ItemIdGetLength(itemid);
    2141       19413 :         tuple.t_tableOid = RelationGetRelid(rel);
    2142             : 
    2143       19413 :         switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
    2144             :         {
    2145             :             case HEAPTUPLE_LIVE:
    2146             :                 {
    2147             :                     TransactionId xmin;
    2148             : 
    2149             :                     /* Check comments in lazy_scan_heap. */
    2150       19397 :                     if (!HeapTupleHeaderXminCommitted(tuple.t_data))
    2151             :                     {
    2152           0 :                         all_visible = false;
    2153           0 :                         *all_frozen = false;
    2154           0 :                         break;
    2155             :                     }
    2156             : 
    2157             :                     /*
    2158             :                      * The inserter definitely committed. But is it old enough
    2159             :                      * that everyone sees it as committed?
    2160             :                      */
    2161       19397 :                     xmin = HeapTupleHeaderGetXmin(tuple.t_data);
    2162       19397 :                     if (!TransactionIdPrecedes(xmin, OldestXmin))
    2163             :                     {
    2164           0 :                         all_visible = false;
    2165           0 :                         *all_frozen = false;
    2166           0 :                         break;
    2167             :                     }
    2168             : 
    2169             :                     /* Track newest xmin on page. */
    2170       19397 :                     if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
    2171        1997 :                         *visibility_cutoff_xid = xmin;
    2172             : 
    2173             :                     /* Check whether this tuple is already frozen or not */
    2174       22794 :                     if (all_visible && *all_frozen &&
    2175        3397 :                         heap_tuple_needs_eventual_freeze(tuple.t_data))
    2176         464 :                         *all_frozen = false;
    2177             :                 }
    2178       19397 :                 break;
    2179             : 
    2180             :             case HEAPTUPLE_DEAD:
    2181             :             case HEAPTUPLE_RECENTLY_DEAD:
    2182             :             case HEAPTUPLE_INSERT_IN_PROGRESS:
    2183             :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    2184             :                 {
    2185          16 :                     all_visible = false;
    2186          16 :                     *all_frozen = false;
    2187          16 :                     break;
    2188             :                 }
    2189             :             default:
    2190           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    2191             :                 break;
    2192             :         }
    2193             :     }                           /* scan along page */
    2194             : 
    2195        1122 :     return all_visible;
    2196             : }
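
[Editor's note] The per-tuple decision above boils down to a small table: a dead line pointer or any tuple that is not plainly LIVE with a committed, old-enough xmin clears all_visible (and with it all_frozen), while a live tuple that still carries a freezable xid clears only all_frozen. The standalone model below compresses that table; the status enum and helper names are illustrative simplifications, not the HeapTupleSatisfiesVacuum machinery.

#include <stdio.h>
#include <stdbool.h>

/* Simplified stand-in for the HeapTupleSatisfiesVacuum verdicts. */
typedef enum { T_LIVE, T_DEAD, T_RECENTLY_DEAD, T_IN_PROGRESS } TupleStatus;

typedef struct
{
    TupleStatus status;
    bool        xmin_old_enough;   /* committed and precedes OldestXmin */
    bool        needs_freeze;      /* still carries a freezable xid */
} Tuple;

/* Every tuple must be LIVE with an old-enough, committed xmin for the
 * page to stay all-visible; a live tuple that still needs freezing
 * only clears all_frozen. */
static bool
page_is_all_visible(const Tuple *tuples, int ntuples, bool *all_frozen)
{
    bool all_visible = true;

    *all_frozen = true;
    for (int i = 0; i < ntuples && all_visible; i++)
    {
        const Tuple *t = &tuples[i];

        if (t->status != T_LIVE || !t->xmin_old_enough)
        {
            all_visible = false;
            *all_frozen = false;
        }
        else if (t->needs_freeze)
            *all_frozen = false;
    }
    return all_visible;
}

int
main(void)
{
    Tuple page[] = {
        {T_LIVE, true, false},
        {T_LIVE, true, true},      /* visible, but not yet frozen */
    };
    bool frozen;
    bool visible = page_is_all_visible(page, 2, &frozen);

    printf("all_visible=%d all_frozen=%d\n", visible, frozen);
    return 0;
}

Here the page stays all-visible but not all-frozen, which matches how the real function lets VACUUM set the visibility-map bit while leaving the all-frozen bit for a later freeze.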

Generated by: LCOV version 1.11