LCOV - code coverage report
Current view: top level - src/backend/commands - cluster.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 409 461 88.7 %
Date: 2017-09-29 15:12:54 Functions: 11 11 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * cluster.c
       4             :  *    CLUSTER a table on an index.  This is now also used for VACUUM FULL.
       5             :  *
       6             :  * There is hardly anything left of Paul Brown's original implementation...
       7             :  *
       8             :  *
       9             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994-5, Regents of the University of California
      11             :  *
      12             :  *
      13             :  * IDENTIFICATION
      14             :  *    src/backend/commands/cluster.c
      15             :  *
      16             :  *-------------------------------------------------------------------------
      17             :  */
      18             : #include "postgres.h"
      19             : 
      20             : #include "access/amapi.h"
      21             : #include "access/multixact.h"
      22             : #include "access/relscan.h"
      23             : #include "access/rewriteheap.h"
      24             : #include "access/transam.h"
      25             : #include "access/tuptoaster.h"
      26             : #include "access/xact.h"
      27             : #include "access/xlog.h"
      28             : #include "catalog/pg_am.h"
      29             : #include "catalog/catalog.h"
      30             : #include "catalog/dependency.h"
      31             : #include "catalog/heap.h"
      32             : #include "catalog/index.h"
      33             : #include "catalog/namespace.h"
      34             : #include "catalog/objectaccess.h"
      35             : #include "catalog/toasting.h"
      36             : #include "commands/cluster.h"
      37             : #include "commands/tablecmds.h"
      38             : #include "commands/vacuum.h"
      39             : #include "miscadmin.h"
      40             : #include "optimizer/planner.h"
      41             : #include "storage/bufmgr.h"
      42             : #include "storage/lmgr.h"
      43             : #include "storage/predicate.h"
      44             : #include "storage/smgr.h"
      45             : #include "utils/acl.h"
      46             : #include "utils/fmgroids.h"
      47             : #include "utils/inval.h"
      48             : #include "utils/lsyscache.h"
      49             : #include "utils/memutils.h"
      50             : #include "utils/pg_rusage.h"
      51             : #include "utils/relmapper.h"
      52             : #include "utils/snapmgr.h"
      53             : #include "utils/syscache.h"
      54             : #include "utils/tqual.h"
      55             : #include "utils/tuplesort.h"
      56             : 
      57             : 
      58             : /*
      59             :  * This struct is used to pass around the information on tables to be
      60             :  * clustered. We need this so we can make a list of them when invoked without
      61             :  * a specific table/index pair.
      62             :  */
      63             : typedef struct
      64             : {
      65             :     Oid         tableOid;
      66             :     Oid         indexOid;
      67             : } RelToCluster;
      68             : 
      69             : 
      70             : static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
      71             : static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
      72             :                bool verbose, bool *pSwapToastByContent,
      73             :                TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
      74             : static List *get_tables_to_cluster(MemoryContext cluster_context);
      75             : static void reform_and_rewrite_tuple(HeapTuple tuple,
      76             :                          TupleDesc oldTupDesc, TupleDesc newTupDesc,
      77             :                          Datum *values, bool *isnull,
      78             :                          bool newRelHasOids, RewriteState rwstate);
      79             : 
      80             : 
      81             : /*---------------------------------------------------------------------------
      82             :  * This cluster code allows for clustering multiple tables at once. Because
      83             :  * of this, we cannot just run everything in a single transaction, or we
      84             :  * would be forced to acquire exclusive locks on all the tables being
      85             :  * clustered, simultaneously --- very likely leading to deadlock.
      86             :  *
      87             :  * To solve this we follow a strategy similar to that of the VACUUM code,
      88             :  * clustering each relation in a separate transaction. For this to work,
      89             :  * we need to:
      90             :  *  - provide a separate memory context so that we can pass information in
      91             :  *    a way that survives across transactions
      92             :  *  - start a new transaction every time a new relation is clustered
      93             :  *  - check for validity of the information on to-be-clustered relations,
      94             :  *    as someone might have deleted a relation behind our back, or
      95             :  *    clustered one on a different index
      96             :  *  - end the transaction
      97             :  *
      98             :  * The single-relation case does not have any such overhead.
      99             :  *
     100             :  * We also allow a relation to be specified without index.  In that case,
     101             :  * the indisclustered bit will be looked up, and an ERROR will be thrown
     102             :  * if there is no index with the bit set.
     103             :  *---------------------------------------------------------------------------
     104             :  */
     105             : void
     106          11 : cluster(ClusterStmt *stmt, bool isTopLevel)
     107             : {
     108          11 :     if (stmt->relation != NULL)
     109             :     {
     110             :         /* This is the single-relation case. */
     111             :         Oid         tableOid,
     112          10 :                     indexOid = InvalidOid;
     113             :         Relation    rel;
     114             : 
     115             :         /* Find, lock, and check permissions on the table */
     116          10 :         tableOid = RangeVarGetRelidExtended(stmt->relation,
     117             :                                             AccessExclusiveLock,
     118             :                                             false, false,
     119             :                                             RangeVarCallbackOwnsTable, NULL);
     120          10 :         rel = heap_open(tableOid, NoLock);
     121             : 
     122             :         /*
     123             :          * Reject clustering a remote temp table ... their local buffer
     124             :          * manager is not going to cope.
     125             :          */
     126          10 :         if (RELATION_IS_OTHER_TEMP(rel))
     127           0 :             ereport(ERROR,
     128             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     129             :                      errmsg("cannot cluster temporary tables of other sessions")));
     130             : 
     131          10 :         if (stmt->indexname == NULL)
     132             :         {
     133             :             ListCell   *index;
     134             : 
     135             :             /* We need to find the index that has indisclustered set. */
     136           4 :             foreach(index, RelationGetIndexList(rel))
     137             :             {
     138             :                 HeapTuple   idxtuple;
     139             :                 Form_pg_index indexForm;
     140             : 
     141           3 :                 indexOid = lfirst_oid(index);
     142           3 :                 idxtuple = SearchSysCache1(INDEXRELID,
     143             :                                            ObjectIdGetDatum(indexOid));
     144           3 :                 if (!HeapTupleIsValid(idxtuple))
     145           0 :                     elog(ERROR, "cache lookup failed for index %u", indexOid);
     146           3 :                 indexForm = (Form_pg_index) GETSTRUCT(idxtuple);
     147           3 :                 if (indexForm->indisclustered)
     148             :                 {
     149           2 :                     ReleaseSysCache(idxtuple);
     150           2 :                     break;
     151             :                 }
     152           1 :                 ReleaseSysCache(idxtuple);
     153           1 :                 indexOid = InvalidOid;
     154             :             }
     155             : 
     156           3 :             if (!OidIsValid(indexOid))
     157           1 :                 ereport(ERROR,
     158             :                         (errcode(ERRCODE_UNDEFINED_OBJECT),
     159             :                          errmsg("there is no previously clustered index for table \"%s\"",
     160             :                                 stmt->relation->relname)));
     161             :         }
     162             :         else
     163             :         {
     164             :             /*
     165             :              * The index is expected to be in the same namespace as the
     166             :              * relation.
     167             :              */
     168           7 :             indexOid = get_relname_relid(stmt->indexname,
     169           7 :                                          rel->rd_rel->relnamespace);
     170           7 :             if (!OidIsValid(indexOid))
     171           0 :                 ereport(ERROR,
     172             :                         (errcode(ERRCODE_UNDEFINED_OBJECT),
     173             :                          errmsg("index \"%s\" for table \"%s\" does not exist",
     174             :                                 stmt->indexname, stmt->relation->relname)));
     175             :         }
     176             : 
     177             :         /* close relation, keep lock till commit */
     178           9 :         heap_close(rel, NoLock);
     179             : 
     180             :         /* Do the job. */
     181           9 :         cluster_rel(tableOid, indexOid, false, stmt->verbose);
     182             :     }
     183             :     else
     184             :     {
     185             :         /*
     186             :          * This is the "multi relation" case. We need to cluster all tables
     187             :          * that have some index with indisclustered set.
     188             :          */
     189             :         MemoryContext cluster_context;
     190             :         List       *rvs;
     191             :         ListCell   *rv;
     192             : 
     193             :         /*
     194             :          * We cannot run this form of CLUSTER inside a user transaction block;
     195             :          * we'd be holding locks way too long.
     196             :          */
     197           1 :         PreventTransactionChain(isTopLevel, "CLUSTER");
     198             : 
     199             :         /*
     200             :          * Create special memory context for cross-transaction storage.
     201             :          *
     202             :          * Since it is a child of PortalContext, it will go away even in case
     203             :          * of error.
     204             :          */
     205           1 :         cluster_context = AllocSetContextCreate(PortalContext,
     206             :                                                 "Cluster",
     207             :                                                 ALLOCSET_DEFAULT_SIZES);
     208             : 
     209             :         /*
     210             :          * Build the list of relations to cluster.  Note that this lives in
     211             :          * cluster_context.
     212             :          */
     213           1 :         rvs = get_tables_to_cluster(cluster_context);
     214             : 
     215             :         /* Commit to get out of starting transaction */
     216           1 :         PopActiveSnapshot();
     217           1 :         CommitTransactionCommand();
     218             : 
     219             :         /* Ok, now that we've got them all, cluster them one by one */
     220           2 :         foreach(rv, rvs)
     221             :         {
     222           1 :             RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
     223             : 
     224             :             /* Start a new transaction for each relation. */
     225           1 :             StartTransactionCommand();
     226             :             /* functions in indexes may want a snapshot set */
     227           1 :             PushActiveSnapshot(GetTransactionSnapshot());
     228             :             /* Do the job. */
     229           1 :             cluster_rel(rvtc->tableOid, rvtc->indexOid, true, stmt->verbose);
     230           1 :             PopActiveSnapshot();
     231           1 :             CommitTransactionCommand();
     232             :         }
     233             : 
     234             :         /* Start a new transaction for the cleanup work. */
     235           1 :         StartTransactionCommand();
     236             : 
     237             :         /* Clean up working storage */
     238           1 :         MemoryContextDelete(cluster_context);
     239             :     }
     240          10 : }
     241             : 
     242             : /*
     243             :  * cluster_rel
     244             :  *
     245             :  * This clusters the table by creating a new, clustered table and
     246             :  * swapping the relfilenodes of the new table and the old table, so
     247             :  * the OID of the original table is preserved.  Thus we do not lose
     248             :  * GRANTs, inheritance, or references to this table (this was a bug
     249             :  * in releases through 7.3).
     250             :  *
     251             :  * Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
     252             :  * the new table, it's better to create the indexes afterwards than to fill
     253             :  * them incrementally while we load the table.
     254             :  *
     255             :  * If indexOid is InvalidOid, the table will be rewritten in physical order
     256             :  * instead of index order.  This is the new implementation of VACUUM FULL,
     257             :  * and error messages should refer to the operation as VACUUM not CLUSTER.
     258             :  */
     259             : void
     260          22 : cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose)
     261             : {
     262             :     Relation    OldHeap;
     263             : 
     264             :     /* Check for user-requested abort. */
     265          22 :     CHECK_FOR_INTERRUPTS();
     266             : 
     267             :     /*
     268             :      * We grab exclusive access to the target rel and index for the duration
     269             :      * of the transaction.  (This is redundant for the single-transaction
     270             :      * case, since cluster() already did it.)  The index lock is taken inside
     271             :      * check_index_is_clusterable.
     272             :      */
     273          22 :     OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
     274             : 
     275             :     /* If the table has gone away, we can skip processing it */
     276          22 :     if (!OldHeap)
     277           0 :         return;
     278             : 
     279             :     /*
     280             :      * Since we may open a new transaction for each relation, we have to check
     281             :      * that the relation still is what we think it is.
     282             :      *
     283             :      * If this is a single-transaction CLUSTER, we can skip these tests. We
     284             :      * *must* skip the one on indisclustered since it would reject an attempt
     285             :      * to cluster a not-previously-clustered index.
     286             :      */
     287          22 :     if (recheck)
     288             :     {
     289             :         HeapTuple   tuple;
     290             :         Form_pg_index indexForm;
     291             : 
     292             :         /* Check that the user still owns the relation */
     293           1 :         if (!pg_class_ownercheck(tableOid, GetUserId()))
     294             :         {
     295           0 :             relation_close(OldHeap, AccessExclusiveLock);
     296           0 :             return;
     297             :         }
     298             : 
     299             :         /*
     300             :          * Silently skip a temp table for a remote session.  Only doing this
     301             :          * check in the "recheck" case is appropriate (which currently means
     302             :          * somebody is executing a database-wide CLUSTER), because there is
     303             :          * another check in cluster() which will stop any attempt to cluster
     304             :          * remote temp tables by name.  The later RELATION_IS_OTHER_TEMP check in
     305             :          * this function is then redundant, but we leave it for extra safety.
     306             :          */
     307           1 :         if (RELATION_IS_OTHER_TEMP(OldHeap))
     308             :         {
     309           0 :             relation_close(OldHeap, AccessExclusiveLock);
     310           0 :             return;
     311             :         }
     312             : 
     313           1 :         if (OidIsValid(indexOid))
     314             :         {
     315             :             /*
     316             :              * Check that the index still exists
     317             :              */
     318           1 :             if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
     319             :             {
     320           0 :                 relation_close(OldHeap, AccessExclusiveLock);
     321           0 :                 return;
     322             :             }
     323             : 
     324             :             /*
     325             :              * Check that the index is still the one with indisclustered set.
     326             :              */
     327           1 :             tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid));
     328           1 :             if (!HeapTupleIsValid(tuple))   /* probably can't happen */
     329             :             {
     330           0 :                 relation_close(OldHeap, AccessExclusiveLock);
     331           0 :                 return;
     332             :             }
     333           1 :             indexForm = (Form_pg_index) GETSTRUCT(tuple);
     334           1 :             if (!indexForm->indisclustered)
     335             :             {
     336           0 :                 ReleaseSysCache(tuple);
     337           0 :                 relation_close(OldHeap, AccessExclusiveLock);
     338           0 :                 return;
     339             :             }
     340           1 :             ReleaseSysCache(tuple);
     341             :         }
     342             :     }
     343             : 
     344             :     /*
     345             :      * We allow VACUUM FULL, but not CLUSTER, on shared catalogs.  CLUSTER
     346             :      * would work in most respects, but the index would only get marked as
     347             :      * indisclustered in the current database, leading to unexpected behavior
     348             :      * if CLUSTER were later invoked in another database.
     349             :      */
     350          22 :     if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
     351           0 :         ereport(ERROR,
     352             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     353             :                  errmsg("cannot cluster a shared catalog")));
     354             : 
     355             :     /*
     356             :      * Don't process temp tables of other backends ... their local buffer
     357             :      * manager is not going to cope.
     358             :      */
     359          22 :     if (RELATION_IS_OTHER_TEMP(OldHeap))
     360             :     {
     361           0 :         if (OidIsValid(indexOid))
     362           0 :             ereport(ERROR,
     363             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     364             :                      errmsg("cannot cluster temporary tables of other sessions")));
     365             :         else
     366           0 :             ereport(ERROR,
     367             :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     368             :                      errmsg("cannot vacuum temporary tables of other sessions")));
     369             :     }
     370             : 
     371             :     /*
     372             :      * Also check for active uses of the relation in the current transaction,
     373             :      * including open scans and pending AFTER trigger events.
     374             :      */
     375          22 :     CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
     376             : 
     377             :     /* Check heap and index are valid to cluster on */
     378          22 :     if (OidIsValid(indexOid))
     379          10 :         check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock);
     380             : 
     381             :     /*
     382             :      * Quietly ignore the request if this is a materialized view which has not
     383             :      * been populated from its query. No harm is done because there is no data
     384             :      * to deal with, and we don't want to throw an error if this is part of a
     385             :      * multi-relation request -- for example, CLUSTER was run on the entire
     386             :      * database.
     387             :      */
     388          22 :     if (OldHeap->rd_rel->relkind == RELKIND_MATVIEW &&
     389           0 :         !RelationIsPopulated(OldHeap))
     390             :     {
     391           0 :         relation_close(OldHeap, AccessExclusiveLock);
     392           0 :         return;
     393             :     }
     394             : 
     395             :     /*
     396             :      * All predicate locks on the tuples or pages are about to be made
     397             :      * invalid, because we move tuples around.  Promote them to relation
     398             :      * locks.  Predicate locks on indexes will be promoted when they are
     399             :      * reindexed.
     400             :      */
     401          22 :     TransferPredicateLocksToHeapRelation(OldHeap);
     402             : 
     403             :     /* rebuild_relation does all the dirty work */
     404          22 :     rebuild_relation(OldHeap, indexOid, verbose);
     405             : 
     406             :     /* NB: rebuild_relation does heap_close() on OldHeap */
     407             : }
     408             : 
     409             : /*
     410             :  * Verify that the specified heap and index are valid to cluster on
     411             :  *
     412             :  * Side effect: obtains lock on the index.  The caller may
     413             :  * in some cases already have AccessExclusiveLock on the table, but
     414             :  * not in all cases so we can't rely on the table-level lock for
     415             :  * protection here.
     416             :  */
     417             : void
     418          13 : check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck, LOCKMODE lockmode)
     419             : {
     420             :     Relation    OldIndex;
     421             : 
     422          13 :     OldIndex = index_open(indexOid, lockmode);
     423             : 
     424             :     /*
     425             :      * Check that index is in fact an index on the given relation
     426             :      */
     427          26 :     if (OldIndex->rd_index == NULL ||
     428          13 :         OldIndex->rd_index->indrelid != RelationGetRelid(OldHeap))
     429           0 :         ereport(ERROR,
     430             :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
     431             :                  errmsg("\"%s\" is not an index for table \"%s\"",
     432             :                         RelationGetRelationName(OldIndex),
     433             :                         RelationGetRelationName(OldHeap))));
     434             : 
     435             :     /* Index AM must allow clustering */
     436          13 :     if (!OldIndex->rd_amroutine->amclusterable)
     437           0 :         ereport(ERROR,
     438             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     439             :                  errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
     440             :                         RelationGetRelationName(OldIndex))));
     441             : 
     442             :     /*
     443             :      * Disallow clustering on incomplete indexes (those that might not index
     444             :      * every row of the relation).  We could relax this by making a separate
     445             :      * seqscan pass over the table to copy the missing rows, but that seems
     446             :      * expensive and tedious.
     447             :      */
     448          13 :     if (!heap_attisnull(OldIndex->rd_indextuple, Anum_pg_index_indpred))
     449           0 :         ereport(ERROR,
     450             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     451             :                  errmsg("cannot cluster on partial index \"%s\"",
     452             :                         RelationGetRelationName(OldIndex))));
     453             : 
     454             :     /*
     455             :      * Disallow if index is left over from a failed CREATE INDEX CONCURRENTLY;
     456             :      * it might well not contain entries for every heap row, or might not even
     457             :      * be internally consistent.  (But note that we don't check indcheckxmin;
     458             :      * the worst consequence of following broken HOT chains would be that we
     459             :      * might put recently-dead tuples out-of-order in the new table, and there
     460             :      * is little harm in that.)
     461             :      */
     462          13 :     if (!IndexIsValid(OldIndex->rd_index))
     463           0 :         ereport(ERROR,
     464             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     465             :                  errmsg("cannot cluster on invalid index \"%s\"",
     466             :                         RelationGetRelationName(OldIndex))));
     467             : 
     468             :     /* Drop relcache refcnt on OldIndex, but keep lock */
     469          13 :     index_close(OldIndex, NoLock);
     470          13 : }
     471             : 
     472             : /*
     473             :  * mark_index_clustered: mark the specified index as the one clustered on
     474             :  *
     475             :  * With indexOid == InvalidOid, will mark all indexes of rel not-clustered.
     476             :  */
     477             : void
     478          15 : mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
     479             : {
     480             :     HeapTuple   indexTuple;
     481             :     Form_pg_index indexForm;
     482             :     Relation    pg_index;
     483             :     ListCell   *index;
     484             : 
     485             :     /*
     486             :      * If the index is already marked clustered, no need to do anything.
     487             :      */
     488          15 :     if (OidIsValid(indexOid))
     489             :     {
     490          13 :         indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid));
     491          13 :         if (!HeapTupleIsValid(indexTuple))
     492           0 :             elog(ERROR, "cache lookup failed for index %u", indexOid);
     493          13 :         indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
     494             : 
     495          13 :         if (indexForm->indisclustered)
     496             :         {
     497           4 :             ReleaseSysCache(indexTuple);
     498          19 :             return;
     499             :         }
     500             : 
     501           9 :         ReleaseSysCache(indexTuple);
     502             :     }
     503             : 
     504             :     /*
     505             :      * Check each index of the relation and set/clear the bit as needed.
     506             :      */
     507          11 :     pg_index = heap_open(IndexRelationId, RowExclusiveLock);
     508             : 
     509          34 :     foreach(index, RelationGetIndexList(rel))
     510             :     {
     511          23 :         Oid         thisIndexOid = lfirst_oid(index);
     512             : 
     513          23 :         indexTuple = SearchSysCacheCopy1(INDEXRELID,
     514             :                                          ObjectIdGetDatum(thisIndexOid));
     515          23 :         if (!HeapTupleIsValid(indexTuple))
     516           0 :             elog(ERROR, "cache lookup failed for index %u", thisIndexOid);
     517          23 :         indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
     518             : 
     519             :         /*
     520             :          * Unset the bit if set.  We know it's wrong because we already returned
     521             :          * above if the target index was the one marked clustered.
     522             :          */
     523          23 :         if (indexForm->indisclustered)
     524             :         {
     525           3 :             indexForm->indisclustered = false;
     526           3 :             CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
     527             :         }
     528          20 :         else if (thisIndexOid == indexOid)
     529             :         {
     530             :             /* this was checked earlier, but let's be real sure */
     531           9 :             if (!IndexIsValid(indexForm))
     532           0 :                 elog(ERROR, "cannot cluster on invalid index %u", indexOid);
     533           9 :             indexForm->indisclustered = true;
     534           9 :             CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
     535             :         }
     536             : 
     537          23 :         InvokeObjectPostAlterHookArg(IndexRelationId, thisIndexOid, 0,
     538             :                                      InvalidOid, is_internal);
     539             : 
     540          23 :         heap_freetuple(indexTuple);
     541             :     }
     542             : 
     543          11 :     heap_close(pg_index, RowExclusiveLock);
     544             : }
     545             : 
     546             : /*
     547             :  * rebuild_relation: rebuild an existing relation in index or physical order
     548             :  *
     549             :  * OldHeap: table to rebuild --- must be opened and exclusive-locked!
     550             :  * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
                     :  * verbose: passed through to copy_heap_data, which uses it to pick the
                     :  *      message level (INFO rather than DEBUG2) of its progress reports.
     551             :  *
                     :  * The work is done in three steps: make_new_heap creates a transient
                     :  * table, copy_heap_data fills it from the old one, and finish_heap_swap
                     :  * swaps it into place.
                     :  *
     552             :  * NB: this routine closes OldHeap at the right time; caller should not.
     553             :  */
     554             : static void
     555          22 : rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
     556             : {
     557          22 :     Oid         tableOid = RelationGetRelid(OldHeap);
     558          22 :     Oid         tableSpace = OldHeap->rd_rel->reltablespace;
     559             :     Oid         OIDNewHeap;
     560             :     char        relpersistence;
     561             :     bool        is_system_catalog;
     562             :     bool        swap_toast_by_content;
     563             :     TransactionId frozenXid;
     564             :     MultiXactId cutoffMulti;
     565             : 
     566             :     /*
                     :      * Mark the correct index as clustered.  There is nothing to mark when
                     :      * rewriting in physical order (indexOid is InvalidOid).
                     :      */
     567          22 :     if (OidIsValid(indexOid))
     568          10 :         mark_index_clustered(OldHeap, indexOid, true);
     569             : 
     570             :     /*
                     :      * Remember info about rel before closing OldHeap.  Both values are
                     :      * still needed after the heap_close below: relpersistence by
                     :      * make_new_heap and finish_heap_swap, is_system_catalog by
                     :      * finish_heap_swap.
                     :      */
     571          22 :     relpersistence = OldHeap->rd_rel->relpersistence;
     572          22 :     is_system_catalog = IsSystemRelation(OldHeap);
     573             : 
     574             :     /* Close relcache entry, but keep lock until transaction commit */
     575          22 :     heap_close(OldHeap, NoLock);
     576             : 
     577             :     /*
                     :      * Create the transient table that will receive the re-ordered data.
                     :      * It gets the old table's tablespace and persistence; make_new_heap
                     :      * re-opens the old table by OID using the lock mode given here.
                     :      */
     578          22 :     OIDNewHeap = make_new_heap(tableOid, tableSpace,
     579             :                                relpersistence,
     580             :                                AccessExclusiveLock);
     581             : 
     582             :     /*
                     :      * Copy the heap data into the new table in the desired order.  The
                     :      * three output arguments receive the toast-swap mode and the
                     :      * freeze/multixact cutoffs that are handed to finish_heap_swap below.
                     :      */
     583          22 :     copy_heap_data(OIDNewHeap, tableOid, indexOid, verbose,
     584             :                    &swap_toast_by_content, &frozenXid, &cutoffMulti);
     585             : 
     586             :     /*
     587             :      * Swap the physical files of the target and transient tables, then
     588             :      * rebuild the target's indexes and throw away the transient table.
                     :      *
                     :      * NOTE(review): the bare false/true literals are positional flags whose
                     :      * meaning is defined by finish_heap_swap, which is not visible here;
                     :      * check its declaration before changing them.
     589             :      */
     590          22 :     finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
     591             :                      swap_toast_by_content, false, true,
     592             :                      frozenXid, cutoffMulti,
     593             :                      relpersistence);
     594          21 : }
     595             : 
     596             : 
     597             : /*
     598             :  * Create the transient table that will be filled with new data during
     599             :  * CLUSTER, ALTER TABLE, and similar operations.  The transient table
     600             :  * duplicates the logical structure of the OldHeap, but is placed in
     601             :  * NewTableSpace which might be different from OldHeap's.  Also, it's built
     602             :  * with the specified persistence, which might differ from the original's.
     603             :  *
                     :  * OIDOldHeap: OID of the existing table; it is opened here with lockmode.
                     :  * NewTableSpace: tablespace handed to heap_create_with_catalog.
                     :  * relpersistence: persistence of the new heap.  For RELPERSISTENCE_TEMP
                     :  *      the heap is created in the namespace that
                     :  *      LookupCreationNamespace("pg_temp") returns, otherwise in OldHeap's
                     :  *      namespace.
                     :  * lockmode: lock mode used to open OldHeap; it is also passed on to
                     :  *      NewHeapCreateToastTable.
                     :  *
                     :  * Returns the OID of the new heap, which is named "pg_temp_<OIDOldHeap>".
                     :  * OldHeap is closed with NoLock before returning.
                     :  *
     604             :  * After this, the caller should load the new heap with transferred/modified
     605             :  * data, then call finish_heap_swap to complete the operation.
     606             :  */
     607             : Oid
     608         114 : make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
     609             :               LOCKMODE lockmode)
     610             : {
     611             :     TupleDesc   OldHeapDesc;
     612             :     char        NewHeapName[NAMEDATALEN];
     613             :     Oid         OIDNewHeap;
     614             :     Oid         toastid;
     615             :     Relation    OldHeap;
     616             :     HeapTuple   tuple;
     617             :     Datum       reloptions;
     618             :     bool        isNull;
     619             :     Oid         namespaceid;
     620             : 
     621         114 :     OldHeap = heap_open(OIDOldHeap, lockmode);
     622         114 :     OldHeapDesc = RelationGetDescr(OldHeap);
     623             : 
     624             :     /*
     625             :      * Note that the NewHeap will not receive any of the defaults or
     626             :      * constraints associated with the OldHeap; we don't need 'em, and there's
     627             :      * no reason to spend cycles inserting them into the catalogs only to
     628             :      * delete them.
     629             :      */
     630             : 
     631             :     /*
     632             :      * But we do want to use reloptions of the old heap for new heap.
                     :      *
                     :      * They are read from the old heap's pg_class syscache entry; a NULL
                     :      * reloptions attribute is passed along as (Datum) 0.  The syscache
                     :      * tuple stays pinned until heap_create_with_catalog has used the value.
     633             :      */
     634         114 :     tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(OIDOldHeap));
     635         114 :     if (!HeapTupleIsValid(tuple))
     636           0 :         elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
     637         114 :     reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
     638             :                                  &isNull);
     639         114 :     if (isNull)
     640         114 :         reloptions = (Datum) 0;
     641             : 
     642         114 :     if (relpersistence == RELPERSISTENCE_TEMP)
     643          12 :         namespaceid = LookupCreationNamespace("pg_temp");
     644             :     else
     645         102 :         namespaceid = RelationGetNamespace(OldHeap);
     646             : 
     647             :     /*
     648             :      * Create the new heap, using a temporary name in the same namespace as
     649             :      * the existing table.  NOTE: there is some risk of collision with user
     650             :      * relnames.  Working around this seems more trouble than it's worth; in
     651             :      * particular, we can't create the new heap in a different namespace from
     652             :      * the old, or we will have problems with the TEMP status of temp tables.
     653             :      *
     654             :      * Note: the new heap is not a shared relation, even if we are rebuilding
     655             :      * a shared rel.  However, we do make the new heap mapped if the source is
     656             :      * mapped.  This simplifies swap_relation_files, and is absolutely
     657             :      * necessary for rebuilding pg_class, for reasons explained there.
                     :      *
                     :      * NOTE(review): "the same namespace" holds only on the non-temp path.
                     :      * When relpersistence is RELPERSISTENCE_TEMP the code above uses the
                     :      * pg_temp creation namespace instead, which may differ from OldHeap's.
                     :      *
                     :      * The temporary name is "pg_temp_" followed by the old heap's OID.  The
                     :      * bare literals in the call below are positional arguments; their
                     :      * meaning is defined by heap_create_with_catalog's declaration, which
                     :      * is not visible here.
     658             :      */
     659         114 :     snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
     660             : 
     661         228 :     OIDNewHeap = heap_create_with_catalog(NewHeapName,
     662             :                                           namespaceid,
     663             :                                           NewTableSpace,
     664             :                                           InvalidOid,
     665             :                                           InvalidOid,
     666             :                                           InvalidOid,
     667         114 :                                           OldHeap->rd_rel->relowner,
     668             :                                           OldHeapDesc,
     669             :                                           NIL,
     670             :                                           RELKIND_RELATION,
     671             :                                           relpersistence,
     672             :                                           false,
     673         114 :                                           RelationIsMapped(OldHeap),
     674             :                                           true,
     675             :                                           0,
     676             :                                           ONCOMMIT_NOOP,
     677             :                                           reloptions,
     678             :                                           false,
     679             :                                           true,
     680             :                                           true,
     681             :                                           NULL);
     682         114 :     Assert(OIDNewHeap != InvalidOid);
     683             : 
     684         114 :     ReleaseSysCache(tuple);     /* done with the old heap's reloptions */
     685             : 
     686             :     /*
     687             :      * Advance command counter so that the newly-created relation's catalog
     688             :      * tuples will be visible to heap_open.
     689             :      */
     690         114 :     CommandCounterIncrement();
     691             : 
     692             :     /*
     693             :      * If necessary, create a TOAST table for the new relation.
     694             :      *
     695             :      * If the relation doesn't have a TOAST table already, we can't need one
     696             :      * for the new relation.  The other way around is possible though: if some
     697             :      * wide columns have been dropped, NewHeapCreateToastTable can decide that
     698             :      * no TOAST table is needed for the new table.
     699             :      *
     700             :      * Note that NewHeapCreateToastTable ends with CommandCounterIncrement, so
     701             :      * that the TOAST table will be visible for insertion.
     702             :      */
     703         114 :     toastid = OldHeap->rd_rel->reltoastrelid;
     704         114 :     if (OidIsValid(toastid))
     705             :     {
     706             :         /*
                     :          * keep the existing toast table's reloptions, if any
                     :          *
                     :          * tuple, reloptions and isNull are reused for this second pg_class
                     :          * lookup, this time keyed by the old TOAST table's OID.
                     :          */
     707          53 :         tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(toastid));
     708          53 :         if (!HeapTupleIsValid(tuple))
     709           0 :             elog(ERROR, "cache lookup failed for relation %u", toastid);
     710          53 :         reloptions = SysCacheGetAttr(RELOID, tuple, Anum_pg_class_reloptions,
     711             :                                      &isNull);
     712          53 :         if (isNull)
     713          53 :             reloptions = (Datum) 0;
     714             : 
     715          53 :         NewHeapCreateToastTable(OIDNewHeap, reloptions, lockmode);
     716             : 
     717          53 :         ReleaseSysCache(tuple);
     718             :     }
     719             : 
     720         114 :     heap_close(OldHeap, NoLock);    /* NoLock: the lock is not released here */
     721             : 
     722         114 :     return OIDNewHeap;
     723             : }
     724             : 
     725             : /*
     726             :  * Do the physical copying of heap data.
     727             :  *
     728             :  * There are three output parameters:
     729             :  * *pSwapToastByContent is set true if toast tables must be swapped by content.
     730             :  * *pFreezeXid receives the TransactionId used as freeze cutoff point.
     731             :  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
     732             :  */
     733             : static void
     734          22 : copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
     735             :                bool *pSwapToastByContent, TransactionId *pFreezeXid,
     736             :                MultiXactId *pCutoffMulti)
     737             : {
     738             :     Relation    NewHeap,
     739             :                 OldHeap,
     740             :                 OldIndex;
     741             :     TupleDesc   oldTupDesc;
     742             :     TupleDesc   newTupDesc;
     743             :     int         natts;
     744             :     Datum      *values;
     745             :     bool       *isnull;
     746             :     IndexScanDesc indexScan;
     747             :     HeapScanDesc heapScan;
     748             :     bool        use_wal;
     749             :     bool        is_system_catalog;
     750             :     TransactionId OldestXmin;
     751             :     TransactionId FreezeXid;
     752             :     MultiXactId MultiXactCutoff;
     753             :     RewriteState rwstate;
     754             :     bool        use_sort;
     755             :     Tuplesortstate *tuplesort;
     756          22 :     double      num_tuples = 0,
     757          22 :                 tups_vacuumed = 0,
     758          22 :                 tups_recently_dead = 0;
     759          22 :     int         elevel = verbose ? INFO : DEBUG2;
     760             :     PGRUsage    ru0;
     761             : 
     762          22 :     pg_rusage_init(&ru0);
     763             : 
     764             :     /*
     765             :      * Open the relations we need.
     766             :      */
     767          22 :     NewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
     768          22 :     OldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
     769          22 :     if (OidIsValid(OIDOldIndex))
     770          10 :         OldIndex = index_open(OIDOldIndex, AccessExclusiveLock);
     771             :     else
     772          12 :         OldIndex = NULL;
     773             : 
     774             :     /*
     775             :      * Their tuple descriptors should be exactly alike, but here we only need
     776             :      * assume that they have the same number of columns.
     777             :      */
     778          22 :     oldTupDesc = RelationGetDescr(OldHeap);
     779          22 :     newTupDesc = RelationGetDescr(NewHeap);
     780          22 :     Assert(newTupDesc->natts == oldTupDesc->natts);
     781             : 
     782             :     /* Preallocate values/isnull arrays */
     783          22 :     natts = newTupDesc->natts;
     784          22 :     values = (Datum *) palloc(natts * sizeof(Datum));
     785          22 :     isnull = (bool *) palloc(natts * sizeof(bool));
     786             : 
     787             :     /*
     788             :      * If the OldHeap has a toast table, get lock on the toast table to keep
     789             :      * it from being vacuumed.  This is needed because autovacuum processes
     790             :      * toast tables independently of their main tables, with no lock on the
     791             :      * latter.  If an autovacuum were to start on the toast table after we
     792             :      * compute our OldestXmin below, it would use a later OldestXmin, and then
     793             :      * possibly remove as DEAD toast tuples belonging to main tuples we think
     794             :      * are only RECENTLY_DEAD.  Then we'd fail while trying to copy those
     795             :      * tuples.
     796             :      *
     797             :      * We don't need to open the toast relation here, just lock it.  The lock
     798             :      * will be held till end of transaction.
     799             :      */
     800          22 :     if (OldHeap->rd_rel->reltoastrelid)
     801           4 :         LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
     802             : 
     803             :     /*
     804             :      * We need to log the copied data in WAL iff WAL archiving/streaming is
     805             :      * enabled AND it's a WAL-logged rel.
     806             :      */
     807          22 :     use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
     808             : 
     809             :     /* use_wal off requires smgr_targblock be initially invalid */
     810          22 :     Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
     811             : 
     812             :     /*
     813             :      * If both tables have TOAST tables, perform toast swap by content.  It is
     814             :      * possible that the old table has a toast table but the new one doesn't,
     815             :      * if toastable columns have been dropped.  In that case we have to do
     816             :      * swap by links.  This is okay because swap by content is only essential
     817             :      * for system catalogs, and we don't support schema changes for them.
     818             :      */
     819          22 :     if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
     820             :     {
     821           4 :         *pSwapToastByContent = true;
     822             : 
     823             :         /*
     824             :          * When doing swap by content, any toast pointers written into NewHeap
     825             :          * must use the old toast table's OID, because that's where the toast
     826             :          * data will eventually be found.  Set this up by setting rd_toastoid.
     827             :          * This also tells toast_save_datum() to preserve the toast value
     828             :          * OIDs, which we want so as not to invalidate toast pointers in
     829             :          * system catalog caches, and to avoid making multiple copies of a
     830             :          * single toast value.
     831             :          *
     832             :          * Note that we must hold NewHeap open until we are done writing data,
     833             :          * since the relcache will not guarantee to remember this setting once
     834             :          * the relation is closed.  Also, this technique depends on the fact
     835             :          * that no one will try to read from the NewHeap until after we've
     836             :          * finished writing it and swapping the rels --- otherwise they could
     837             :          * follow the toast pointers to the wrong place.  (It would actually
     838             :          * work for values copied over from the old toast table, but not for
     839             :          * any values that we toast which were previously not toasted.)
     840             :          */
     841           4 :         NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
     842             :     }
     843             :     else
     844          18 :         *pSwapToastByContent = false;
     845             : 
     846             :     /*
     847             :      * Compute xids used to freeze and weed out dead tuples and multixacts.
     848             :      * Since we're going to rewrite the whole table anyway, there's no reason
     849             :      * not to be aggressive about this.
     850             :      */
     851          22 :     vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0,
     852             :                           &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff,
     853             :                           NULL);
     854             : 
     855             :     /*
     856             :      * FreezeXid will become the table's new relfrozenxid, and that mustn't go
     857             :      * backwards, so take the max.
     858             :      */
     859          22 :     if (TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
     860           8 :         FreezeXid = OldHeap->rd_rel->relfrozenxid;
     861             : 
     862             :     /*
     863             :      * MultiXactCutoff, similarly, shouldn't go backwards either.
     864             :      */
     865          22 :     if (MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid))
     866           0 :         MultiXactCutoff = OldHeap->rd_rel->relminmxid;
     867             : 
     868             :     /* return selected values to caller */
     869          22 :     *pFreezeXid = FreezeXid;
     870          22 :     *pCutoffMulti = MultiXactCutoff;
     871             : 
     872             :     /* Remember if it's a system catalog */
     873          22 :     is_system_catalog = IsSystemRelation(OldHeap);
     874             : 
     875             :     /* Initialize the rewrite operation */
     876          22 :     rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
     877             :                                  MultiXactCutoff, use_wal);
     878             : 
     879             :     /*
     880             :      * Decide whether to use an indexscan or seqscan-and-optional-sort to scan
     881             :      * the OldHeap.  We know how to use a sort to duplicate the ordering of a
     882             :      * btree index, and will use seqscan-and-sort for that case if the planner
     883             :      * tells us it's cheaper.  Otherwise, always indexscan if an index is
     884             :      * provided, else plain seqscan.
     885             :      */
     886          22 :     if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
     887          10 :         use_sort = plan_cluster_use_sort(OIDOldHeap, OIDOldIndex);
     888             :     else
     889          12 :         use_sort = false;
     890             : 
     891             :     /* Set up sorting if wanted */
     892          22 :     if (use_sort)
     893           5 :         tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
     894             :                                             maintenance_work_mem, false);
     895             :     else
     896          17 :         tuplesort = NULL;
     897             : 
     898             :     /*
     899             :      * Prepare to scan the OldHeap.  To ensure we see recently-dead tuples
     900             :      * that still need to be copied, we scan with SnapshotAny and use
     901             :      * HeapTupleSatisfiesVacuum for the visibility test.
     902             :      */
     903          22 :     if (OldIndex != NULL && !use_sort)
     904             :     {
     905           5 :         heapScan = NULL;
     906           5 :         indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
     907           5 :         index_rescan(indexScan, NULL, 0, NULL, 0);
     908             :     }
     909             :     else
     910             :     {
     911          17 :         heapScan = heap_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
     912          17 :         indexScan = NULL;
     913             :     }
     914             : 
     915             :     /* Log what we're doing */
     916          22 :     if (indexScan != NULL)
     917           5 :         ereport(elevel,
     918             :                 (errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
     919             :                         get_namespace_name(RelationGetNamespace(OldHeap)),
     920             :                         RelationGetRelationName(OldHeap),
     921             :                         RelationGetRelationName(OldIndex))));
     922          17 :     else if (tuplesort != NULL)
     923           5 :         ereport(elevel,
     924             :                 (errmsg("clustering \"%s.%s\" using sequential scan and sort",
     925             :                         get_namespace_name(RelationGetNamespace(OldHeap)),
     926             :                         RelationGetRelationName(OldHeap))));
     927             :     else
     928          12 :         ereport(elevel,
     929             :                 (errmsg("vacuuming \"%s.%s\"",
     930             :                         get_namespace_name(RelationGetNamespace(OldHeap)),
     931             :                         RelationGetRelationName(OldHeap))));
     932             : 
     933             :     /*
     934             :      * Scan through the OldHeap, either in OldIndex order or sequentially;
     935             :      * copy each tuple into the NewHeap, or transiently to the tuplesort
     936             :      * module.  Note that we don't bother sorting dead tuples (they won't get
     937             :      * to the new table anyway).
     938             :      */
     939             :     for (;;)
     940             :     {
     941             :         HeapTuple   tuple;
     942             :         Buffer      buf;
     943             :         bool        isdead;
     944             : 
     945       27156 :         CHECK_FOR_INTERRUPTS();
     946             : 
     947       27156 :         if (indexScan != NULL)
     948             :         {
     949          21 :             tuple = index_getnext(indexScan, ForwardScanDirection);
     950          21 :             if (tuple == NULL)
     951           5 :                 break;
     952             : 
     953             :             /* Since we used no scan keys, should never need to recheck */
     954          16 :             if (indexScan->xs_recheck)
     955           0 :                 elog(ERROR, "CLUSTER does not support lossy index conditions");
     956             : 
     957          16 :             buf = indexScan->xs_cbuf;
     958             :         }
     959             :         else
     960             :         {
     961       27135 :             tuple = heap_getnext(heapScan, ForwardScanDirection);
     962       27135 :             if (tuple == NULL)
     963          17 :                 break;
     964             : 
     965       27118 :             buf = heapScan->rs_cbuf;
     966             :         }
     967             : 
     968       27134 :         LockBuffer(buf, BUFFER_LOCK_SHARE);
     969             : 
     970       27134 :         switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
     971             :         {
     972             :             case HEAPTUPLE_DEAD:
     973             :                 /* Definitely dead */
     974        4373 :                 isdead = true;
     975        4373 :                 break;
     976             :             case HEAPTUPLE_RECENTLY_DEAD:
     977        2056 :                 tups_recently_dead += 1;
     978             :                 /* fall through */
     979             :             case HEAPTUPLE_LIVE:
     980             :                 /* Live or recently dead, must copy it */
     981       22752 :                 isdead = false;
     982       22752 :                 break;
     983             :             case HEAPTUPLE_INSERT_IN_PROGRESS:
     984             : 
     985             :                 /*
     986             :                  * Since we hold exclusive lock on the relation, normally the
     987             :                  * only way to see this is if it was inserted earlier in our
     988             :                  * own transaction.  However, it can happen in system
     989             :                  * catalogs, since we tend to release write lock before commit
     990             :                  * there.  Give a warning if neither case applies; but in any
     991             :                  * case we had better copy it.
     992             :                  */
     993           7 :                 if (!is_system_catalog &&
     994           3 :                     !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
     995           0 :                     elog(WARNING, "concurrent insert in progress within table \"%s\"",
     996             :                          RelationGetRelationName(OldHeap));
     997             :                 /* treat as live */
     998           4 :                 isdead = false;
     999           4 :                 break;
    1000             :             case HEAPTUPLE_DELETE_IN_PROGRESS:
    1001             : 
    1002             :                 /*
    1003             :                  * Similar situation to INSERT_IN_PROGRESS case.
    1004             :                  */
    1005          10 :                 if (!is_system_catalog &&
    1006           5 :                     !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
    1007           0 :                     elog(WARNING, "concurrent delete in progress within table \"%s\"",
    1008             :                          RelationGetRelationName(OldHeap));
    1009             :                 /* treat as recently dead */
    1010           5 :                 tups_recently_dead += 1;
    1011           5 :                 isdead = false;
    1012           5 :                 break;
    1013             :             default:
    1014           0 :                 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
    1015             :                 isdead = false; /* keep compiler quiet */
    1016             :                 break;
    1017             :         }
    1018             : 
    1019       27134 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    1020             : 
    1021       27134 :         if (isdead)
    1022             :         {
    1023        4373 :             tups_vacuumed += 1;
    1024             :             /* heap rewrite module still needs to see it... */
    1025        4373 :             if (rewrite_heap_dead_tuple(rwstate, tuple))
    1026             :             {
    1027             :                 /* A previous recently-dead tuple is now known dead */
    1028           0 :                 tups_vacuumed += 1;
    1029           0 :                 tups_recently_dead -= 1;
    1030             :             }
    1031        4373 :             continue;
    1032             :         }
    1033             : 
    1034       22761 :         num_tuples += 1;
    1035       22761 :         if (tuplesort != NULL)
    1036       20042 :             tuplesort_putheaptuple(tuplesort, tuple);
    1037             :         else
    1038        2719 :             reform_and_rewrite_tuple(tuple,
    1039             :                                      oldTupDesc, newTupDesc,
    1040             :                                      values, isnull,
    1041        2719 :                                      NewHeap->rd_rel->relhasoids, rwstate);
    1042       27134 :     }
    1043             : 
    1044          22 :     if (indexScan != NULL)
    1045           5 :         index_endscan(indexScan);
    1046          22 :     if (heapScan != NULL)
    1047          17 :         heap_endscan(heapScan);
    1048             : 
    1049             :     /*
    1050             :      * In scan-and-sort mode, complete the sort, then read out all live tuples
    1051             :      * from the tuplesort and write them to the new relation.
    1052             :      */
    1053          22 :     if (tuplesort != NULL)
    1054             :     {
    1055           5 :         tuplesort_performsort(tuplesort);
    1056             : 
    1057             :         for (;;)
    1058             :         {
    1059             :             HeapTuple   tuple;
    1060             : 
    1061       20047 :             CHECK_FOR_INTERRUPTS();
    1062             : 
    1063       20047 :             tuple = tuplesort_getheaptuple(tuplesort, true);
    1064       20047 :             if (tuple == NULL)
    1065           5 :                 break;
    1066             : 
    1067       20042 :             reform_and_rewrite_tuple(tuple,
    1068             :                                      oldTupDesc, newTupDesc,
    1069             :                                      values, isnull,
    1070       20042 :                                      NewHeap->rd_rel->relhasoids, rwstate);
    1071       20042 :         }
    1072             : 
    1073           5 :         tuplesort_end(tuplesort);
    1074             :     }
    1075             : 
    1076             :     /* Write out any remaining tuples, and fsync if needed */
    1077          22 :     end_heap_rewrite(rwstate);
    1078             : 
    1079             :     /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
    1080          22 :     NewHeap->rd_toastoid = InvalidOid;
    1081             : 
    1082             :     /* Log what we did */
    1083          22 :     ereport(elevel,
    1084             :             (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
    1085             :                     RelationGetRelationName(OldHeap),
    1086             :                     tups_vacuumed, num_tuples,
    1087             :                     RelationGetNumberOfBlocks(OldHeap)),
    1088             :              errdetail("%.0f dead row versions cannot be removed yet.\n"
    1089             :                        "%s.",
    1090             :                        tups_recently_dead,
    1091             :                        pg_rusage_show(&ru0))));
    1092             : 
    1093             :     /* Clean up */
    1094          22 :     pfree(values);
    1095          22 :     pfree(isnull);
    1096             : 
    1097          22 :     if (OldIndex != NULL)
    1098          10 :         index_close(OldIndex, NoLock);
    1099          22 :     heap_close(OldHeap, NoLock);
    1100          22 :     heap_close(NewHeap, NoLock);
    1101          22 : }
    1102             : 
    1103             : /*
    1104             :  * Swap the physical files of two given relations.
    1105             :  *
    1106             :  * We swap the physical identity (reltablespace, relfilenode) while keeping the
    1107             :  * same logical identities of the two relations.  relpersistence is also
    1108             :  * swapped, which is critical since it determines where buffers live for each
    1109             :  * relation.
    1110             :  *
    1111             :  * We can swap associated TOAST data in either of two ways: recursively swap
    1112             :  * the physical content of the toast tables (and their indexes), or swap the
    1113             :  * TOAST links in the given relations' pg_class entries.  The former is needed
    1114             :  * to manage rewrites of shared catalogs (where we cannot change the pg_class
    1115             :  * links) while the latter is the only way to handle cases in which a toast
    1116             :  * table is added or removed altogether.
    1117             :  *
    1118             :  * Additionally, the first relation is marked with relfrozenxid set to
    1119             :  * frozenXid.  It seems a bit ugly to have this here, but the caller would
    1120             :  * have to do it anyway, so having it here saves a heap_update.  Note: in
    1121             :  * the swap-toast-links case, we assume we don't need to change the toast
    1122             :  * table's relfrozenxid: the new version of the toast table should already
    1123             :  * have relfrozenxid set to RecentXmin, which is good enough.
    1124             :  *
    1125             :  * Lastly, if r2 and its toast table and toast index (if any) are mapped,
    1126             :  * their OIDs are emitted into mapped_tables[].  This is hacky but beats
    1127             :  * having to look the information up again later in finish_heap_swap.
    1128             :  */
    1129             : static void
    1130         113 : swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
    1131             :                     bool swap_toast_by_content,
    1132             :                     bool is_internal,
    1133             :                     TransactionId frozenXid,
    1134             :                     MultiXactId cutoffMulti,
    1135             :                     Oid *mapped_tables)
    1136             : {
    1137             :     Relation    relRelation;
    1138             :     HeapTuple   reltup1,
    1139             :                 reltup2;
    1140             :     Form_pg_class relform1,
    1141             :                 relform2;
    1142             :     Oid         relfilenode1,
    1143             :                 relfilenode2;
    1144             :     Oid         swaptemp;
    1145             :     char        swptmpchr;
    1146             : 
    1147             :     /* We need writable copies of both pg_class tuples. */
    1148         113 :     relRelation = heap_open(RelationRelationId, RowExclusiveLock);
    1149             : 
    1150         113 :     reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
    1151         113 :     if (!HeapTupleIsValid(reltup1))
    1152           0 :         elog(ERROR, "cache lookup failed for relation %u", r1);
    1153         113 :     relform1 = (Form_pg_class) GETSTRUCT(reltup1);
    1154             : 
    1155         113 :     reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
    1156         113 :     if (!HeapTupleIsValid(reltup2))
    1157           0 :         elog(ERROR, "cache lookup failed for relation %u", r2);
    1158         113 :     relform2 = (Form_pg_class) GETSTRUCT(reltup2);
    1159             : 
    1160         113 :     relfilenode1 = relform1->relfilenode;
    1161         113 :     relfilenode2 = relform2->relfilenode;
    1162             : 
    1163         113 :     if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2))
    1164             :     {
    1165             :         /*
    1166             :          * Normal non-mapped relations: swap relfilenodes, reltablespaces,
    1167             :          * relpersistence
    1168             :          */
    1169         111 :         Assert(!target_is_pg_class);
    1170             : 
    1171         111 :         swaptemp = relform1->relfilenode;
    1172         111 :         relform1->relfilenode = relform2->relfilenode;
    1173         111 :         relform2->relfilenode = swaptemp;
    1174             : 
    1175         111 :         swaptemp = relform1->reltablespace;
    1176         111 :         relform1->reltablespace = relform2->reltablespace;
    1177         111 :         relform2->reltablespace = swaptemp;
    1178             : 
    1179         111 :         swptmpchr = relform1->relpersistence;
    1180         111 :         relform1->relpersistence = relform2->relpersistence;
    1181         111 :         relform2->relpersistence = swptmpchr;
    1182             : 
    1183             :         /* Also swap toast links, if we're swapping by links */
    1184         222 :         if (!swap_toast_by_content)
    1185             :         {
    1186          99 :             swaptemp = relform1->reltoastrelid;
    1187          99 :             relform1->reltoastrelid = relform2->reltoastrelid;
    1188          99 :             relform2->reltoastrelid = swaptemp;
    1189             :         }
    1190             :     }
    1191             :     else
    1192             :     {
    1193             :         /*
    1194             :          * Mapped-relation case.  Here we have to swap the relation mappings
    1195             :          * instead of modifying the pg_class columns.  Both must be mapped.
    1196             :          */
    1197           2 :         if (OidIsValid(relfilenode1) || OidIsValid(relfilenode2))
    1198           0 :             elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
    1199             :                  NameStr(relform1->relname));
    1200             : 
    1201             :         /*
    1202             :          * We can't change the tablespace nor persistence of a mapped rel, and
    1203             :          * we can't handle toast link swapping for one either, because we must
    1204             :          * not apply any critical changes to its pg_class row.  These cases
    1205             :          * should be prevented by upstream permissions tests, so these checks
    1206             :          * are a non-user-facing emergency backstop.
    1207             :          */
    1208           2 :         if (relform1->reltablespace != relform2->reltablespace)
    1209           0 :             elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
    1210             :                  NameStr(relform1->relname));
    1211           2 :         if (relform1->relpersistence != relform2->relpersistence)
    1212           0 :             elog(ERROR, "cannot change persistence of mapped relation \"%s\"",
    1213             :                  NameStr(relform1->relname));
    1214           4 :         if (!swap_toast_by_content &&
    1215           4 :             (relform1->reltoastrelid || relform2->reltoastrelid))
    1216           0 :             elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
    1217             :                  NameStr(relform1->relname));
    1218             : 
    1219             :         /*
    1220             :          * Fetch the mappings --- shouldn't fail, but be paranoid
    1221             :          */
    1222           2 :         relfilenode1 = RelationMapOidToFilenode(r1, relform1->relisshared);
    1223           2 :         if (!OidIsValid(relfilenode1))
    1224           0 :             elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
    1225             :                  NameStr(relform1->relname), r1);
    1226           2 :         relfilenode2 = RelationMapOidToFilenode(r2, relform2->relisshared);
    1227           2 :         if (!OidIsValid(relfilenode2))
    1228           0 :             elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
    1229             :                  NameStr(relform2->relname), r2);
    1230             : 
    1231             :         /*
    1232             :          * Send replacement mappings to relmapper.  Note these won't actually
    1233             :          * take effect until CommandCounterIncrement.
    1234             :          */
    1235           2 :         RelationMapUpdateMap(r1, relfilenode2, relform1->relisshared, false);
    1236           2 :         RelationMapUpdateMap(r2, relfilenode1, relform2->relisshared, false);
    1237             : 
    1238             :         /* Pass OIDs of mapped r2 tables back to caller */
    1239           2 :         *mapped_tables++ = r2;
    1240             :     }
    1241             : 
    1242             :     /*
    1243             :      * In the case of a shared catalog, these next few steps will only affect
    1244             :      * our own database's pg_class row; but that's okay, because they are all
    1245             :      * noncritical updates.  That's also an important fact for the case of a
    1246             :      * mapped catalog, because it's possible that we'll commit the map change
    1247             :      * and then fail to commit the pg_class update.
    1248             :      */
    1249             : 
    1250             :     /* set rel1's frozen Xid and minimum MultiXid */
    1251         113 :     if (relform1->relkind != RELKIND_INDEX)
    1252             :     {
    1253         109 :         Assert(TransactionIdIsNormal(frozenXid));
    1254         109 :         relform1->relfrozenxid = frozenXid;
    1255         109 :         Assert(MultiXactIdIsValid(cutoffMulti));
    1256         109 :         relform1->relminmxid = cutoffMulti;
    1257             :     }
    1258             : 
    1259             :     /* swap size statistics too, since new rel has freshly-updated stats */
    1260             :     {
    1261             :         int32       swap_pages;
    1262             :         float4      swap_tuples;
    1263             :         int32       swap_allvisible;
    1264             : 
    1265         113 :         swap_pages = relform1->relpages;
    1266         113 :         relform1->relpages = relform2->relpages;
    1267         113 :         relform2->relpages = swap_pages;
    1268             : 
    1269         113 :         swap_tuples = relform1->reltuples;
    1270         113 :         relform1->reltuples = relform2->reltuples;
    1271         113 :         relform2->reltuples = swap_tuples;
    1272             : 
    1273         113 :         swap_allvisible = relform1->relallvisible;
    1274         113 :         relform1->relallvisible = relform2->relallvisible;
    1275         113 :         relform2->relallvisible = swap_allvisible;
    1276             :     }
    1277             : 
    1278             :     /*
    1279             :      * Update the tuples in pg_class --- unless the target relation of the
    1280             :      * swap is pg_class itself.  In that case, there is zero point in making
    1281             :      * changes because we'd be updating the old data that we're about to throw
    1282             :      * away.  Because the real work being done here for a mapped relation is
    1283             :      * just to change the relation map settings, it's all right to not update
    1284             :      * the pg_class rows in this case. The most important changes will instead
    1285             :      * be performed later, in finish_heap_swap() itself.
    1286             :      */
    1287         113 :     if (!target_is_pg_class)
    1288             :     {
    1289             :         CatalogIndexState indstate;
    1290             : 
    1291         112 :         indstate = CatalogOpenIndexes(relRelation);
    1292         112 :         CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1,
    1293             :                                    indstate);
    1294         112 :         CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2,
    1295             :                                    indstate);
    1296         112 :         CatalogCloseIndexes(indstate);
    1297             :     }
    1298             :     else
    1299             :     {
    1300             :         /* no update ... but we do still need relcache inval */
    1301           1 :         CacheInvalidateRelcacheByTuple(reltup1);
    1302           1 :         CacheInvalidateRelcacheByTuple(reltup2);
    1303             :     }
    1304             : 
    1305             :     /*
    1306             :      * Post alter hook for modified relations. The change to r2 is always
    1307             :      * internal, but r1 depends on the invocation context.
    1308             :      */
    1309         113 :     InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0,
    1310             :                                  InvalidOid, is_internal);
    1311         113 :     InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0,
    1312             :                                  InvalidOid, true);
    1313             : 
    1314             :     /*
    1315             :      * If we have toast tables associated with the relations being swapped,
    1316             :      * deal with them too.
    1317             :      */
    1318         113 :     if (relform1->reltoastrelid || relform2->reltoastrelid)
    1319             :     {
    1320          49 :         if (swap_toast_by_content)
    1321             :         {
    1322           4 :             if (relform1->reltoastrelid && relform2->reltoastrelid)
    1323             :             {
    1324             :                 /* Recursively swap the contents of the toast tables */
    1325           4 :                 swap_relation_files(relform1->reltoastrelid,
    1326             :                                     relform2->reltoastrelid,
    1327             :                                     target_is_pg_class,
    1328             :                                     swap_toast_by_content,
    1329             :                                     is_internal,
    1330             :                                     frozenXid,
    1331             :                                     cutoffMulti,
    1332             :                                     mapped_tables);
    1333             :             }
    1334             :             else
    1335             :             {
    1336             :                 /* caller messed up */
    1337           0 :                 elog(ERROR, "cannot swap toast files by content when there's only one");
    1338             :             }
    1339             :         }
    1340             :         else
    1341             :         {
    1342             :             /*
    1343             :              * We swapped the ownership links, so we need to change dependency
    1344             :              * data to match.
    1345             :              *
    1346             :              * NOTE: it is possible that only one table has a toast table.
    1347             :              *
    1348             :              * NOTE: at present, a TOAST table's only dependency is the one on
    1349             :              * its owning table.  If more are ever created, we'd need to use
    1350             :              * something more selective than deleteDependencyRecordsFor() to
    1351             :              * get rid of just the link we want.
    1352             :              */
    1353             :             ObjectAddress baseobject,
    1354             :                         toastobject;
    1355             :             long        count;
    1356             : 
    1357             :             /*
    1358             :              * We disallow this case for system catalogs, to avoid the
    1359             :              * possibility that the catalog we're rebuilding is one of the
    1360             :              * ones the dependency changes would change.  It's too late to be
    1361             :              * making any data changes to the target catalog.
    1362             :              */
    1363          45 :             if (IsSystemClass(r1, relform1))
    1364           0 :                 elog(ERROR, "cannot swap toast files by links for system catalogs");
    1365             : 
    1366             :             /* Delete old dependencies */
    1367          45 :             if (relform1->reltoastrelid)
    1368             :             {
    1369          42 :                 count = deleteDependencyRecordsFor(RelationRelationId,
    1370             :                                                    relform1->reltoastrelid,
    1371             :                                                    false);
    1372          42 :                 if (count != 1)
    1373           0 :                     elog(ERROR, "expected one dependency record for TOAST table, found %ld",
    1374             :                          count);
    1375             :             }
    1376          45 :             if (relform2->reltoastrelid)
    1377             :             {
    1378          45 :                 count = deleteDependencyRecordsFor(RelationRelationId,
    1379             :                                                    relform2->reltoastrelid,
    1380             :                                                    false);
    1381          45 :                 if (count != 1)
    1382           0 :                     elog(ERROR, "expected one dependency record for TOAST table, found %ld",
    1383             :                          count);
    1384             :             }
    1385             : 
    1386             :             /* Register new dependencies */
    1387          45 :             baseobject.classId = RelationRelationId;
    1388          45 :             baseobject.objectSubId = 0;
    1389          45 :             toastobject.classId = RelationRelationId;
    1390          45 :             toastobject.objectSubId = 0;
    1391             : 
    1392          45 :             if (relform1->reltoastrelid)
    1393             :             {
    1394          42 :                 baseobject.objectId = r1;
    1395          42 :                 toastobject.objectId = relform1->reltoastrelid;
    1396          42 :                 recordDependencyOn(&toastobject, &baseobject,
    1397             :                                    DEPENDENCY_INTERNAL);
    1398             :             }
    1399             : 
    1400          45 :             if (relform2->reltoastrelid)
    1401             :             {
    1402          45 :                 baseobject.objectId = r2;
    1403          45 :                 toastobject.objectId = relform2->reltoastrelid;
    1404          45 :                 recordDependencyOn(&toastobject, &baseobject,
    1405             :                                    DEPENDENCY_INTERNAL);
    1406             :             }
    1407             :         }
    1408             :     }
    1409             : 
    1410             :     /*
    1411             :      * If we're swapping two toast tables by content, do the same for their
    1412             :      * valid index. The swap can actually be safely done only if the relations
    1413             :      * have indexes.
    1414             :      */
    1415         125 :     if (swap_toast_by_content &&
    1416          16 :         relform1->relkind == RELKIND_TOASTVALUE &&
    1417           4 :         relform2->relkind == RELKIND_TOASTVALUE)
    1418             :     {
    1419             :         Oid         toastIndex1,
    1420             :                     toastIndex2;
    1421             : 
    1422             :         /* Get valid index for each relation */
    1423           4 :         toastIndex1 = toast_get_valid_index(r1,
    1424             :                                             AccessExclusiveLock);
    1425           4 :         toastIndex2 = toast_get_valid_index(r2,
    1426             :                                             AccessExclusiveLock);
    1427             : 
    1428           4 :         swap_relation_files(toastIndex1,
    1429             :                             toastIndex2,
    1430             :                             target_is_pg_class,
    1431             :                             swap_toast_by_content,
    1432             :                             is_internal,
    1433             :                             InvalidTransactionId,
    1434             :                             InvalidMultiXactId,
    1435             :                             mapped_tables);
    1436             :     }
    1437             : 
    1438             :     /* Clean up. */
    1439         113 :     heap_freetuple(reltup1);
    1440         113 :     heap_freetuple(reltup2);
    1441             : 
    1442         113 :     heap_close(relRelation, RowExclusiveLock);
    1443             : 
    1444             :     /*
    1445             :      * Close both relcache entries' smgr links.  We need this kluge because
    1446             :      * both links will be invalidated during upcoming CommandCounterIncrement.
    1447             :      * Whichever of the rels is the second to be cleared will have a dangling
    1448             :      * reference to the other's smgr entry.  Rather than trying to avoid this
    1449             :      * by ordering operations just so, it's easiest to close the links first.
    1450             :      * (Fortunately, since one of the entries is local in our transaction,
    1451             :      * it's sufficient to clear out our own relcache this way; the problem
    1452             :      * cannot arise for other backends when they see our update on the
    1453             :      * non-transient relation.)
    1454             :      *
    1455             :      * Caution: the placement of this step interacts with the decision to
    1456             :      * handle toast rels by recursion.  When we are trying to rebuild pg_class
    1457             :      * itself, the smgr close on pg_class must happen after all accesses in
    1458             :      * this function.
    1459             :      */
    1460         113 :     RelationCloseSmgrByOid(r1);
    1461         113 :     RelationCloseSmgrByOid(r2);
    1462         113 : }
    1463             : 
    1464             : /*
    1465             :  * Remove the transient table that was built by make_new_heap, and finish
    1466             :  * cleaning up (including rebuilding all indexes on the old heap).
    1467             :  */
    1468             : void
    1469         105 : finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
    1470             :                  bool is_system_catalog,
    1471             :                  bool swap_toast_by_content,
    1472             :                  bool check_constraints,
    1473             :                  bool is_internal,
    1474             :                  TransactionId frozenXid,
    1475             :                  MultiXactId cutoffMulti,
    1476             :                  char newrelpersistence)
    1477             : {
    1478             :     ObjectAddress object;
    1479             :     Oid         mapped_tables[4];
    1480             :     int         reindex_flags;
    1481             :     int         i;
    1482             : 
    1483             :     /* Zero out possible results from swap_relation_files */
    1484         105 :     memset(mapped_tables, 0, sizeof(mapped_tables));
    1485             : 
    1486             :     /*
    1487             :      * Swap the contents of the heap relations (including any toast tables).
    1488             :      * Also set old heap's relfrozenxid to frozenXid.
    1489             :      */
    1490         105 :     swap_relation_files(OIDOldHeap, OIDNewHeap,
    1491             :                         (OIDOldHeap == RelationRelationId),
    1492             :                         swap_toast_by_content, is_internal,
    1493             :                         frozenXid, cutoffMulti, mapped_tables);
    1494             : 
    1495             :     /*
    1496             :      * If it's a system catalog, queue an sinval message to flush all
    1497             :      * catcaches on the catalog when we reach CommandCounterIncrement.
    1498             :      */
    1499         105 :     if (is_system_catalog)
    1500           3 :         CacheInvalidateCatalog(OIDOldHeap);
    1501             : 
    1502             :     /*
    1503             :      * Rebuild each index on the relation (but not the toast table, which is
    1504             :      * all-new at this point).  It is important to do this before the DROP
    1505             :      * step because if we are processing a system catalog that will be used
    1506             :      * during DROP, we want to have its indexes available.  There is no
    1507             :      * advantage to the other order anyway because this is all transactional,
    1508             :      * so no chance to reclaim disk space before commit.  We do not need a
    1509             :      * final CommandCounterIncrement() because reindex_relation does it.
    1510             :      *
    1511             :      * Note: because index_build is called via reindex_relation, it will never
    1512             :      * set indcheckxmin true for the indexes.  This is OK even though in some
    1513             :      * sense we are building new indexes rather than rebuilding existing ones,
    1514             :      * because the new heap won't contain any HOT chains at all, let alone
    1515             :      * broken ones, so it can't be necessary to set indcheckxmin.
    1516             :      */
    1517         105 :     reindex_flags = REINDEX_REL_SUPPRESS_INDEX_USE;
    1518         105 :     if (check_constraints)
    1519          83 :         reindex_flags |= REINDEX_REL_CHECK_CONSTRAINTS;
    1520             : 
    1521             :     /*
    1522             :      * Ensure that the indexes have the same persistence as the parent
    1523             :      * relation.
    1524             :      */
    1525         105 :     if (newrelpersistence == RELPERSISTENCE_UNLOGGED)
    1526           3 :         reindex_flags |= REINDEX_REL_FORCE_INDEXES_UNLOGGED;
    1527         102 :     else if (newrelpersistence == RELPERSISTENCE_PERMANENT)
    1528          96 :         reindex_flags |= REINDEX_REL_FORCE_INDEXES_PERMANENT;
    1529             : 
    1530         105 :     reindex_relation(OIDOldHeap, reindex_flags, 0);
    1531             : 
    1532             :     /*
    1533             :      * If the relation being rebuilt is pg_class, swap_relation_files()
    1534             :      * couldn't update pg_class's own pg_class entry (check comments in
    1535             :      * swap_relation_files()), thus relfrozenxid was not updated. That's
    1536             :      * annoying because a potential reason for doing a VACUUM FULL is an
    1537             :      * imminent or actual anti-wraparound shutdown.  So, now that we can
    1538             :      * access the new relation using its indexes, update relfrozenxid.
    1539             :      * pg_class doesn't have a toast relation, so we don't need to update the
    1540             :      * corresponding toast relation. Note that there's little point moving all
    1541             :      * relfrozenxid updates here since swap_relation_files() needs to write to
    1542             :      * pg_class for non-mapped relations anyway.
    1543             :      */
    1544         102 :     if (OIDOldHeap == RelationRelationId)
    1545             :     {
    1546             :         Relation    relRelation;
    1547             :         HeapTuple   reltup;
    1548             :         Form_pg_class relform;
    1549             : 
    1550           1 :         relRelation = heap_open(RelationRelationId, RowExclusiveLock);
    1551             : 
    1552           1 :         reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDOldHeap));
    1553           1 :         if (!HeapTupleIsValid(reltup))
    1554           0 :             elog(ERROR, "cache lookup failed for relation %u", OIDOldHeap);
    1555           1 :         relform = (Form_pg_class) GETSTRUCT(reltup);
    1556             : 
    1557           1 :         relform->relfrozenxid = frozenXid;
    1558           1 :         relform->relminmxid = cutoffMulti;
    1559             : 
    1560           1 :         CatalogTupleUpdate(relRelation, &reltup->t_self, reltup);
    1561             : 
    1562           1 :         heap_close(relRelation, RowExclusiveLock);
    1563             :     }
    1564             : 
    1565             :     /* Destroy new heap with old filenode */
    1566         102 :     object.classId = RelationRelationId;
    1567         102 :     object.objectId = OIDNewHeap;
    1568         102 :     object.objectSubId = 0;
    1569             : 
    1570             :     /*
    1571             :      * The new relation is local to our transaction and we know nothing
    1572             :      * depends on it, so DROP_RESTRICT should be OK.
    1573             :      */
    1574         102 :     performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);
    1575             : 
    1576             :     /* performDeletion does CommandCounterIncrement at end */
    1577             : 
    1578             :     /*
    1579             :      * Now we must remove any relation mapping entries that we set up for the
    1580             :      * transient table, as well as its toast table and toast index if any. If
    1581             :      * we fail to do this before commit, the relmapper will complain about new
    1582             :      * permanent map entries being added post-bootstrap.
    1583             :      */
    1584         104 :     for (i = 0; OidIsValid(mapped_tables[i]); i++)
    1585           2 :         RelationMapRemoveMapping(mapped_tables[i]);
    1586             : 
    1587             :     /*
    1588             :      * At this point, everything is kosher except that, if we did toast swap
    1589             :      * by links, the toast table's name corresponds to the transient table.
    1590             :      * The name is irrelevant to the backend because it's referenced by OID,
    1591             :      * but users looking at the catalogs could be confused.  Rename it to
    1592             :      * prevent this problem.
    1593             :      *
    1594             :      * Note no lock required on the relation, because we already hold an
    1595             :      * exclusive lock on it.
    1596             :      */
    1597         102 :     if (!swap_toast_by_content)
    1598             :     {
    1599             :         Relation    newrel;
    1600             : 
    1601          98 :         newrel = heap_open(OIDOldHeap, NoLock);
    1602          98 :         if (OidIsValid(newrel->rd_rel->reltoastrelid))
    1603             :         {
    1604             :             Oid         toastidx;
    1605             :             char        NewToastName[NAMEDATALEN];
    1606             : 
    1607             :             /* Get the associated valid index to be renamed */
    1608          42 :             toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid,
    1609             :                                              AccessShareLock);
    1610             : 
    1611             :             /* rename the toast table ... */
    1612          42 :             snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
    1613             :                      OIDOldHeap);
    1614          42 :             RenameRelationInternal(newrel->rd_rel->reltoastrelid,
    1615             :                                    NewToastName, true);
    1616             : 
    1617             :             /* ... and its valid index too. */
    1618          42 :             snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
    1619             :                      OIDOldHeap);
    1620             : 
    1621          42 :             RenameRelationInternal(toastidx,
    1622             :                                    NewToastName, true);
    1623             :         }
    1624          98 :         relation_close(newrel, NoLock);
    1625             :     }
    1626         102 : }
    1627             : 
    1628             : 
    1629             : /*
    1630             :  * Build a list of RelToCluster entries, one for each index that has
    1631             :  * indisclustered set and whose table is owned by the current user.
    1632             :  * Each entry records the table OID and the OID of the index on which it
    1633             :  * is clustered; entries and list are built in cluster_context.
    1634             :  */
    1635             : static List *
    1636           1 : get_tables_to_cluster(MemoryContext cluster_context)
    1637             : {
    1638             :     Relation    indRelation;
    1639             :     HeapScanDesc scan;
    1640             :     ScanKeyData entry;
    1641             :     HeapTuple   indexTuple;
    1642             :     Form_pg_index index;
    1643             :     MemoryContext old_context;
    1644             :     RelToCluster *rvtc;
    1645           1 :     List       *rvs = NIL;
    1646             : 
    1647             :     /*
    1648             :      * Get all indexes that have indisclustered set and are owned by
    1649             :      * appropriate user. System relations or nailed-in relations cannot ever
    1650             :      * have indisclustered set, because CLUSTER will refuse to set it when
    1651             :      * called with one of them as argument.
    1652             :      */
    1653           1 :     indRelation = heap_open(IndexRelationId, AccessShareLock);
    1654           1 :     ScanKeyInit(&entry,
    1655             :                 Anum_pg_index_indisclustered,
    1656             :                 BTEqualStrategyNumber, F_BOOLEQ,
    1657             :                 BoolGetDatum(true));
    1658           1 :     scan = heap_beginscan_catalog(indRelation, 1, &entry);
    1659           4 :     while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    1660             :     {
    1661           2 :         index = (Form_pg_index) GETSTRUCT(indexTuple);
    1662             : 
    1663           2 :         if (!pg_class_ownercheck(index->indrelid, GetUserId()))
    1664           1 :             continue;           /* skip tables the current user does not own */
    1665             : 
    1666             :         /*
    1667             :          * We have to build the list in a different memory context so it will
    1668             :          * survive the cross-transaction processing
    1669             :          */
    1670           1 :         old_context = MemoryContextSwitchTo(cluster_context);
    1671             : 
    1672           1 :         rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
    1673           1 :         rvtc->tableOid = index->indrelid;
    1674           1 :         rvtc->indexOid = index->indexrelid;
    1675           1 :         rvs = lcons(rvtc, rvs);
    1676             : 
    1677           1 :         MemoryContextSwitchTo(old_context);
    1678             :     }
    1679           1 :     heap_endscan(scan);
    1680             : 
    1681           1 :     relation_close(indRelation, AccessShareLock);
    1682             : 
    1683           1 :     return rvs;
    1684             : }
    1685             : 
    1686             : 
    1687             : /*
    1688             :  * Reconstruct the given tuple against newTupDesc and rewrite it
    1689             :  *
    1690             :  * We cannot simply copy the tuple as-is, for several reasons:
    1691             :  *
    1692             :  * 1. We'd like to squeeze out the values of any dropped columns, both
    1693             :  * to save space and to ensure we have no corner-case failures. (It's
    1694             :  * possible for example that the new table hasn't got a TOAST table
    1695             :  * and so is unable to store any large values of dropped cols.)
    1696             :  *
    1697             :  * 2. The tuple might not even be legal for the new table; this is
    1698             :  * currently only known to happen as an after-effect of ALTER TABLE
    1699             :  * SET WITHOUT OIDS.
    1700             :  *
    1701             :  * So, we deform into the caller-supplied values[]/isnull[] and form it anew.
    1702             :  */
    1703             : static void
    1704       22761 : reform_and_rewrite_tuple(HeapTuple tuple,
    1705             :                          TupleDesc oldTupDesc, TupleDesc newTupDesc,
    1706             :                          Datum *values, bool *isnull,
    1707             :                          bool newRelHasOids, RewriteState rwstate)
    1708             : {
    1709             :     HeapTuple   copiedTuple;
    1710             :     int         i;
    1711             : 
    1712       22761 :     heap_deform_tuple(tuple, oldTupDesc, values, isnull);
    1713             : 
    1714             :     /* Force columns marked dropped in the new descriptor to NULL */
    1715      366004 :     for (i = 0; i < newTupDesc->natts; i++)
    1716             :     {
    1717      343243 :         if (TupleDescAttr(newTupDesc, i)->attisdropped)
    1718           0 :             isnull[i] = true;
    1719             :     }
    1720             : 
    1721       22761 :     copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
    1722             : 
    1723             :     /* Copy the source tuple's OID when the new relation has OIDs */
    1724       22761 :     if (newRelHasOids)
    1725         645 :         HeapTupleSetOid(copiedTuple, HeapTupleGetOid(tuple));
    1726             : 
    1727             :     /* Pass old and reformed tuples to the heap rewrite module, which does the rest */
    1728       22761 :     rewrite_heap_tuple(rwstate, tuple, copiedTuple);
    1729             : 
    1730       22761 :     heap_freetuple(copiedTuple);    /* free the tuple formed above */
    1731       22761 : }

Generated by: LCOV version 1.11