LCOV - code coverage report
Current view: top level - src/backend/utils/cache - relmapper.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 213 249 85.5 %
Date: 2017-09-29 13:40:31 Functions: 19 20 95.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * relmapper.c
       4             :  *    Catalog-to-filenode mapping
       5             :  *
       6             :  * For most tables, the physical file underlying the table is specified by
       7             :  * pg_class.relfilenode.  However, that obviously won't work for pg_class
       8             :  * itself, nor for the other "nailed" catalogs for which we have to be able
       9             :  * to set up working Relation entries without access to pg_class.  It also
      10             :  * does not work for shared catalogs, since there is no practical way to
      11             :  * update other databases' pg_class entries when relocating a shared catalog.
      12             :  * Therefore, for these special catalogs (henceforth referred to as "mapped
      13             :  * catalogs") we rely on a separately maintained file that shows the mapping
      14             :  * from catalog OIDs to filenode numbers.  Each database has a map file for
      15             :  * its local mapped catalogs, and there is a separate map file for shared
      16             :  * catalogs.  Mapped catalogs have zero in their pg_class.relfilenode entries.
      17             :  *
      18             :  * Relocation of a normal table is committed (ie, the new physical file becomes
      19             :  * authoritative) when the pg_class row update commits.  For mapped catalogs,
      20             :  * the act of updating the map file is effectively commit of the relocation.
      21             :  * We postpone the file update till just before commit of the transaction
      22             :  * doing the rewrite, but there is necessarily a window between.  Therefore
      23             :  * mapped catalogs can only be relocated by operations such as VACUUM FULL
      24             :  * and CLUSTER, which make no transactionally-significant changes: it must be
      25             :  * safe for the new file to replace the old, even if the transaction itself
      26             :  * aborts.  An important factor here is that the indexes and toast table of
      27             :  * a mapped catalog must also be mapped, so that the rewrites/relocations of
      28             :  * all these files commit in a single map file update rather than being tied
      29             :  * to transaction commit.
      30             :  *
      31             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      32             :  * Portions Copyright (c) 1994, Regents of the University of California
      33             :  *
      34             :  *
      35             :  * IDENTIFICATION
      36             :  *    src/backend/utils/cache/relmapper.c
      37             :  *
      38             :  *-------------------------------------------------------------------------
      39             :  */
      40             : #include "postgres.h"
      41             : 
      42             : #include <fcntl.h>
      43             : #include <sys/stat.h>
      44             : #include <unistd.h>
      45             : 
      46             : #include "access/xact.h"
      47             : #include "access/xlog.h"
      48             : #include "access/xloginsert.h"
      49             : #include "catalog/catalog.h"
      50             : #include "catalog/pg_tablespace.h"
      51             : #include "catalog/storage.h"
      52             : #include "miscadmin.h"
      53             : #include "pgstat.h"
      54             : #include "storage/fd.h"
      55             : #include "storage/lwlock.h"
      56             : #include "utils/inval.h"
      57             : #include "utils/relmapper.h"
      58             : 
      59             : 
      60             : /*
      61             :  * The map file is critical data: we have no automatic method for recovering
      62             :  * from loss or corruption of it.  We use a CRC so that we can detect
      63             :  * corruption.  To minimize the risk of failed updates, the map file should
      64             :  * be kept to no more than one standard-size disk sector (ie 512 bytes),
      65             :  * and we use overwrite-in-place rather than playing renaming games.
      66             :  * The struct layout below is designed to occupy exactly 512 bytes, which
      67             :  * might make filesystem updates a bit more efficient.
      68             :  *
      69             :  * Entries in the mappings[] array are in no particular order.  We could
      70             :  * speed searching by insisting on OID order, but it really shouldn't be
      71             :  * worth the trouble given the intended size of the mapping sets.
      72             :  */
      73             : #define RELMAPPER_FILENAME      "pg_filenode.map"
      74             : 
      75             : #define RELMAPPER_FILEMAGIC     0x592717    /* version ID value; a map whose magic equals this is treated as loaded */
      76             : 
      77             : #define MAX_MAPPINGS            62  /* 62 * 8 + 16 = 512: 62 RelMapping entries plus magic, num_mappings, crc and pad */
      78             : 
      79             : typedef struct RelMapping   /* one catalog-OID-to-filenode pair (8 bytes per the MAX_MAPPINGS arithmetic) */
      80             : {
      81             :     Oid         mapoid;         /* OID of a catalog */
      82             :     Oid         mapfilenode;    /* its filenode number */
      83             : } RelMapping;
      84             : 
      85             : typedef struct RelMapFile   /* a complete map; also reused in memory for uncommitted update sets */
      86             : {
      87             :     int32       magic;          /* always RELMAPPER_FILEMAGIC in a valid map;
                     :                                  * RelationMapInitialize sets 0 to mark a
                     :                                  * map as not loaded */
      88             :     int32       num_mappings;   /* number of valid RelMapping entries; only
                     :                                  * mappings[0 .. num_mappings-1] are examined */
      89             :     RelMapping  mappings[MAX_MAPPINGS];
      90             :     pg_crc32c   crc;            /* CRC of all above */
      91             :     int32       pad;            /* to make the struct size be 512 exactly */
      92             : } RelMapFile;
      93             : 
      94             : /*
      95             :  * The currently known contents of the shared map file and our database's
      96             :  * local map file are stored here.  These can be reloaded from disk
      97             :  * immediately whenever we receive an update sinval message.
      98             :  */
      99             : static RelMapFile shared_map;   /* currently known contents of the shared map file */
     100             : static RelMapFile local_map;    /* currently known contents of our database's map file */
     101             : 
     102             : /*
     103             :  * We use the same RelMapFile data structure to track uncommitted local
     104             :  * changes in the mappings (but note the magic and crc fields are not made
     105             :  * valid in these variables).  Currently, map updates are not allowed within
     106             :  * subtransactions, so one set of transaction-level changes is sufficient.
     107             :  *
     108             :  * The active_xxx variables contain updates that are valid in our transaction
     109             :  * and should be honored by RelationMapOidToFilenode.  The pending_xxx
     110             :  * variables contain updates we have been told about that aren't active yet;
     111             :  * they will become active at the next CommandCounterIncrement.  This setup
     112             :  * lets map updates act similarly to updates of pg_class rows, ie, they
     113             :  * become visible only at the next CommandCounterIncrement boundary.
     114             :  */
     115             : static RelMapFile active_shared_updates;    /* already honored by lookups in this transaction */
     116             : static RelMapFile active_local_updates;     /* ditto, for local catalogs */
     117             : static RelMapFile pending_shared_updates;   /* merged into active set by AtCCI_RelationMap */
     118             : static RelMapFile pending_local_updates;    /* ditto, for local catalogs */
     119             : 
     120             : 
     121             : /* non-export function prototypes */
     122             : static void apply_map_update(RelMapFile *map, Oid relationId, Oid fileNode,
     123             :                  bool add_okay);
     124             : static void merge_map_updates(RelMapFile *map, const RelMapFile *updates,
     125             :                   bool add_okay);
     126             : static void load_relmap_file(bool shared);
     127             : static void write_relmap_file(bool shared, RelMapFile *newmap,
     128             :                   bool write_wal, bool send_sinval, bool preserve_files,
     129             :                   Oid dbid, Oid tsid, const char *dbpath);
     130             : static void perform_relmap_update(bool shared, const RelMapFile *updates);
     131             : 
     132             : 
     133             : /*
     134             :  * RelationMapOidToFilenode
     135             :  *
     136             :  * The raison d'etre ... given a relation OID, look up its filenode.
     137             :  *
     138             :  * Although shared and local relation OIDs should never overlap, the caller
     139             :  * always knows which we need --- so pass that information to avoid useless
     140             :  * searching.
     141             :  *
                     :  * relationId: OID of the mapped catalog to look up.
                     :  * shared: true to search the shared-catalog maps, false to search this
                     :  *      database's local maps.
                     :  *
                     :  * The transaction's active update set is searched first and the main map
                     :  * second, so an active update overrides the main map's entry.  The
                     :  * pending update sets are not consulted here.  Each search is a linear
                     :  * scan that returns on the first matching mapoid.
                     :  *
     142             :  * Returns InvalidOid if the OID is not known (which should never happen,
     143             :  * but the caller is in a better position to report a meaningful error).
     144             :  */
     145             : Oid
     146       18120 : RelationMapOidToFilenode(Oid relationId, bool shared)
     147             : {
     148             :     const RelMapFile *map;
     149             :     int32       i;
     150             : 
     151             :     /* If there are active updates, believe those over the main maps */
     152       18120 :     if (shared)
     153             :     {
     154       11423 :         map = &active_shared_updates;
     155       11435 :         for (i = 0; i < map->num_mappings; i++)
     156             :         {
     157          23 :             if (relationId == map->mappings[i].mapoid)
     158          11 :                 return map->mappings[i].mapfilenode;
     159             :         }
     160       11412 :         map = &shared_map;      /* no active override; use the main shared map */
     161      200930 :         for (i = 0; i < map->num_mappings; i++)
     162             :         {
     163      200930 :             if (relationId == map->mappings[i].mapoid)
     164       11412 :                 return map->mappings[i].mapfilenode;
     165             :         }
     166             :     }
     167             :     else
     168             :     {
     169        6697 :         map = &active_local_updates;
     170        6726 :         for (i = 0; i < map->num_mappings; i++)
     171             :         {
     172          50 :             if (relationId == map->mappings[i].mapoid)
     173          21 :                 return map->mappings[i].mapfilenode;
     174             :         }
     175        6676 :         map = &local_map;       /* no active override; use the main local map */
     176       49599 :         for (i = 0; i < map->num_mappings; i++)
     177             :         {
     178       49599 :             if (relationId == map->mappings[i].mapoid)
     179        6676 :                 return map->mappings[i].mapfilenode;
     180             :         }
     181             :     }
     182             : 
     183           0 :     return InvalidOid;          /* found in neither the active set nor the main map */
     184             : }
     185             : 
     186             : /*
     187             :  * RelationMapFilenodeToOid
     188             :  *
     189             :  * Do the reverse of the normal direction of mapping done in
     190             :  * RelationMapOidToFilenode.
     191             :  *
     192             :  * This is not supposed to be used during normal running but rather for
     193             :  * information purposes when looking at the filesystem or xlog.
     194             :  *
                     :  * filenode: filenode number to look up.
                     :  * shared: true to search the shared-catalog maps, false to search this
                     :  *      database's local maps.
                     :  *
                     :  * The active update set is searched before the main map, and the pending
                     :  * update sets are not consulted.  The first entry whose mapfilenode
                     :  * matches wins.
                     :  *
     195             :  * Returns InvalidOid if the filenode is not known; this can easily happen if
     196             :  * the relfilenode doesn't pertain to a mapped relation.
     197             :  */
     198             : Oid
     199          50 : RelationMapFilenodeToOid(Oid filenode, bool shared)
     200             : {
     201             :     const RelMapFile *map;
     202             :     int32       i;
     203             : 
     204             :     /* If there are active updates, believe those over the main maps */
     205          50 :     if (shared)
     206             :     {
     207          35 :         map = &active_shared_updates;
     208          35 :         for (i = 0; i < map->num_mappings; i++)
     209             :         {
     210           0 :             if (filenode == map->mappings[i].mapfilenode)
     211           0 :                 return map->mappings[i].mapoid;
     212             :         }
     213          35 :         map = &shared_map;      /* no active override; use the main shared map */
     214         630 :         for (i = 0; i < map->num_mappings; i++)
     215             :         {
     216         630 :             if (filenode == map->mappings[i].mapfilenode)
     217          35 :                 return map->mappings[i].mapoid;
     218             :         }
     219             :     }
     220             :     else
     221             :     {
     222          15 :         map = &active_local_updates;
     223          15 :         for (i = 0; i < map->num_mappings; i++)
     224             :         {
     225           0 :             if (filenode == map->mappings[i].mapfilenode)
     226           0 :                 return map->mappings[i].mapoid;
     227             :         }
     228          15 :         map = &local_map;       /* no active override; use the main local map */
     229         120 :         for (i = 0; i < map->num_mappings; i++)
     230             :         {
     231         120 :             if (filenode == map->mappings[i].mapfilenode)
     232          15 :                 return map->mappings[i].mapoid;
     233             :         }
     234             :     }
     235             : 
     236           0 :     return InvalidOid;          /* no mapped relation uses this filenode */
     237             : }
     238             : 
     239             : /*
     240             :  * RelationMapUpdateMap
     241             :  *
     242             :  * Install a new relfilenode mapping for the specified relation.
     243             :  *
     244             :  * If immediate is true (or we're bootstrapping), the mapping is activated
     245             :  * immediately.  Otherwise it is made pending until CommandCounterIncrement.
                     :  *
                     :  * relationId: OID of the catalog being mapped.
                     :  * fileNode: filenode number to record for it.
                     :  * shared: true to update a shared-catalog map, false for a local one.
                     :  * immediate: outside bootstrap mode, true targets the active update set
                     :  *      and false targets the pending update set; in bootstrap mode the
                     :  *      flag is not examined and the main map itself is modified.
                     :  *
                     :  * Outside bootstrap mode, raises ERROR when called at a transaction nest
                     :  * level above 1.  The entry is applied with add_okay = true, so a new
                     :  * mapping is added if none exists (apply_map_update raises ERROR if the
                     :  * target map is already full).
     246             :  */
     247             : void
     248          65 : RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared,
     249             :                      bool immediate)
     250             : {
     251             :     RelMapFile *map;
     252             : 
     253          65 :     if (IsBootstrapProcessingMode())
     254             :     {
     255             :         /*
     256             :          * In bootstrap mode, the mapping gets installed in permanent map.
     257             :          */
     258          54 :         if (shared)
     259          35 :             map = &shared_map;
     260             :         else
     261          19 :             map = &local_map;
     262             :     }
     263             :     else
     264             :     {
     265             :         /*
     266             :          * We don't currently support map changes within subtransactions. This
     267             :          * could be done with more bookkeeping infrastructure, but it doesn't
     268             :          * presently seem worth it.
     269             :          */
     270          11 :         if (GetCurrentTransactionNestLevel() > 1)
     271           0 :             elog(ERROR, "cannot change relation mapping within subtransaction");
     272             : 
     273          11 :         if (immediate)
     274             :         {
     275             :             /* Make it active, but only locally */
     276           2 :             if (shared)
     277           0 :                 map = &active_shared_updates;
     278             :             else
     279           2 :                 map = &active_local_updates;
     280             :         }
     281             :         else
     282             :         {
     283             :             /* Make it pending */
     284           9 :             if (shared)
     285           3 :                 map = &pending_shared_updates;
     286             :             else
     287           6 :                 map = &pending_local_updates;
     288             :         }
     289             :     }
     290          65 :     apply_map_update(map, relationId, fileNode, true);  /* add_okay: may create a new entry */
     291          65 : }
     292             : 
     293             : /*
     294             :  * apply_map_update
     295             :  *
     296             :  * Insert a new mapping into the given map variable, replacing any existing
     297             :  * mapping for the same relation.
     298             :  *
     299             :  * In some cases the caller knows there must be an existing mapping; pass
     300             :  * add_okay = false to draw an error if not.
                     :  *
                     :  * map: map or update set to modify in place.
                     :  * relationId: OID whose entry is replaced or added.
                     :  * fileNode: filenode number to store for that OID.
                     :  * add_okay: whether appending a new entry is permitted.
                     :  *
                     :  * An existing entry keeps its array position and only its filenode is
                     :  * overwritten.  A new entry is appended at index num_mappings.  Raises
                     :  * ERROR if a new entry is needed but add_okay is false, or if the map
                     :  * already holds MAX_MAPPINGS entries.
     301             :  */
     302             : static void
     303          81 : apply_map_update(RelMapFile *map, Oid relationId, Oid fileNode, bool add_okay)
     304             : {
     305             :     int32       i;
     306             : 
     307             :     /* Replace any existing mapping */
     308         871 :     for (i = 0; i < map->num_mappings; i++)
     309             :     {
     310         803 :         if (relationId == map->mappings[i].mapoid)
     311             :         {
     312          13 :             map->mappings[i].mapfilenode = fileNode;
     313          94 :             return;
     314             :         }
     315             :     }
     316             : 
     317             :     /* Nope, need to add a new mapping */
     318          68 :     if (!add_okay)
     319           0 :         elog(ERROR, "attempt to apply a mapping to unmapped relation %u",
     320             :              relationId);
     321          68 :     if (map->num_mappings >= MAX_MAPPINGS)     /* array is full; refuse to write past its end */
     322           0 :         elog(ERROR, "ran out of space in relation map");
     323          68 :     map->mappings[map->num_mappings].mapoid = relationId;
     324          68 :     map->mappings[map->num_mappings].mapfilenode = fileNode;
     325          68 :     map->num_mappings++;
     326             : }
     327             : 
     328             : /*
     329             :  * merge_map_updates
     330             :  *
     331             :  * Merge all the updates in the given pending-update map into the target map.
     332             :  * This is just a bulk form of apply_map_update.
                     :  *
                     :  * map: target map, modified in place.
                     :  * updates: source set; it is only read here, so a caller that wants it
                     :  *      emptied must reset its num_mappings itself.
                     :  * add_okay: passed through to apply_map_update for every entry.
                     :  *
                     :  * Entries are applied in array order.  Any ERROR raised by
                     :  * apply_map_update propagates, leaving earlier entries already applied.
     333             :  */
     334             : static void
     335          10 : merge_map_updates(RelMapFile *map, const RelMapFile *updates, bool add_okay)
     336             : {
     337             :     int32       i;
     338             : 
     339          26 :     for (i = 0; i < updates->num_mappings; i++)
     340             :     {
     341          16 :         apply_map_update(map,
     342             :                          updates->mappings[i].mapoid,
     343             :                          updates->mappings[i].mapfilenode,
     344             :                          add_okay);
     345             :     }
     346          10 : }
     347             : 
     348             : /*
     349             :  * RelationMapRemoveMapping
     350             :  *
     351             :  * Remove a relation's entry in the map.  This is only allowed for "active"
     352             :  * (but not committed) local mappings.  We need it so we can back out the
     353             :  * entry for the transient target file when doing VACUUM FULL/CLUSTER on
     354             :  * a mapped relation.
                     :  *
                     :  * relationId: OID whose entry is removed from active_local_updates; no
                     :  *      other map or update set is examined.
                     :  *
                     :  * The entry is removed by copying the last valid entry over it and
                     :  * decrementing num_mappings, so the order of the remaining entries is
                     :  * not preserved.  Raises ERROR if no entry for relationId exists.
     355             :  */
     356             : void
     357           2 : RelationMapRemoveMapping(Oid relationId)
     358             : {
     359           2 :     RelMapFile *map = &active_local_updates;
     360             :     int32       i;
     361             : 
     362           2 :     for (i = 0; i < map->num_mappings; i++)
     363             :     {
     364           2 :         if (relationId == map->mappings[i].mapoid)
     365             :         {
     366             :             /* Found it, collapse it out */
     367           2 :             map->mappings[i] = map->mappings[map->num_mappings - 1];    /* self-copy if i is the last entry */
     368           2 :             map->num_mappings--;
     369           4 :             return;
     370             :         }
     371             :     }
     372           0 :     elog(ERROR, "could not find temporary mapping for relation %u",
     373             :          relationId);
     374             : }
     375             : 
     376             : /*
     377             :  * RelationMapInvalidate
     378             :  *
     379             :  * This routine is invoked for SI cache flush messages.  We must re-read
     380             :  * the indicated map file.  However, we might receive a SI message in a
     381             :  * process that hasn't yet, and might never, load the mapping files;
     382             :  * for example the autovacuum launcher, which *must not* try to read
     383             :  * a local map since it is attached to no particular database.
     384             :  * So, re-read only if the map is valid now.
                     :  *
                     :  * shared: true to reload the shared map, false to reload the local map.
                     :  *
                     :  * "Valid" is tested as magic == RELMAPPER_FILEMAGIC; RelationMapInitialize
                     :  * sets magic to 0 to mark a map as not loaded.
                     :  * NOTE(review): load_relmap_file is presumably what sets magic to the
                     :  * valid value; its body is not visible here -- confirm.
     385             :  */
     386             : void
     387          16 : RelationMapInvalidate(bool shared)
     388             : {
     389          16 :     if (shared)
     390             :     {
     391           9 :         if (shared_map.magic == RELMAPPER_FILEMAGIC)
     392           9 :             load_relmap_file(true);
     393             :     }
     394             :     else
     395             :     {
     396           7 :         if (local_map.magic == RELMAPPER_FILEMAGIC)
     397           7 :             load_relmap_file(false);
     398             :     }
     399          16 : }
     400             : 
     401             : /*
     402             :  * RelationMapInvalidateAll
     403             :  *
     404             :  * Reload all map files.  This is used to recover from SI message buffer
     405             :  * overflow: we can't be sure if we missed an inval message.
     406             :  * Again, reload only currently-valid maps.
                     :  *
                     :  * The shared and local maps are tested independently (magic ==
                     :  * RELMAPPER_FILEMAGIC), so either, both or neither may be reloaded; the
                     :  * shared map is handled first.
     407             :  */
     408             : void
     409         145 : RelationMapInvalidateAll(void)
     410             : {
     411         145 :     if (shared_map.magic == RELMAPPER_FILEMAGIC)
     412         145 :         load_relmap_file(true);
     413         145 :     if (local_map.magic == RELMAPPER_FILEMAGIC)
     414         143 :         load_relmap_file(false);
     415         145 : }
     416             : 
     417             : /*
     418             :  * AtCCI_RelationMap
     419             :  *
     420             :  * Activate any "pending" relation map updates at CommandCounterIncrement time.
                     :  *
                     :  * Each non-empty pending set is merged into the matching active set
                     :  * (new entries may be added) and is then emptied by zeroing its
                     :  * num_mappings.  When both pending sets are empty this does nothing
                     :  * beyond the two comparisons.
     421             :  */
     422             : void
     423       22045 : AtCCI_RelationMap(void)
     424             : {
     425       22045 :     if (pending_shared_updates.num_mappings != 0)
     426             :     {
     427           3 :         merge_map_updates(&active_shared_updates,
     428             :                           &pending_shared_updates,
     429             :                           true);
     430           3 :         pending_shared_updates.num_mappings = 0;
     431             :     }
     432       22045 :     if (pending_local_updates.num_mappings != 0)
     433             :     {
     434           5 :         merge_map_updates(&active_local_updates,
     435             :                           &pending_local_updates,
     436             :                           true);
     437           5 :         pending_local_updates.num_mappings = 0;
     438             :     }
     439       22045 : }
     440             : 
     441             : /*
     442             :  * AtEOXact_RelationMap
     443             :  *
     444             :  * Handle relation mapping at main-transaction commit or abort.
     445             :  *
     446             :  * During commit, this must be called as late as possible before the actual
     447             :  * transaction commit, so as to minimize the window where the transaction
     448             :  * could still roll back after committing map changes.  Although nothing
     449             :  * critically bad happens in such a case, we still would prefer that it
     450             :  * not happen, since we'd possibly be losing useful updates to the relations'
     451             :  * pg_class row(s).
     452             :  *
     453             :  * During abort, we just have to throw away any pending map changes.
     454             :  * Normal post-abort cleanup will take care of fixing relcache entries.
                     :  *
                     :  * isCommit: true at commit, false at abort.
                     :  *
                     :  * On commit, a non-empty active set is handed to perform_relmap_update
                     :  * (shared first, then local) and then emptied.  On abort, all four update
                     :  * sets are emptied without touching the main maps.  "Emptied" means only
                     :  * that num_mappings is zeroed; the array contents are left as they were.
     455             :  */
     456             : void
     457       26161 : AtEOXact_RelationMap(bool isCommit)
     458             : {
     459       26161 :     if (isCommit)
     460             :     {
     461             :         /*
     462             :          * We should not get here with any "pending" updates.  (We could
     463             :          * logically choose to treat such as committed, but in the current
     464             :          * code this should never happen.)
     465             :          */
     466       22883 :         Assert(pending_shared_updates.num_mappings == 0);
     467       22883 :         Assert(pending_local_updates.num_mappings == 0);
     468             : 
     469             :         /*
     470             :          * Write any active updates to the actual map files, then reset them.
     471             :          */
     472       22883 :         if (active_shared_updates.num_mappings != 0)
     473             :         {
     474           1 :             perform_relmap_update(true, &active_shared_updates);
     475           1 :             active_shared_updates.num_mappings = 0;
     476             :         }
     477       22883 :         if (active_local_updates.num_mappings != 0)
     478             :         {
     479           1 :             perform_relmap_update(false, &active_local_updates);
     480           1 :             active_local_updates.num_mappings = 0;
     481             :         }
     482             :     }
     483             :     else
     484             :     {
     485             :         /* Abort --- drop all local and pending updates */
     486        3278 :         active_shared_updates.num_mappings = 0;
     487        3278 :         active_local_updates.num_mappings = 0;
     488        3278 :         pending_shared_updates.num_mappings = 0;
     489        3278 :         pending_local_updates.num_mappings = 0;
     490             :     }
     491       26161 : }
     492             : 
     493             : /*
     494             :  * AtPrepare_RelationMap
     495             :  *
     496             :  * Handle relation mapping at PREPARE.
     497             :  *
     498             :  * Currently, we don't support preparing any transaction that changes the map.
                     :  *
                     :  * Raises ERROR with ERRCODE_FEATURE_NOT_SUPPORTED if any of the four
                     :  * update sets (active or pending, shared or local) is non-empty;
                     :  * otherwise does nothing.
     499             :  */
     500             : void
     501           6 : AtPrepare_RelationMap(void)
     502             : {
     503          12 :     if (active_shared_updates.num_mappings != 0 ||
     504          12 :         active_local_updates.num_mappings != 0 ||
     505          12 :         pending_shared_updates.num_mappings != 0 ||
     506           6 :         pending_local_updates.num_mappings != 0)
     507           0 :         ereport(ERROR,
     508             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     509             :                  errmsg("cannot PREPARE a transaction that modified relation mapping")));
     510           6 : }
     511             : 
     512             : /*
     513             :  * CheckPointRelationMap
     514             :  *
     515             :  * This is called during a checkpoint.  It must ensure that any relation map
     516             :  * updates that were WAL-logged before the start of the checkpoint are
     517             :  * securely flushed to disk and will not need to be replayed later.  This
     518             :  * seems unlikely to be a performance-critical issue, so we use a simple
     519             :  * method: we just take and release the RelationMappingLock.  This ensures
     520             :  * that any already-logged map update is complete, because write_relmap_file
     521             :  * will fsync the map file before the lock is released.
                     :  *
                     :  * The lock is taken in LW_SHARED mode and released at once; nothing is
                     :  * read or written while it is held.
                     :  * NOTE(review): this only works as a barrier if map writers hold
                     :  * RelationMappingLock in a conflicting (exclusive) mode across both the
                     :  * WAL insertion and the fsync; that code is not visible here -- confirm.
     522             :  */
     523             : void
     524          11 : CheckPointRelationMap(void)
     525             : {
     526          11 :     LWLockAcquire(RelationMappingLock, LW_SHARED);
     527          11 :     LWLockRelease(RelationMappingLock);
     528          11 : }
     529             : 
     530             : /*
     531             :  * RelationMapFinishBootstrap
     532             :  *
     533             :  * Write out the initial relation mapping files at the completion of
     534             :  * bootstrap.  All the mapped files should have been made known to us
     535             :  * via RelationMapUpdateMap calls.
                     :  *
                     :  * In bootstrap mode RelationMapUpdateMap stores entries directly in
                     :  * shared_map and local_map, so those two variables are what get written.
                     :  * The shared map is written first, with InvalidOid, GLOBALTABLESPACE_OID
                     :  * and a NULL path; the local map is written with MyDatabaseId,
                     :  * MyDatabaseTableSpace and DatabasePath.
     536             :  */
     537             : void
     538           1 : RelationMapFinishBootstrap(void)
     539             : {
     540           1 :     Assert(IsBootstrapProcessingMode());
     541             : 
     542             :     /* Shouldn't be anything "pending" ... */
     543           1 :     Assert(active_shared_updates.num_mappings == 0);
     544           1 :     Assert(active_local_updates.num_mappings == 0);
     545           1 :     Assert(pending_shared_updates.num_mappings == 0);
     546           1 :     Assert(pending_local_updates.num_mappings == 0);
     547             : 
     548             :     /* Write the files; no WAL or sinval needed.  Per the prototype, the three
                     :      * "false" arguments are write_wal, send_sinval and preserve_files. */
     549           1 :     write_relmap_file(true, &shared_map, false, false, false,
     550             :                       InvalidOid, GLOBALTABLESPACE_OID, NULL);
     551           1 :     write_relmap_file(false, &local_map, false, false, false,
     552             :                       MyDatabaseId, MyDatabaseTableSpace, DatabasePath);
     553           1 : }
     554             : 
     555             : /*
     556             :  * RelationMapInitialize
     557             :  *
     558             :  * This initializes the mapper module at process startup.  We can't access the
     559             :  * database yet, so just make sure the maps are empty.
                     :  *
                     :  * Only the magic and num_mappings fields are reset; the mappings[]
                     :  * arrays are not cleared, which is harmless because lookups examine only
                     :  * the first num_mappings entries.  A magic of 0 is what makes
                     :  * RelationMapInvalidate and RelationMapInvalidateAll skip a map.
     560             :  */
     561             : void
     562         338 : RelationMapInitialize(void)
     563             : {
     564             :     /* The static variables should initialize to zeroes, but let's be sure */
     565         338 :     shared_map.magic = 0;       /* mark it not loaded */
     566         338 :     local_map.magic = 0;
     567         338 :     shared_map.num_mappings = 0;
     568         338 :     local_map.num_mappings = 0;
     569         338 :     active_shared_updates.num_mappings = 0;
     570         338 :     active_local_updates.num_mappings = 0;
     571         338 :     pending_shared_updates.num_mappings = 0;
     572         338 :     pending_local_updates.num_mappings = 0;
     573         338 : }
     574             : 
     575             : /*
     576             :  * RelationMapInitializePhase2
     577             :  *
     578             :  * This is called to prepare for access to pg_database during startup.
     579             :  * We should be able to read the shared map file now.
                        *
                        * Outside bootstrap mode this populates shared_map via
                        * load_relmap_file(true), which reports any failure at FATAL level.
     580             :  */
     581             : void
     582         338 : RelationMapInitializePhase2(void)
     583             : {
     584             :     /*
     585             :      * In bootstrap mode, the map file isn't there yet, so do nothing.
     586             :      */
     587         338 :     if (IsBootstrapProcessingMode())
     588         339 :         return;
     589             : 
     590             :     /*
     591             :      * Load the shared map file, die on error.
     592             :      */
     593         337 :     load_relmap_file(true);
     594             : }
     595             : 
     596             : /*
     597             :  * RelationMapInitializePhase3
     598             :  *
     599             :  * This is called as soon as we have determined MyDatabaseId and set up
     600             :  * DatabasePath.  At this point we should be able to read the local map file.
                        *
                        * Outside bootstrap mode this populates local_map via
                        * load_relmap_file(false), which builds the file name from DatabasePath
                        * and reports any failure at FATAL level.
     601             :  */
     602             : void
     603         336 : RelationMapInitializePhase3(void)
     604             : {
     605             :     /*
     606             :      * In bootstrap mode, the map file isn't there yet, so do nothing.
     607             :      */
     608         336 :     if (IsBootstrapProcessingMode())
     609         337 :         return;
     610             : 
     611             :     /*
     612             :      * Load the local map file, die on error.
     613             :      */
     614         335 :     load_relmap_file(false);
     615             : }
     616             : 
     617             : /*
     618             :  * load_relmap_file -- load data from the shared or local map file
     619             :  *
     620             :  * Because the map file is essential for access to core system catalogs,
     621             :  * failure to read it is a fatal error.
     622             :  *
     623             :  * Note that the local case requires DatabasePath to be set up.
                        *
                        * shared: if true, read RELMAPPER_FILENAME under "global/" into
                        * shared_map; if false, read RELMAPPER_FILENAME under DatabasePath into
                        * local_map.
                        *
                        * The file contents are read directly into the target static map and only
                        * then validated (magic number, num_mappings range, CRC), so when a
                        * validation check fails the in-memory map already holds the rejected
                        * data.  Every failure path in this function is reported at FATAL level.
     624             :  */
     625             : static void
     626         978 : load_relmap_file(bool shared)
     627             : {
     628             :     RelMapFile *map;
     629             :     char        mapfilename[MAXPGPATH];
     630             :     pg_crc32c   crc;
     631             :     int         fd;
     632             : 
                           /* Pick the file to read and the static map that receives its contents. */
     633         978 :     if (shared)
     634             :     {
     635         492 :         snprintf(mapfilename, sizeof(mapfilename), "global/%s",
     636             :                  RELMAPPER_FILENAME);
     637         492 :         map = &shared_map;
     638             :     }
     639             :     else
     640             :     {
     641         486 :         snprintf(mapfilename, sizeof(mapfilename), "%s/%s",
     642             :                  DatabasePath, RELMAPPER_FILENAME);
     643         486 :         map = &local_map;
     644             :     }
     645             : 
     646             :     /* Read data ... */
     647         978 :     fd = OpenTransientFile(mapfilename,
     648             :                            O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
     649         978 :     if (fd < 0)
     650           0 :         ereport(FATAL,
     651             :                 (errcode_for_file_access(),
     652             :                  errmsg("could not open relation mapping file \"%s\": %m",
     653             :                         mapfilename)));
     654             : 
     655             :     /*
     656             :      * Note: we could take RelationMappingLock in shared mode here, but it
     657             :      * seems unnecessary since our read() should be atomic against any
     658             :      * concurrent updater's write().  If the file is updated shortly after we
     659             :      * look, the sinval signaling mechanism will make us re-read it before we
     660             :      * are able to access any relation that's affected by the change.
     661             :      */
                           /*
                            * NOTE(review): a short read (fewer bytes than requested, but no
                            * error) also takes the error branch below, where "%m" prints
                            * whatever errno happens to hold -- possibly a stale or unrelated
                            * value.  Unlike the write path in write_relmap_file, errno is not
                            * cleared before the call.
                            */
     662         978 :     pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_READ);
     663         978 :     if (read(fd, map, sizeof(RelMapFile)) != sizeof(RelMapFile))
     664           0 :         ereport(FATAL,
     665             :                 (errcode_for_file_access(),
     666             :                  errmsg("could not read relation mapping file \"%s\": %m",
     667             :                         mapfilename)));
     668         978 :     pgstat_report_wait_end();
     669             : 
                           /*
                            * NOTE(review): the result of CloseTransientFile is ignored here,
                            * whereas write_relmap_file checks it.
                            */
     670         978 :     CloseTransientFile(fd);
     671             : 
     672             :     /* check for correct magic number, etc */
     673        1956 :     if (map->magic != RELMAPPER_FILEMAGIC ||
     674        1956 :         map->num_mappings < 0 ||
     675         978 :         map->num_mappings > MAX_MAPPINGS)
     676           0 :         ereport(FATAL,
     677             :                 (errmsg("relation mapping file \"%s\" contains invalid data",
     678             :                         mapfilename)));
     679             : 
     680             :     /* verify the CRC */
                           /* The CRC covers every byte of the struct that precedes its crc field. */
     681         978 :     INIT_CRC32C(crc);
     682         978 :     COMP_CRC32C(crc, (char *) map, offsetof(RelMapFile, crc));
     683         978 :     FIN_CRC32C(crc);
     684             : 
     685         978 :     if (!EQ_CRC32C(crc, map->crc))
     686           0 :         ereport(FATAL,
     687             :                 (errmsg("relation mapping file \"%s\" contains incorrect checksum",
     688             :                         mapfilename)));
     689         978 : }
     690             : 
     691             : /*
     692             :  * Write out a new shared or local map file with the given contents.
     693             :  *
     694             :  * The magic number and CRC are automatically updated in *newmap.  On
     695             :  * success, we copy the data to the appropriate permanent static variable.
     696             :  *
     697             :  * If write_wal is TRUE then an appropriate WAL message is emitted.
     698             :  * (It will be false for bootstrap and WAL replay cases.)
     699             :  *
     700             :  * If send_sinval is TRUE then a SI invalidation message is sent.
     701             :  * (This should be true except in bootstrap case.)
     702             :  *
     703             :  * If preserve_files is TRUE then the storage manager is warned not to
     704             :  * delete the files listed in the map.
     705             :  *
     706             :  * Because this may be called during WAL replay when MyDatabaseId,
     707             :  * DatabasePath, etc aren't valid, we require the caller to pass in suitable
     708             :  * values.  The caller is also responsible for being sure no concurrent
     709             :  * map update could be happening.
                        *
                        * shared selects the target: true writes RELMAPPER_FILENAME under
                        * "global/" and updates shared_map; false writes RELMAPPER_FILENAME under
                        * dbpath and updates local_map.  dbpath is not used in the shared case.
                        * dbid and tsid are stored in the WAL record, dbid is passed to
                        * CacheInvalidateRelmap, and both are used to build the RelFileNodes
                        * handed to RelationPreserveStorage.
                        *
                        * NOTE(review): a critical section is entered only when write_wal is
                        * true.  The in-body comments that speak of being "inside the critical
                        * section" therefore describe that case only; with write_wal false
                        * (bootstrap, WAL replay) the ereport(ERROR) calls below run outside any
                        * critical section started by this function.
     710             :  */
     711             : static void
     712           4 : write_relmap_file(bool shared, RelMapFile *newmap,
     713             :                   bool write_wal, bool send_sinval, bool preserve_files,
     714             :                   Oid dbid, Oid tsid, const char *dbpath)
     715             : {
     716             :     int         fd;
     717             :     RelMapFile *realmap;
     718             :     char        mapfilename[MAXPGPATH];
     719             : 
     720             :     /*
     721             :      * Fill in the overhead fields and update CRC.
     722             :      */
     723           4 :     newmap->magic = RELMAPPER_FILEMAGIC;
     724           4 :     if (newmap->num_mappings < 0 || newmap->num_mappings > MAX_MAPPINGS)
     725           0 :         elog(ERROR, "attempt to write bogus relation mapping");
     726             : 
                           /* The CRC covers every byte of the struct that precedes its crc field. */
     727           4 :     INIT_CRC32C(newmap->crc);
     728           4 :     COMP_CRC32C(newmap->crc, (char *) newmap, offsetof(RelMapFile, crc));
     729           4 :     FIN_CRC32C(newmap->crc);
     730             : 
     731             :     /*
     732             :      * Open the target file.  We prefer to do this before entering the
     733             :      * critical section, so that an open() failure need not force PANIC.
     734             :      */
     735           4 :     if (shared)
     736             :     {
     737           2 :         snprintf(mapfilename, sizeof(mapfilename), "global/%s",
     738             :                  RELMAPPER_FILENAME);
     739           2 :         realmap = &shared_map;
     740             :     }
     741             :     else
     742             :     {
     743           2 :         snprintf(mapfilename, sizeof(mapfilename), "%s/%s",
     744             :                  dbpath, RELMAPPER_FILENAME);
     745           2 :         realmap = &local_map;
     746             :     }
     747             : 
                           /*
                            * NOTE(review): the file is opened without O_TRUNC and overwritten in
                            * place from offset zero with exactly sizeof(RelMapFile) bytes;
                            * presumably an existing file is never larger than that -- confirm.
                            */
     748           4 :     fd = OpenTransientFile(mapfilename,
     749             :                            O_WRONLY | O_CREAT | PG_BINARY,
     750             :                            S_IRUSR | S_IWUSR);
     751           4 :     if (fd < 0)
     752           0 :         ereport(ERROR,
     753             :                 (errcode_for_file_access(),
     754             :                  errmsg("could not open relation mapping file \"%s\": %m",
     755             :                         mapfilename)));
     756             : 
     757           4 :     if (write_wal)
     758             :     {
     759             :         xl_relmap_update xlrec;
     760             :         XLogRecPtr  lsn;
     761             : 
     762             :         /* now errors are fatal ... */
                               /* The matching END_CRIT_SECTION is at the bottom of the function. */
     763           2 :         START_CRIT_SECTION();
     764             : 
     765           2 :         xlrec.dbid = dbid;
     766           2 :         xlrec.tsid = tsid;
     767           2 :         xlrec.nbytes = sizeof(RelMapFile);
     768             : 
                               /* The record is the fixed header followed by the whole map image. */
     769           2 :         XLogBeginInsert();
     770           2 :         XLogRegisterData((char *) (&xlrec), MinSizeOfRelmapUpdate);
     771           2 :         XLogRegisterData((char *) newmap, sizeof(RelMapFile));
     772             : 
     773           2 :         lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE);
     774             : 
     775             :         /* As always, WAL must hit the disk before the data update does */
     776           2 :         XLogFlush(lsn);
     777             :     }
     778             : 
                           /* Clear errno first so a short write without an error can be detected. */
     779           4 :     errno = 0;
     780           4 :     pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_WRITE);
     781           4 :     if (write(fd, newmap, sizeof(RelMapFile)) != sizeof(RelMapFile))
     782             :     {
     783             :         /* if write didn't set errno, assume problem is no disk space */
     784           0 :         if (errno == 0)
     785           0 :             errno = ENOSPC;
     786           0 :         ereport(ERROR,
     787             :                 (errcode_for_file_access(),
     788             :                  errmsg("could not write to relation mapping file \"%s\": %m",
     789             :                         mapfilename)));
     790             :     }
     791           4 :     pgstat_report_wait_end();
     792             : 
     793             :     /*
     794             :      * We choose to fsync the data to disk before considering the task done.
     795             :      * It would be possible to relax this if it turns out to be a performance
     796             :      * issue, but it would complicate checkpointing --- see notes for
     797             :      * CheckPointRelationMap.
     798             :      */
     799           4 :     pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_SYNC);
     800           4 :     if (pg_fsync(fd) != 0)
     801           0 :         ereport(ERROR,
     802             :                 (errcode_for_file_access(),
     803             :                  errmsg("could not fsync relation mapping file \"%s\": %m",
     804             :                         mapfilename)));
     805           4 :     pgstat_report_wait_end();
     806             : 
     807           4 :     if (CloseTransientFile(fd))
     808           0 :         ereport(ERROR,
     809             :                 (errcode_for_file_access(),
     810             :                  errmsg("could not close relation mapping file \"%s\": %m",
     811             :                         mapfilename)));
     812             : 
     813             :     /*
     814             :      * Now that the file is safely on disk, send sinval message to let other
     815             :      * backends know to re-read it.  We must do this inside the critical
     816             :      * section: if for some reason we fail to send the message, we have to
     817             :      * force a database-wide PANIC.  Otherwise other backends might continue
     818             :      * execution with stale mapping information, which would be catastrophic
     819             :      * as soon as others began to use the now-committed data.
     820             :      */
     821           4 :     if (send_sinval)
     822           2 :         CacheInvalidateRelmap(dbid);
     823             : 
     824             :     /*
     825             :      * Make sure that the files listed in the map are not deleted if the outer
     826             :      * transaction aborts.  This had better be within the critical section
     827             :      * too: it's not likely to fail, but if it did, we'd arrive at transaction
     828             :      * abort with the files still vulnerable.  PANICing will leave things in a
     829             :      * good state on-disk.
     830             :      *
     831             :      * Note: we're cheating a little bit here by assuming that mapped files
     832             :      * are either in pg_global or the database's default tablespace.
     833             :      */
     834           4 :     if (preserve_files)
     835             :     {
     836             :         int32       i;
     837             : 
     838          52 :         for (i = 0; i < newmap->num_mappings; i++)
     839             :         {
     840             :             RelFileNode rnode;
     841             : 
     842          50 :             rnode.spcNode = tsid;
     843          50 :             rnode.dbNode = dbid;
     844          50 :             rnode.relNode = newmap->mappings[i].mapfilenode;
     845          50 :             RelationPreserveStorage(rnode, false);
     846             :         }
     847             :     }
     848             : 
     849             :     /* Success, update permanent copy */
     850           4 :     memcpy(realmap, newmap, sizeof(RelMapFile));
     851             : 
     852             :     /* Critical section done */
     853           4 :     if (write_wal)
     854           2 :         END_CRIT_SECTION();
     855           4 : }
     856             : 
     857             : /*
     858             :  * Merge the specified updates into the appropriate "real" map,
     859             :  * and write out the changes.  This function must be used for committing
     860             :  * updates during normal multiuser operation.
                        *
                        * shared selects the shared map (true) or the current database's local
                        * map (false); updates holds the mappings to merge in and is not modified
                        * here.  RelationMappingLock is held exclusively from before the re-read
                        * until after write_relmap_file returns, which includes that function's
                        * WAL flush and fsync.
     861             :  */
     862             : static void
     863           2 : perform_relmap_update(bool shared, const RelMapFile *updates)
     864             : {
     865             :     RelMapFile  newmap;
     866             : 
     867             :     /*
     868             :      * Anyone updating a relation's mapping info should take exclusive lock on
     869             :      * that rel and hold it until commit.  This ensures that there will not be
     870             :      * concurrent updates on the same mapping value; but there could easily be
     871             :      * concurrent updates on different values in the same file. We cover that
     872             :      * by acquiring the RelationMappingLock, re-reading the target file to
     873             :      * ensure it's up to date, applying the updates, and writing the data
     874             :      * before releasing RelationMappingLock.
     875             :      *
     876             :      * There is only one RelationMappingLock.  In principle we could try to
     877             :      * have one per mapping file, but it seems unlikely to be worth the
     878             :      * trouble.
     879             :      */
     880           2 :     LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE);
     881             : 
     882             :     /* Be certain we see any other updates just made */
                           /* This refreshes shared_map or local_map in place from the file. */
     883           2 :     load_relmap_file(shared);
     884             : 
     885             :     /* Prepare updated data in a local variable */
     886           2 :     if (shared)
     887           1 :         memcpy(&newmap, &shared_map, sizeof(RelMapFile));
     888             :     else
     889           1 :         memcpy(&newmap, &local_map, sizeof(RelMapFile));
     890             : 
     891             :     /*
     892             :      * Apply the updates to newmap.  No new mappings should appear, unless
     893             :      * somebody is adding indexes to system catalogs.
     894             :      */
                           /*
                            * NOTE(review): the third argument presumably tells merge_map_updates
                            * whether brand-new entries may be added -- confirm against its
                            * definition.
                            */
     895           2 :     merge_map_updates(&newmap, updates, allowSystemTableMods);
     896             : 
     897             :     /* Write out the updated map and do other necessary tasks */
                           /*
                            * The three "true" arguments are write_wal, send_sinval and
                            * preserve_files.  DatabasePath is passed in both cases, but
                            * write_relmap_file only uses it when shared is false.
                            */
     898           2 :     write_relmap_file(shared, &newmap, true, true, true,
     899             :                       (shared ? InvalidOid : MyDatabaseId),
     900             :                       (shared ? GLOBALTABLESPACE_OID : MyDatabaseTableSpace),
     901             :                       DatabasePath);
     902             : 
     903             :     /* Now we can release the lock */
     904           2 :     LWLockRelease(RelationMappingLock);
     905           2 : }
     906             : 
     907             : /*
     908             :  * RELMAP resource manager's routines
     909             :  */
                       /*
                        * relmap_redo -- replay one RELMAP WAL record.
                        *
                        * The only record type handled is XLOG_RELMAP_UPDATE, whose payload is a
                        * complete RelMapFile image; any other op code, or a payload whose size is
                        * not sizeof(RelMapFile), raises PANIC.  The image is written out through
                        * write_relmap_file with write_wal = false, send_sinval = true and
                        * preserve_files = false.
                        */
     910             : void
     911           0 : relmap_redo(XLogReaderState *record)
     912             : {
     913           0 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
     914             : 
     915             :     /* Backup blocks are not used in relmap records */
     916           0 :     Assert(!XLogRecHasAnyBlockRefs(record));
     917             : 
     918           0 :     if (info == XLOG_RELMAP_UPDATE)
     919             :     {
     920           0 :         xl_relmap_update *xlrec = (xl_relmap_update *) XLogRecGetData(record);
     921             :         RelMapFile  newmap;
     922             :         char       *dbpath;
     923             : 
                               /* Reject a record whose map image is not exactly one RelMapFile. */
     924           0 :         if (xlrec->nbytes != sizeof(RelMapFile))
     925           0 :             elog(PANIC, "relmap_redo: wrong size %u in relmap update record",
     926             :                  xlrec->nbytes);
                               /* Work on a local copy; write_relmap_file modifies the map it is given. */
     927           0 :         memcpy(&newmap, xlrec->data, sizeof(newmap));
     928             : 
     929             :         /* We need to construct the pathname for this database */
                               /* The returned string is released with pfree() below. */
     930           0 :         dbpath = GetDatabasePath(xlrec->dbid, xlrec->tsid);
     931             : 
     932             :         /*
     933             :          * Write out the new map and send sinval, but of course don't write a
     934             :          * new WAL entry.  There's no surrounding transaction to tell to
     935             :          * preserve files, either.
     936             :          *
     937             :          * There shouldn't be anyone else updating relmaps during WAL replay,
     938             :          * so we don't bother to take the RelationMappingLock.  We would need
     939             :          * to do so if load_relmap_file needed to interlock against writers.
     940             :          */
                               /* A record with dbid == InvalidOid is treated as the shared map. */
     941           0 :         write_relmap_file((xlrec->dbid == InvalidOid), &newmap,
     942             :                           false, true, false,
     943             :                           xlrec->dbid, xlrec->tsid, dbpath);
     944             : 
     945           0 :         pfree(dbpath);
     946             :     }
     947             :     else
     948           0 :         elog(PANIC, "relmap_redo: unknown op code %u", info);
     949           0 : }

Generated by: LCOV version 1.11