LCOV - code coverage report
Current view: top level - src/backend/catalog - storage.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 138 166 83.1 %
Date: 2017-09-29 13:40:31 Functions: 10 11 90.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * storage.c
       4             :  *    code to create and destroy physical storage for relations
       5             :  *
       6             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/catalog/storage.c
      12             :  *
      13             :  * NOTES
      14             :  *    Some of this code used to be in storage/smgr/smgr.c, and the
      15             :  *    function names still reflect that.
      16             :  *
      17             :  *-------------------------------------------------------------------------
      18             :  */
      19             : 
      20             : #include "postgres.h"
      21             : 
      22             : #include "access/visibilitymap.h"
      23             : #include "access/xact.h"
      24             : #include "access/xlog.h"
      25             : #include "access/xloginsert.h"
      26             : #include "access/xlogutils.h"
      27             : #include "catalog/catalog.h"
      28             : #include "catalog/storage.h"
      29             : #include "catalog/storage_xlog.h"
      30             : #include "storage/freespace.h"
      31             : #include "storage/smgr.h"
      32             : #include "utils/memutils.h"
      33             : #include "utils/rel.h"
      34             : 
      35             : /*
      36             :  * We keep a list of all relations (represented as RelFileNode values)
      37             :  * that have been created or deleted in the current transaction.  When
      38             :  * a relation is created, we create the physical file immediately, but
      39             :  * remember it so that we can delete the file again if the current
      40             :  * transaction is aborted.  Conversely, a deletion request is NOT
      41             :  * executed immediately, but is just entered in the list.  When and if
      42             :  * the transaction commits, we can delete the physical file.
      43             :  *
      44             :  * To handle subtransactions, every entry is marked with its transaction
      45             :  * nesting level.  At subtransaction commit, we reassign the subtransaction's
      46             :  * entries to the parent nesting level.  At subtransaction abort, we can
      47             :  * immediately execute the abort-time actions for all entries of the current
      48             :  * nesting level.
      49             :  *
      50             :  * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear
      51             :  * prematurely.  It'd probably be OK to keep it in TopTransactionContext,
      52             :  * but I'm being paranoid.
                     :  *
                     :  * The list is singly linked and new entries are always pushed at the
                     :  * head, so it is ordered newest-first.  Entries are allocated with
                     :  * MemoryContextAlloc and must be released explicitly with pfree when
                     :  * they are unlinked.
      53             :  */
      54             : 
      55             : typedef struct PendingRelDelete
      56             : {
      57             :     RelFileNode relnode;        /* relation whose storage may need deleting */
      58             :     BackendId   backend;        /* InvalidBackendId if not a temp rel */
      59             :     bool        atCommit;       /* true = delete at commit; false = at abort */
      60             :     int         nestLevel;      /* xact nesting level that made the request */
      61             :     struct PendingRelDelete *next;  /* next (older) entry, or NULL */
      62             : } PendingRelDelete;
      63             : 
      64             : static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
      65             : 
      66             : /*
      67             :  * RelationCreateStorage
      68             :  *      Create physical storage for a relation.
      69             :  *
      70             :  * Create the underlying disk file storage for the relation. This only
      71             :  * creates the main fork; additional forks are created lazily by the
      72             :  * modules that need them.
      73             :  *
      74             :  * This function is transactional. The creation is WAL-logged (for
      75             :  * permanent relations only), and if the transaction aborts later on, the
                     :  * storage will be destroyed: a delete-at-abort entry is pushed onto the
                     :  * pending-deletes list at the current transaction nesting level.
                     :  *
                     :  * rnode: identifies the relation whose main fork is to be created.
                     :  * relpersistence: one of RELPERSISTENCE_TEMP, RELPERSISTENCE_UNLOGGED or
                     :  *      RELPERSISTENCE_PERMANENT.  Temp relations are opened with the
                     :  *      backend ID from BackendIdForTempRelations(); the others use
                     :  *      InvalidBackendId.  Any other value is reported with elog(ERROR).
      76             :  */
      77             : void
      78        4112 : RelationCreateStorage(RelFileNode rnode, char relpersistence)
      79             : {
      80             :     PendingRelDelete *pending;
      81             :     SMgrRelation srel;
      82             :     BackendId   backend;
      83             :     bool        needs_wal;
      84             : 
      85        4112 :     switch (relpersistence)
      86             :     {
      87             :         case RELPERSISTENCE_TEMP:
      88         605 :             backend = BackendIdForTempRelations();
      89         605 :             needs_wal = false;
      90         605 :             break;
      91             :         case RELPERSISTENCE_UNLOGGED:
      92          23 :             backend = InvalidBackendId;
      93          23 :             needs_wal = false;
      94          23 :             break;
      95             :         case RELPERSISTENCE_PERMANENT:
      96        3484 :             backend = InvalidBackendId;
      97        3484 :             needs_wal = true;
      98        3484 :             break;
      99             :         default:
     100           0 :             elog(ERROR, "invalid relpersistence: %c", relpersistence);
     101        4112 :             return;             /* placate compiler; not expected to be reached */
     102             :     }
     103             : 
     104        4112 :     srel = smgropen(rnode, backend);
     105        4112 :     smgrcreate(srel, MAIN_FORKNUM, false);
     106             : 
     107        4112 :     if (needs_wal)
     108        3484 :         log_smgrcreate(&srel->smgr_rnode.node, MAIN_FORKNUM);
     109             : 
     110             :     /* Push a delete-at-abort entry onto the head of the pending list */
     111        4112 :     pending = (PendingRelDelete *)
     112        4112 :         MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
     113        4112 :     pending->relnode = rnode;
     114        4112 :     pending->backend = backend;
     115        4112 :     pending->atCommit = false;   /* delete if abort */
     116        4112 :     pending->nestLevel = GetCurrentTransactionNestLevel();
     117        4112 :     pending->next = pendingDeletes;
     118        4112 :     pendingDeletes = pending;
     119             : }
     120             : 
     121             : /*
     122             :  * Perform XLogInsert of an XLOG_SMGR_CREATE record to WAL.
                     :  *
                     :  * rnode: points to the RelFileNode to record; it is copied into the
                     :  *      record, so the caller's copy need not outlive this call.
                     :  * forkNum: the fork number to record alongside it.
                     :  *
                     :  * The record position returned by XLogInsert is not used.
     123             :  */
     124             : void
     125        3495 : log_smgrcreate(RelFileNode *rnode, ForkNumber forkNum)
     126             : {
     127             :     xl_smgr_create xlrec;
     128             : 
     129             :     /*
     130             :      * Fill in the XLOG record body describing the file creation.
     131             :      */
     132        3495 :     xlrec.rnode = *rnode;
     133        3495 :     xlrec.forkNum = forkNum;
     134             : 
     135        3495 :     XLogBeginInsert();
     136        3495 :     XLogRegisterData((char *) &xlrec, sizeof(xlrec));
     137        3495 :     XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLR_SPECIAL_REL_UPDATE);
     138        3495 : }
     139             : 
     140             : /*
     141             :  * RelationDropStorage
     142             :  *      Schedule unlinking of physical storage at transaction commit.
                     :  *
                     :  * Nothing is unlinked here: a delete-at-commit entry built from
                     :  * rel->rd_node and rel->rd_backend is pushed onto the pending-deletes
                     :  * list at the current transaction nesting level, and then the relation's
                     :  * smgr reference is closed with RelationCloseSmgr.
                     :  *
                     :  * rel: the relation whose storage is to be dropped.
     143             :  */
     144             : void
     145        3027 : RelationDropStorage(Relation rel)
     146             : {
     147             :     PendingRelDelete *pending;
     148             : 
     149             :     /* Push a delete-at-commit entry onto the head of the pending list */
     150        3027 :     pending = (PendingRelDelete *)
     151        3027 :         MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
     152        3027 :     pending->relnode = rel->rd_node;
     153        3027 :     pending->backend = rel->rd_backend;
     154        3027 :     pending->atCommit = true;    /* delete if commit */
     155        3027 :     pending->nestLevel = GetCurrentTransactionNestLevel();
     156        3027 :     pending->next = pendingDeletes;
     157        3027 :     pendingDeletes = pending;
     158             : 
     159             :     /*
     160             :      * NOTE: if the relation was created in this transaction, it will now be
     161             :      * present in the pending-delete list twice, once with atCommit true and
     162             :      * once with atCommit false.  Hence, it will be physically deleted at end
     163             :      * of xact in either case (and the other entry will be ignored by
     164             :      * smgrDoPendingDeletes, so no error will occur).  We could instead remove
     165             :      * the existing list entry and delete the physical file immediately, but
     166             :      * for now I'll keep the logic simple.
     167             :      */
     168             : 
     169        3027 :     RelationCloseSmgr(rel);
     170        3027 : }
     171             : 
     172             : /*
     173             :  * RelationPreserveStorage
     174             :  *      Mark a relation as not to be deleted after all.
     175             :  *
     176             :  * We need this function because relation mapping changes are committed
     177             :  * separately from commit of the whole transaction, so it's still possible
     178             :  * for the transaction to abort after the mapping update is done.
     179             :  * When a new physical relation is installed in the map, it would be
     180             :  * scheduled for delete-on-abort, so we'd delete it, and be in trouble.
     181             :  * The relation mapper fixes this by telling us to not delete such relations
     182             :  * after all as part of its commit.
     183             :  *
     184             :  * We also use this to reuse an old build of an index during ALTER TABLE, this
     185             :  * time removing the delete-at-commit entry.
     186             :  *
     187             :  * No-op if the relation is not among those scheduled for deletion.
                     :  *
                     :  * rnode: the relation whose pending-delete entries are to be removed.
                     :  * atCommit: which kind of entry to remove: true removes delete-at-commit
                     :  *      entries, false removes delete-at-abort entries.
                     :  *
                     :  * The whole list is scanned, so every matching entry is removed (not just
                     :  * the first one found), irrespective of its nesting level.
     188             :  */
     189             : void
     190          57 : RelationPreserveStorage(RelFileNode rnode, bool atCommit)
     191             : {
     192             :     PendingRelDelete *pending;
     193             :     PendingRelDelete *prev;
     194             :     PendingRelDelete *next;
     195             : 
     196          57 :     prev = NULL;
     197         308 :     for (pending = pendingDeletes; pending != NULL; pending = next)
     198             :     {
     199         251 :         next = pending->next;    /* saved first: pending may be freed below */
     200         251 :         if (RelFileNodeEquals(rnode, pending->relnode)
     201          14 :             && pending->atCommit == atCommit)
     202             :         {
     203             :             /* unlink the entry from the list, then free it */
     204          14 :             if (prev)
     205           9 :                 prev->next = next;
     206             :             else
     207           5 :                 pendingDeletes = next;
     208          14 :             pfree(pending);
     209             :             /* prev does not change */
     210             :         }
     211             :         else
     212             :         {
     213             :             /* unrelated entry, don't touch it */
     214         237 :             prev = pending;
     215             :         }
     216             :     }
     217          57 : }
     218             : 
     219             : /*
     220             :  * RelationTruncate
     221             :  *      Physically truncate a relation to the specified number of blocks.
     222             :  *
     223             :  * This includes getting rid of any buffers for the blocks that are to be
     224             :  * dropped.
                     :  *
                     :  * rel: the relation to truncate.
                     :  * nblocks: the new length of the relation, in blocks.
                     :  *
                     :  * Order of operations: the FSM and visibility map forks (when they exist)
                     :  * are truncated first, then the truncation is WAL-logged if the relation
                     :  * needs WAL, and only after that is the main fork truncated.
     225             :  */
     226             : void
     227          30 : RelationTruncate(Relation rel, BlockNumber nblocks)
     228             : {
     229             :     bool        fsm;
     230             :     bool        vm;
     231             : 
     232             :     /* Open it at the smgr level if not already done */
     233          30 :     RelationOpenSmgr(rel);
     234             : 
     235             :     /*
     236             :      * Make sure smgr_targblock etc aren't pointing somewhere past new end
     237             :      */
     238          30 :     rel->rd_smgr->smgr_targblock = InvalidBlockNumber;
     239          30 :     rel->rd_smgr->smgr_fsm_nblocks = InvalidBlockNumber;
     240          30 :     rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;
     241             : 
     242             :     /* Truncate the FSM first if it exists */
     243          30 :     fsm = smgrexists(rel->rd_smgr, FSM_FORKNUM);
     244          30 :     if (fsm)
     245          12 :         FreeSpaceMapTruncateRel(rel, nblocks);
     246             : 
     247             :     /* Truncate the visibility map too if it exists. */
     248          30 :     vm = smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM);
     249          30 :     if (vm)
     250          12 :         visibilitymap_truncate(rel, nblocks);
     251             : 
     252             :     /*
     253             :      * We WAL-log the truncation before actually truncating, which means
     254             :      * trouble if the truncation fails. If we then crash, the WAL replay
     255             :      * likely isn't going to succeed in the truncation either, and cause a
     256             :      * PANIC. It's tempting to put a critical section here, but that cure
     257             :      * would be worse than the disease. It would turn a usually harmless
     258             :      * failure to truncate, that might spell trouble at WAL replay, into a
     259             :      * certain PANIC.
     260             :      */
     261          30 :     if (RelationNeedsWAL(rel))
     262             :     {
     263             :         /*
     264             :          * Make an XLOG entry reporting the file truncation.  The record
                     :          * carries the new length, the relation's RelFileNode and the
                     :          * SMGR_TRUNCATE_ALL flag set.
     265             :          */
     266             :         XLogRecPtr  lsn;
     267             :         xl_smgr_truncate xlrec;
     268             : 
     269          12 :         xlrec.blkno = nblocks;
     270          12 :         xlrec.rnode = rel->rd_node;
     271          12 :         xlrec.flags = SMGR_TRUNCATE_ALL;
     272             : 
     273          12 :         XLogBeginInsert();
     274          12 :         XLogRegisterData((char *) &xlrec, sizeof(xlrec));
     275             : 
     276          12 :         lsn = XLogInsert(RM_SMGR_ID,
     277             :                          XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
     278             : 
     279             :         /*
     280             :          * Flush, because otherwise the truncation of the main relation might
     281             :          * hit the disk before the WAL record, and the truncation of the FSM
     282             :          * or visibility map. If we crashed during that window, we'd be left
     283             :          * with a truncated heap, but the FSM or visibility map would still
     284             :          * contain entries for the non-existent heap pages.
                     :          *
                     :          * The flush is skipped when neither an FSM nor a visibility map
                     :          * fork was found above.
     285             :          */
     286          12 :         if (fsm || vm)
     287          12 :             XLogFlush(lsn);
     288             :     }
     289             : 
     290             :     /* Do the real work: truncate the main fork */
     291          30 :     smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);
     292          30 : }
     293             : 
     294             : /*
     295             :  *  smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
     296             :  *
     297             :  * This also runs when aborting a subxact; we want to clean up a failed
     298             :  * subxact immediately.
     299             :  *
     300             :  * Note: It's possible that we're being asked to remove a relation that has
     301             :  * no physical storage in any fork. In particular, it's possible that we're
     302             :  * cleaning up an old temporary relation for which RemovePgTempFiles has
     303             :  * already recovered the physical storage.
                     :  *
                     :  * isCommit: true when called for a commit, false for an abort.
                     :  *
                     :  * Every entry at the current nesting level or deeper is removed from the
                     :  * pending list and freed.  Of those, only the entries whose atCommit flag
                     :  * equals isCommit have their storage unlinked; the rest are simply
                     :  * discarded.  Entries belonging to outer nesting levels are left alone.
                     :  * The relations to unlink are gathered into an array first and handed to
                     :  * smgrdounlinkall in a single call.
     304             :  */
     305             : void
     306       26484 : smgrDoPendingDeletes(bool isCommit)
     307             : {
     308       26484 :     int         nestLevel = GetCurrentTransactionNestLevel();
     309             :     PendingRelDelete *pending;
     310             :     PendingRelDelete *prev;
     311             :     PendingRelDelete *next;
     312       26484 :     int         nrels = 0,
     313       26484 :                 i = 0,
     314       26484 :                 maxrels = 0;
     315       26484 :     SMgrRelation *srels = NULL;
     316             : 
     317       26484 :     prev = NULL;
     318       33723 :     for (pending = pendingDeletes; pending != NULL; pending = next)
     319             :     {
     320        7239 :         next = pending->next;    /* saved first: pending may be freed below */
     321        7239 :         if (pending->nestLevel < nestLevel)
     322             :         {
     323             :             /* outer-level entries should not be processed yet */
     324         117 :             prev = pending;
     325             :         }
     326             :         else
     327             :         {
     328             :             /* unlink list entry first, so we don't retry on failure */
     329        7122 :             if (prev)
     330           0 :                 prev->next = next;
     331             :             else
     332        7122 :                 pendingDeletes = next;
     333             :             /* do deletion if called for */
     334        7122 :             if (pending->atCommit == isCommit)
     335             :             {
     336             :                 SMgrRelation srel;
     337             : 
     338        3173 :                 srel = smgropen(pending->relnode, pending->backend);
     339             : 
     340             :                 /* allocate the initial array (8 slots), or double it when full */
     341        3173 :                 if (maxrels == 0)
     342             :                 {
     343        1026 :                     maxrels = 8;
     344        1026 :                     srels = palloc(sizeof(SMgrRelation) * maxrels);
     345             :                 }
     346        2147 :                 else if (maxrels <= nrels)
     347             :                 {
     348          74 :                     maxrels *= 2;
     349          74 :                     srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
     350             :                 }
     351             : 
     352        3173 :                 srels[nrels++] = srel;
     353             :             }
     354             :             /* must explicitly free the list entry */
     355        7122 :             pfree(pending);
     356             :             /* prev does not change */
     357             :         }
     358             :     }
     359             : 
     360       26484 :     if (nrels > 0)
     361             :     {
     362        1026 :         smgrdounlinkall(srels, nrels, false);
     363             : 
     364        4199 :         for (i = 0; i < nrels; i++)
     365        3173 :             smgrclose(srels[i]);
     366             : 
     367        1026 :         pfree(srels);
     368             :     }
     369       26484 : }
     370             : 
     371             : /*
     372             :  * smgrGetPendingDeletes() -- Get a list of non-temp relations to be deleted.
     373             :  *
     374             :  * The return value is the number of relations scheduled for deletion.
     375             :  * *ptr is set to point to a freshly-palloc'd array of RelFileNodes.
     376             :  * If there are no relations to be deleted, *ptr is set to NULL.
     377             :  *
     378             :  * Only non-temporary relations are included in the returned list.  This is OK
     379             :  * because the list is used only in contexts where temporary relations don't
     380             :  * matter: we're either writing to the two-phase state file (and transactions
     381             :  * that have touched temp tables can't be prepared) or we're writing to xlog
     382             :  * (and all temporary files will be zapped if we restart anyway, so no need
     383             :  * for redo to do it also).
     384             :  *
     385             :  * Note that the list does not include anything scheduled for deletion
     386             :  * by upper-level transactions.
                     :  *
                     :  * forCommit: true to report the relations to be deleted at commit, false
                     :  *      to report those to be deleted at abort.
                     :  * ptr: output; receives the array, or NULL when the result is empty.
                     :  *
                     :  * The pending list itself is not modified.  It is walked twice with the
                     :  * same filter: once to count the matches and once to copy them out.
     387             :  */
     388             : int
     389       23502 : smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
     390             : {
     391       23502 :     int         nestLevel = GetCurrentTransactionNestLevel();
     392             :     int         nrels;
     393             :     RelFileNode *rptr;
     394             :     PendingRelDelete *pending;
     395             : 
     396       23502 :     nrels = 0;
     397       30670 :     for (pending = pendingDeletes; pending != NULL; pending = pending->next)
     398             :     {
     399        7168 :         if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
     400        3176 :             && pending->backend == InvalidBackendId)
     401        2571 :             nrels++;
     402             :     }
     403       23502 :     if (nrels == 0)
     404             :     {
     405       22604 :         *ptr = NULL;
     406       22604 :         return 0;
     407             :     }
     408         898 :     rptr = (RelFileNode *) palloc(nrels * sizeof(RelFileNode));
     409         898 :     *ptr = rptr;
     410        3968 :     for (pending = pendingDeletes; pending != NULL; pending = pending->next)
     411             :     {
     412        3070 :         if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
     413        2576 :             && pending->backend == InvalidBackendId)
     414             :         {
     415        2571 :             *rptr = pending->relnode;
     416        2571 :             rptr++;
     417             :         }
     418             :     }
     419         898 :     return nrels;
     420             : }
     421             : 
     422             : /*
     423             :  *  PostPrepare_smgr -- Clean up after a successful PREPARE
     424             :  *
     425             :  * What we have to do here is throw away the in-memory state about pending
     426             :  * relation deletes.  It's all been recorded in the 2PC state file and
     427             :  * it's no longer smgr's job to worry about it.
                     :  *
                     :  * Every entry is freed, whatever its nesting level or atCommit flag, and
                     :  * no storage is touched.  The list is empty on return.
     428             :  */
     429             : void
     430           6 : PostPrepare_smgr(void)
     431             : {
     432             :     PendingRelDelete *pending;
     433             :     PendingRelDelete *next;
     434             : 
     435           9 :     for (pending = pendingDeletes; pending != NULL; pending = next)
     436             :     {
     437           3 :         next = pending->next;
     438           3 :         pendingDeletes = next;
     439             :         /* must explicitly free the list entry */
     440           3 :         pfree(pending);
     441             :     }
     442           6 : }
     443             : 
     444             : 
     445             : /*
     446             :  * AtSubCommit_smgr() --- Take care of subtransaction commit.
     447             :  *
     448             :  * Reassign the committing subtransaction's items in the pending-deletes
                     :  * list to the parent transaction: every entry whose nesting level is the
                     :  * current level or deeper is relabeled with the current level minus one.
                     :  * Entries of outer levels are left unchanged, and nothing is removed from
                     :  * the list.
     449             :  */
     450             : void
     451          49 : AtSubCommit_smgr(void)
     452             : {
     453          49 :     int         nestLevel = GetCurrentTransactionNestLevel();
     454             :     PendingRelDelete *pending;
     455             : 
     456          90 :     for (pending = pendingDeletes; pending != NULL; pending = pending->next)
     457             :     {
     458          41 :         if (pending->nestLevel >= nestLevel)
     459          22 :             pending->nestLevel = nestLevel - 1;
     460             :     }
     461          49 : }
     462             : 
     463             : /*
     464             :  * AtSubAbort_smgr() --- Take care of subtransaction abort.
     465             :  *
     466             :  * Delete created relations and forget about deleted relations.
     467             :  * We can execute these operations immediately because we know this
     468             :  * subtransaction will not commit.
                     :  *
                     :  * This is simply smgrDoPendingDeletes with isCommit = false, which acts
                     :  * only on entries at the current nesting level or deeper.
     469             :  */
     470             : void
     471         323 : AtSubAbort_smgr(void)
     472             : {
     473         323 :     smgrDoPendingDeletes(false);
     474         323 : }
     475             : /*
                     :  * smgr_redo() -- Replay an smgr WAL record.
                     :  *
                     :  * record: the WAL record being replayed.  Its info bits (with the
                     :  *      XLR_INFO_MASK bits cleared) select one of two actions:
                     :  *
                     :  * XLOG_SMGR_CREATE: open the relation named in the record and create the
                     :  *      recorded fork.
                     :  *
                     :  * XLOG_SMGR_TRUNCATE: make sure the main fork exists, call XLogFlush on
                     :  *      the record's end pointer, then truncate whichever of the main fork,
                     :  *      free space map and visibility map the record's flags select.  The
                     :  *      latter two are truncated only if the fork exists.
                     :  *
                     :  * Any other op code is reported with elog(PANIC).  Relations are always
                     :  * opened with InvalidBackendId here.
                     :  */
     476             : void
     477           0 : smgr_redo(XLogReaderState *record)
     478             : {
     479           0 :     XLogRecPtr  lsn = record->EndRecPtr;
     480           0 :     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
     481             : 
     482             :     /* Backup blocks are not used in smgr records */
     483           0 :     Assert(!XLogRecHasAnyBlockRefs(record));
     484             : 
     485           0 :     if (info == XLOG_SMGR_CREATE)
     486             :     {
     487           0 :         xl_smgr_create *xlrec = (xl_smgr_create *) XLogRecGetData(record);
     488             :         SMgrRelation reln;
     489             : 
     490           0 :         reln = smgropen(xlrec->rnode, InvalidBackendId);
     491           0 :         smgrcreate(reln, xlrec->forkNum, true);
     492             :     }
     493           0 :     else if (info == XLOG_SMGR_TRUNCATE)
     494             :     {
     495           0 :         xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
     496             :         SMgrRelation reln;
     497             :         Relation    rel;
     498             : 
     499           0 :         reln = smgropen(xlrec->rnode, InvalidBackendId);
     500             : 
     501             :         /*
     502             :          * Forcibly create relation if it doesn't exist (which suggests that
     503             :          * it was dropped somewhere later in the WAL sequence).  As in
     504             :          * XLogReadBufferForRedo, we prefer to recreate the rel and replay the
     505             :          * log as best we can until the drop is seen.
     506             :          */
     507           0 :         smgrcreate(reln, MAIN_FORKNUM, true);
     508             : 
     509             :         /*
     510             :          * Before we perform the truncation, update minimum recovery point to
     511             :          * cover this WAL record. Once the relation is truncated, there's no
     512             :          * going back. The buffer manager enforces the WAL-first rule for
     513             :          * normal updates to relation files, so that the minimum recovery
     514             :          * point is always updated before the corresponding change in the data
     515             :          * file is flushed to disk. We have to do the same manually here.
     516             :          *
     517             :          * Doing this before the truncation means that if the truncation fails
     518             :          * for some reason, you cannot start up the system even after restart,
     519             :          * until you fix the underlying situation so that the truncation will
     520             :          * succeed. Alternatively, we could update the minimum recovery point
     521             :          * after truncation, but that would leave a small window where the
     522             :          * WAL-first rule could be violated.
     523             :          */
     524           0 :         XLogFlush(lsn);
     525             : 
     526           0 :         if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
     527             :         {
     528           0 :             smgrtruncate(reln, MAIN_FORKNUM, xlrec->blkno);
     529             : 
     530             :             /* Also tell xlogutils.c about it */
     531           0 :             XLogTruncateRelation(xlrec->rnode, MAIN_FORKNUM, xlrec->blkno);
     532             :         }
     533             : 
     534             :         /* Truncate FSM and VM too, via a temporary fake relcache entry */
     535           0 :         rel = CreateFakeRelcacheEntry(xlrec->rnode);
     536             : 
     537           0 :         if ((xlrec->flags & SMGR_TRUNCATE_FSM) != 0 &&
     538           0 :             smgrexists(reln, FSM_FORKNUM))
     539           0 :             FreeSpaceMapTruncateRel(rel, xlrec->blkno);
     540           0 :         if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0 &&
     541           0 :             smgrexists(reln, VISIBILITYMAP_FORKNUM))
     542           0 :             visibilitymap_truncate(rel, xlrec->blkno);
     543             : 
     544           0 :         FreeFakeRelcacheEntry(rel);
     545             :     }
     546             :     else
     547           0 :         elog(PANIC, "smgr_redo: unknown op code %u", info);
     548           0 : }

Generated by: LCOV version 1.11