LCOV - code coverage report
Current view: top level - src/backend/access/transam - xloginsert.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 261 332 78.6 %
Date: 2017-09-29 15:12:54 Functions: 13 16 81.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * xloginsert.c
       4             :  *      Functions for constructing WAL records
       5             :  *
       6             :  * Constructing a WAL record begins with a call to XLogBeginInsert,
       7             :  * followed by a number of XLogRegister* calls. The registered data is
       8             :  * collected in private working memory, and finally assembled into a chain
       9             :  * of XLogRecData structs by a call to XLogRecordAssemble(). See
      10             :  * access/transam/README for details.
      11             :  *
      12             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      13             :  * Portions Copyright (c) 1994, Regents of the University of California
      14             :  *
      15             :  * src/backend/access/transam/xloginsert.c
      16             :  *
      17             :  *-------------------------------------------------------------------------
      18             :  */
      19             : 
      20             : #include "postgres.h"
      21             : 
      22             : #include "access/xact.h"
      23             : #include "access/xlog.h"
      24             : #include "access/xlog_internal.h"
      25             : #include "access/xloginsert.h"
      26             : #include "catalog/pg_control.h"
      27             : #include "common/pg_lzcompress.h"
      28             : #include "miscadmin.h"
      29             : #include "replication/origin.h"
      30             : #include "storage/bufmgr.h"
      31             : #include "storage/proc.h"
      32             : #include "utils/memutils.h"
      33             : #include "pg_trace.h"
      34             : 
      35             : /* Buffer size required to store a compressed version of backup block image */
      36             : #define PGLZ_MAX_BLCKSZ PGLZ_MAX_OUTPUT(BLCKSZ)
      37             : 
      38             : /*
      39             :  * For each block reference registered with XLogRegisterBuffer, we fill in
      40             :  * a registered_buffer struct.
      41             :  */
      42             : typedef struct
      43             : {
      44             :     bool        in_use;         /* is this slot in use? */
      45             :     uint8       flags;          /* REGBUF_* flags */
      46             :     RelFileNode rnode;          /* identifies the relation and block */
      47             :     ForkNumber  forkno;
      48             :     BlockNumber block;
      49             :     Page        page;           /* page content */
      50             :     uint32      rdata_len;      /* total length of data in rdata chain */
      51             :     XLogRecData *rdata_head;    /* head of the chain of data registered with
      52             :                                  * this block */
      53             :     XLogRecData *rdata_tail;    /* last entry in the chain, or &rdata_head if
      54             :                                  * empty */
      55             : 
      56             :     XLogRecData bkp_rdatas[2];  /* temporary rdatas used to hold references to
      57             :                                  * backup block data in XLogRecordAssemble() */
      58             : 
      59             :     /* buffer to store a compressed version of backup block image */
      60             :     char        compressed_page[PGLZ_MAX_BLCKSZ];
      61             : } registered_buffer;
      62             : 
      63             : static registered_buffer *registered_buffers;
      64             : static int  max_registered_buffers; /* allocated size */
      65             : static int  max_registered_block_id = 0;    /* highest block_id + 1 currently
      66             :                                              * registered */
      67             : 
      68             : /*
      69             :  * A chain of XLogRecDatas to hold the "main data" of a WAL record, registered
      70             :  * with XLogRegisterData(...).
      71             :  */
      72             : static XLogRecData *mainrdata_head;
      73             : static XLogRecData *mainrdata_last = (XLogRecData *) &mainrdata_head;
      74             : static uint32 mainrdata_len;    /* total # of bytes in chain */
      75             : 
      76             : /* flags for the in-progress insertion */
      77             : static uint8 curinsert_flags = 0;
      78             : 
      79             : /*
      80             :  * These are used to hold the record header while constructing a record.
      81             :  * 'hdr_scratch' is not a plain variable, but is palloc'd at initialization,
      82             :  * because we want it to be MAXALIGNed and padding bytes zeroed.
      83             :  *
      84             :  * For simplicity, it's allocated large enough to hold the headers for any
      85             :  * WAL record.
      86             :  */
      87             : static XLogRecData hdr_rdt;
      88             : static char *hdr_scratch = NULL;
      89             : 
      90             : #define SizeOfXlogOrigin    (sizeof(RepOriginId) + sizeof(char))
      91             : 
      92             : #define HEADER_SCRATCH_SIZE \
      93             :     (SizeOfXLogRecord + \
      94             :      MaxSizeOfXLogRecordBlockHeader * (XLR_MAX_BLOCK_ID + 1) + \
      95             :      SizeOfXLogRecordDataHeaderLong + SizeOfXlogOrigin)
      96             : 
      97             : /*
      98             :  * An array of XLogRecData structs, to hold registered data.
      99             :  */
     100             : static XLogRecData *rdatas;
     101             : static int  num_rdatas;         /* entries currently used */
     102             : static int  max_rdatas;         /* allocated size */
     103             : 
     104             : static bool begininsert_called = false;
     105             : 
     106             : /* Memory context to hold the registered buffer and data references. */
     107             : static MemoryContext xloginsert_cxt;
     108             : 
     109             : static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info,
     110             :                    XLogRecPtr RedoRecPtr, bool doPageWrites,
     111             :                    XLogRecPtr *fpw_lsn);
     112             : static bool XLogCompressBackupBlock(char *page, uint16 hole_offset,
     113             :                         uint16 hole_length, char *dest, uint16 *dlen);
     114             : 
     115             : /*
     116             :  * Begin constructing a WAL record. This must be called before the
     117             :  * XLogRegister* functions and XLogInsert().
     118             :  */
     119             : void
     120     1397899 : XLogBeginInsert(void)
     121             : {
     122     1397899 :     Assert(max_registered_block_id == 0);
     123     1397899 :     Assert(mainrdata_last == (XLogRecData *) &mainrdata_head);
     124     1397899 :     Assert(mainrdata_len == 0);
     125             : 
     126             :     /* cross-check on whether we should be here or not */
     127     1397899 :     if (!XLogInsertAllowed())
     128           0 :         elog(ERROR, "cannot make new WAL entries during recovery");
     129             : 
     130     1397899 :     if (begininsert_called)
     131           0 :         elog(ERROR, "XLogBeginInsert was already called");
     132             : 
     133     1397899 :     begininsert_called = true;
     134     1397899 : }
     135             : 
     136             : /*
     137             :  * Ensure that there are enough buffer and data slots in the working area,
     138             :  * for subsequent XLogRegisterBuffer, XLogRegisterData and XLogRegisterBufData
     139             :  * calls.
     140             :  *
     141             :  * There is always space for a small number of buffers and data chunks, enough
     142             :  * for most record types. This function is for the exceptional cases that need
     143             :  * more.
     144             :  */
     145             : void
     146        1321 : XLogEnsureRecordSpace(int max_block_id, int ndatas)
     147             : {
     148             :     int         nbuffers;
     149             : 
     150             :     /*
     151             :      * This must be called before entering a critical section, because
     152             :      * allocating memory inside a critical section can fail. repalloc() will
     153             :      * check the same, but better to check it here too so that we fail
     154             :      * consistently even if the arrays happen to be large enough already.
     155             :      */
     156        1321 :     Assert(CritSectionCount == 0);
     157             : 
     158             :     /* the minimum values can't be decreased */
     159        1321 :     if (max_block_id < XLR_NORMAL_MAX_BLOCK_ID)
     160        1282 :         max_block_id = XLR_NORMAL_MAX_BLOCK_ID;
     161        1321 :     if (ndatas < XLR_NORMAL_RDATAS)
     162        1309 :         ndatas = XLR_NORMAL_RDATAS;
     163             : 
     164        1321 :     if (max_block_id > XLR_MAX_BLOCK_ID)
     165           0 :         elog(ERROR, "maximum number of WAL record block references exceeded");
     166        1321 :     nbuffers = max_block_id + 1;
     167             : 
     168        1321 :     if (nbuffers > max_registered_buffers)
     169             :     {
     170           2 :         registered_buffers = (registered_buffer *)
     171           2 :             repalloc(registered_buffers, sizeof(registered_buffer) * nbuffers);
     172             : 
     173             :         /*
     174             :          * At least the padding bytes in the structs must be zeroed, because
     175             :          * they are included in WAL data, but initialize it all for tidiness.
     176             :          */
     177           2 :         MemSet(&registered_buffers[max_registered_buffers], 0,
     178             :                (nbuffers - max_registered_buffers) * sizeof(registered_buffer));
     179           2 :         max_registered_buffers = nbuffers;
     180             :     }
     181             : 
     182        1321 :     if (ndatas > max_rdatas)
     183             :     {
     184           3 :         rdatas = (XLogRecData *) repalloc(rdatas, sizeof(XLogRecData) * ndatas);
     185           3 :         max_rdatas = ndatas;
     186             :     }
     187        1321 : }
     188             : 
     189             : /*
     190             :  * Reset WAL record construction buffers.
     191             :  */
     192             : void
     193     1401528 : XLogResetInsertion(void)
     194             : {
     195             :     int         i;
     196             : 
     197     2895300 :     for (i = 0; i < max_registered_block_id; i++)
     198     1493772 :         registered_buffers[i].in_use = false;
     199             : 
     200     1401528 :     num_rdatas = 0;
     201     1401528 :     max_registered_block_id = 0;
     202     1401528 :     mainrdata_len = 0;
     203     1401528 :     mainrdata_last = (XLogRecData *) &mainrdata_head;
     204     1401528 :     curinsert_flags = 0;
     205     1401528 :     begininsert_called = false;
     206     1401528 : }
     207             : 
     208             : /*
     209             :  * Register a reference to a buffer with the WAL record being constructed.
     210             :  * This must be called for every page that the WAL-logged operation modifies.
     211             :  */
     212             : void
     213     1488047 : XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
     214             : {
     215             :     registered_buffer *regbuf;
     216             : 
     217             :     /* NO_IMAGE doesn't make sense with FORCE_IMAGE */
     218     1488047 :     Assert(!((flags & REGBUF_FORCE_IMAGE) && (flags & (REGBUF_NO_IMAGE))));
     219     1488047 :     Assert(begininsert_called);
     220             : 
     221     1488047 :     if (block_id >= max_registered_block_id)
     222             :     {
     223     1384697 :         if (block_id >= max_registered_buffers)
     224           0 :             elog(ERROR, "too many registered buffers");
     225     1384697 :         max_registered_block_id = block_id + 1;
     226             :     }
     227             : 
     228     1488047 :     regbuf = &registered_buffers[block_id];
     229             : 
     230     1488047 :     BufferGetTag(buffer, &regbuf->rnode, &regbuf->forkno, &regbuf->block);
     231     1488047 :     regbuf->page = BufferGetPage(buffer);
     232     1488047 :     regbuf->flags = flags;
     233     1488047 :     regbuf->rdata_tail = (XLogRecData *) &regbuf->rdata_head;
     234     1488047 :     regbuf->rdata_len = 0;
     235             : 
     236             :     /*
     237             :      * Check that this page hasn't already been registered with some other
     238             :      * block_id.
     239             :      */
     240             : #ifdef USE_ASSERT_CHECKING
     241             :     {
     242             :         int         i;
     243             : 
     244     3202696 :         for (i = 0; i < max_registered_block_id; i++)
     245             :         {
     246     1714649 :             registered_buffer *regbuf_old = &registered_buffers[i];
     247             : 
     248     1714649 :             if (i == block_id || !regbuf_old->in_use)
     249     1594748 :                 continue;
     250             : 
     251      119901 :             Assert(!RelFileNodeEquals(regbuf_old->rnode, regbuf->rnode) ||
     252             :                    regbuf_old->forkno != regbuf->forkno ||
     253             :                    regbuf_old->block != regbuf->block);
     254             :         }
     255             :     }
     256             : #endif
     257             : 
     258     1488047 :     regbuf->in_use = true;
     259     1488047 : }
     260             : 
     261             : /*
     262             :  * Like XLogRegisterBuffer, but for registering a block that's not in the
     263             :  * shared buffer pool (i.e. when you don't have a Buffer for it).
     264             :  */
     265             : void
     266        3827 : XLogRegisterBlock(uint8 block_id, RelFileNode *rnode, ForkNumber forknum,
     267             :                   BlockNumber blknum, Page page, uint8 flags)
     268             : {
     269             :     registered_buffer *regbuf;
     270             : 
     271             :     /* This is currently only used to WAL-log a full-page image of a page */
     272        3827 :     Assert(flags & REGBUF_FORCE_IMAGE);
     273        3827 :     Assert(begininsert_called);
     274             : 
     275        3827 :     if (block_id >= max_registered_block_id)
     276        3827 :         max_registered_block_id = block_id + 1;
     277             : 
     278        3827 :     if (block_id >= max_registered_buffers)
     279           0 :         elog(ERROR, "too many registered buffers");
     280             : 
     281        3827 :     regbuf = &registered_buffers[block_id];
     282             : 
     283        3827 :     regbuf->rnode = *rnode;
     284        3827 :     regbuf->forkno = forknum;
     285        3827 :     regbuf->block = blknum;
     286        3827 :     regbuf->page = page;
     287        3827 :     regbuf->flags = flags;
     288        3827 :     regbuf->rdata_tail = (XLogRecData *) &regbuf->rdata_head;
     289        3827 :     regbuf->rdata_len = 0;
     290             : 
     291             :     /*
     292             :      * Check that this page hasn't already been registered with some other
     293             :      * block_id.
     294             :      */
     295             : #ifdef USE_ASSERT_CHECKING
     296             :     {
     297             :         int         i;
     298             : 
     299        7654 :         for (i = 0; i < max_registered_block_id; i++)
     300             :         {
     301        3827 :             registered_buffer *regbuf_old = &registered_buffers[i];
     302             : 
     303        3827 :             if (i == block_id || !regbuf_old->in_use)
     304        3827 :                 continue;
     305             : 
     306           0 :             Assert(!RelFileNodeEquals(regbuf_old->rnode, regbuf->rnode) ||
     307             :                    regbuf_old->forkno != regbuf->forkno ||
     308             :                    regbuf_old->block != regbuf->block);
     309             :         }
     310             :     }
     311             : #endif
     312             : 
     313        3827 :     regbuf->in_use = true;
     314        3827 : }
     315             : 
     316             : /*
     317             :  * Add data to the WAL record that's being constructed.
     318             :  *
     319             :  * The data is appended to the "main chunk", available at replay with
     320             :  * XLogRecGetData().
     321             :  */
     322             : void
     323     1520170 : XLogRegisterData(char *data, int len)
     324             : {
     325             :     XLogRecData *rdata;
     326             : 
     327     1520170 :     Assert(begininsert_called);
     328             : 
     329     1520170 :     if (num_rdatas >= max_rdatas)
     330           0 :         elog(ERROR, "too much WAL data");
     331     1520170 :     rdata = &rdatas[num_rdatas++];
     332             : 
     333     1520170 :     rdata->data = data;
     334     1520170 :     rdata->len = len;
     335             : 
     336             :     /*
     337             :      * we use the mainrdata_last pointer to track the end of the chain, so no
     338             :      * need to clear 'next' here.
     339             :      */
     340             : 
     341     1520170 :     mainrdata_last->next = rdata;
     342     1520170 :     mainrdata_last = rdata;
     343             : 
     344     1520170 :     mainrdata_len += len;
     345     1520170 : }
     346             : 
     347             : /*
     348             :  * Add buffer-specific data to the WAL record that's being constructed.
     349             :  *
     350             :  * Block_id must reference a block previously registered with
     351             :  * XLogRegisterBuffer(). If this is called more than once for the same
     352             :  * block_id, the data is appended.
     353             :  *
     354             :  * The maximum amount of data that can be registered per block is 65535
     355             :  * bytes. That should be plenty; if you need more than BLCKSZ bytes to
     356             :  * reconstruct the changes to the page, you might as well just log a full
     357             :  * copy of it. (the "main data" that's not associated with a block is not
     358             :  * limited)
     359             :  */
     360             : void
     361     2032147 : XLogRegisterBufData(uint8 block_id, char *data, int len)
     362             : {
     363             :     registered_buffer *regbuf;
     364             :     XLogRecData *rdata;
     365             : 
     366     2032147 :     Assert(begininsert_called);
     367             : 
     368             :     /* find the registered buffer struct */
     369     2032147 :     regbuf = &registered_buffers[block_id];
     370     2032147 :     if (!regbuf->in_use)
     371           0 :         elog(ERROR, "no block with id %d registered with WAL insertion",
     372             :              block_id);
     373             : 
     374     2032147 :     if (num_rdatas >= max_rdatas)
     375           0 :         elog(ERROR, "too much WAL data");
     376     2032147 :     rdata = &rdatas[num_rdatas++];
     377             : 
     378     2032147 :     rdata->data = data;
     379     2032147 :     rdata->len = len;
     380             : 
     381     2032147 :     regbuf->rdata_tail->next = rdata;
     382     2032147 :     regbuf->rdata_tail = rdata;
     383     2032147 :     regbuf->rdata_len += len;
     384     2032147 : }
     385             : 
     386             : /*
     387             :  * Set insert status flags for the upcoming WAL record.
     388             :  *
     389             :  * The flags that can be used here are:
     390             :  * - XLOG_INCLUDE_ORIGIN, to determine if the replication origin should be
     391             :  *   included in the record.
     392             :  * - XLOG_MARK_UNIMPORTANT, to signal that the record is not important for
     393             :  *   durability, which allows us to avoid triggering WAL archiving and other
     394             :  *   background activity.
     395             :  */
     396             : void
     397      733889 : XLogSetRecordFlags(uint8 flags)
     398             : {
     399      733889 :     Assert(begininsert_called);
     400      733889 :     curinsert_flags = flags;
     401      733889 : }
     402             : 
     403             : /*
     404             :  * Insert an XLOG record having the specified RMID and info bytes, with the
     405             :  * body of the record being the data and buffer references registered earlier
     406             :  * with XLogRegister* calls.
     407             :  *
     408             :  * Returns XLOG pointer to end of record (beginning of next record).
     409             :  * This can be used as LSN for data pages affected by the logged action.
     410             :  * (LSN is the XLOG point up to which the XLOG must be flushed to disk
     411             :  * before the data page can be written out.  This implements the basic
     412             :  * WAL rule "write the log before the data".)
     413             :  */
     414             : XLogRecPtr
     415     1397899 : XLogInsert(RmgrId rmid, uint8 info)
     416             : {
     417             :     XLogRecPtr  EndPos;
     418             : 
     419             :     /* XLogBeginInsert() must have been called. */
     420     1397899 :     if (!begininsert_called)
     421           0 :         elog(ERROR, "XLogBeginInsert was not called");
     422             : 
     423             :     /*
     424             :      * The caller can set rmgr bits, XLR_SPECIAL_REL_UPDATE and
     425             :      * XLR_CHECK_CONSISTENCY; the rest are reserved for the WAL insertion code.
     426             :      */
     427     1397899 :     if ((info & ~(XLR_RMGR_INFO_MASK |
     428             :                   XLR_SPECIAL_REL_UPDATE |
     429             :                   XLR_CHECK_CONSISTENCY)) != 0)
     430           0 :         elog(PANIC, "invalid xlog info mask %02X", info);
     431             : 
     432             :     TRACE_POSTGRESQL_WAL_INSERT(rmid, info);
     433             : 
     434             :     /*
     435             :      * In bootstrap mode, we don't actually log anything but XLOG resources;
     436             :      * return a phony record pointer.
     437             :      */
     438     1397899 :     if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID)
     439             :     {
     440        7793 :         XLogResetInsertion();
     441        7793 :         EndPos = SizeOfXLogLongPHD; /* start of 1st chkpt record */
     442        7793 :         return EndPos;
     443             :     }
     444             : 
     445             :     do
     446             :     {
     447             :         XLogRecPtr  RedoRecPtr;
     448             :         bool        doPageWrites;
     449             :         XLogRecPtr  fpw_lsn;
     450             :         XLogRecData *rdt;
     451             : 
     452             :         /*
     453             :          * Get values needed to decide whether to do full-page writes. Since
     454             :          * we don't yet have an insertion lock, these could change under us,
     455             :          * but XLogInsertRecord will recheck them once it has a lock.
     456             :          */
     457     1390106 :         GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
     458             : 
     459     1390106 :         rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites,
     460             :                                  &fpw_lsn);
     461             : 
     462     1390106 :         EndPos = XLogInsertRecord(rdt, fpw_lsn, curinsert_flags);
     463     1390106 :     } while (EndPos == InvalidXLogRecPtr);
     464             : 
     465     1390106 :     XLogResetInsertion();
     466             : 
     467     1390106 :     return EndPos;
     468             : }
     469             : 
     470             : /*
     471             :  * Assemble a WAL record from the registered data and buffers into an
     472             :  * XLogRecData chain, ready for insertion with XLogInsertRecord().
     473             :  *
     474             :  * The record header fields are filled in, except for the xl_prev field. The
     475             :  * calculated CRC does not include the record header yet.
     476             :  *
     477             :  * If there are any registered buffers, and a full-page image was not taken
     478             :  * of all of them, *fpw_lsn is set to the lowest LSN among such pages. This
     479             :  * signals that the assembled record is only good for insertion on the
     480             :  * assumption that the RedoRecPtr and doPageWrites values were up-to-date.
     481             :  */
     482             : static XLogRecData *
     483     1390106 : XLogRecordAssemble(RmgrId rmid, uint8 info,
     484             :                    XLogRecPtr RedoRecPtr, bool doPageWrites,
     485             :                    XLogRecPtr *fpw_lsn)
     486             : {
     487             :     XLogRecData *rdt;
     488     1390106 :     uint32      total_len = 0;
     489             :     int         block_id;
     490             :     pg_crc32c   rdata_crc;
     491     1390106 :     registered_buffer *prev_regbuf = NULL;
     492             :     XLogRecData *rdt_datas_last;
     493             :     XLogRecord *rechdr;
     494     1390106 :     char       *scratch = hdr_scratch;
     495             : 
     496             :     /*
     497             :      * Note: this function can be called multiple times for the same record.
     498             :      * All the modifications we do to the rdata chains below must handle that.
     499             :      */
     500             : 
     501             :     /* The record begins with the fixed-size header */
     502     1390106 :     rechdr = (XLogRecord *) scratch;
     503     1390106 :     scratch += SizeOfXLogRecord;
     504             : 
     505     1390106 :     hdr_rdt.next = NULL;
     506     1390106 :     rdt_datas_last = &hdr_rdt;
     507     1390106 :     hdr_rdt.data = hdr_scratch;
     508             : 
     509             :     /*
     510             :      * Enforce consistency checks for this record if user is looking for it.
     511             :      * Do this at the beginning of this routine to give the possibility
     512             :      * for callers of XLogInsert() to pass XLR_CHECK_CONSISTENCY directly for
     513             :      * a record.
     514             :      */
     515     1390106 :     if (wal_consistency_checking[rmid])
     516           0 :         info |= XLR_CHECK_CONSISTENCY;
     517             : 
     518             :     /*
     519             :      * Make an rdata chain containing all the data portions of all block
     520             :      * references. This includes the data for full-page images. Also append
     521             :      * the headers for the block references in the scratch buffer.
     522             :      */
     523     1390106 :     *fpw_lsn = InvalidXLogRecPtr;
     524     2876426 :     for (block_id = 0; block_id < max_registered_block_id; block_id++)
     525             :     {
     526     1486320 :         registered_buffer *regbuf = &registered_buffers[block_id];
     527             :         bool        needs_backup;
     528             :         bool        needs_data;
     529             :         XLogRecordBlockHeader bkpb;
     530             :         XLogRecordBlockImageHeader bimg;
     531     1486320 :         XLogRecordBlockCompressHeader cbimg = {0};
     532             :         bool        samerel;
     533     1486320 :         bool        is_compressed = false;
     534             :         bool        include_image;
     535             : 
     536     1486320 :         if (!regbuf->in_use)
     537        1898 :             continue;
     538             : 
     539             :         /* Determine if this block needs to be backed up */
     540     1484422 :         if (regbuf->flags & REGBUF_FORCE_IMAGE)
     541        4470 :             needs_backup = true;
     542     1479952 :         else if (regbuf->flags & REGBUF_NO_IMAGE)
     543       40316 :             needs_backup = false;
     544     1439636 :         else if (!doPageWrites)
     545           0 :             needs_backup = false;
     546             :         else
     547             :         {
     548             :             /*
     549             :              * We assume page LSN is first data on *every* page that can be
     550             :              * passed to XLogInsert, whether it has the standard page layout
     551             :              * or not.
     552             :              */
     553     1439636 :             XLogRecPtr  page_lsn = PageGetLSN(regbuf->page);
     554             : 
     555     1439636 :             needs_backup = (page_lsn <= RedoRecPtr);
     556     1439636 :             if (!needs_backup)
     557             :             {
     558     1438994 :                 if (*fpw_lsn == InvalidXLogRecPtr || page_lsn < *fpw_lsn)
     559     1352842 :                     *fpw_lsn = page_lsn;
     560             :             }
     561             :         }
     562             : 
     563             :         /* Determine if the buffer data needs to be included */
     564     1484422 :         if (regbuf->rdata_len == 0)
     565      321428 :             needs_data = false;
     566     1162994 :         else if ((regbuf->flags & REGBUF_KEEP_DATA) != 0)
     567           0 :             needs_data = true;
     568             :         else
     569     1162994 :             needs_data = !needs_backup;
     570             : 
     571     1484422 :         bkpb.id = block_id;
     572     1484422 :         bkpb.fork_flags = regbuf->forkno;
     573     1484422 :         bkpb.data_length = 0;
     574             : 
     575     1484422 :         if ((regbuf->flags & REGBUF_WILL_INIT) == REGBUF_WILL_INIT)
     576       36256 :             bkpb.fork_flags |= BKPBLOCK_WILL_INIT;
     577             : 
     578             :         /*
     579             :          * If needs_backup is true or WAL consistency checking is enabled for the
     580             :          * current resource manager, log a full-page write for the current block.
     581             :          */
     582     1484422 :         include_image = needs_backup || (info & XLR_CHECK_CONSISTENCY) != 0;
     583             : 
     584     1484422 :         if (include_image)
     585             :         {
     586        5112 :             Page        page = regbuf->page;
     587             :             uint16      compressed_len;
     588             : 
     589             :             /*
     590             :              * The page needs to be backed up, so calculate its hole length
     591             :              * and offset.
     592             :              */
     593        5112 :             if (regbuf->flags & REGBUF_STANDARD)
     594             :             {
     595             :                 /* Assume we can omit data between pd_lower and pd_upper */
     596        4885 :                 uint16      lower = ((PageHeader) page)->pd_lower;
     597        4885 :                 uint16      upper = ((PageHeader) page)->pd_upper;
     598             : 
     599        4885 :                 if (lower >= SizeOfPageHeaderData &&
     600        4883 :                     upper > lower &&
     601             :                     upper <= BLCKSZ)
     602             :                 {
     603        4883 :                     bimg.hole_offset = lower;
     604        4883 :                     cbimg.hole_length = upper - lower;
     605             :                 }
     606             :                 else
     607             :                 {
     608             :                     /* No "hole" to compress out */
     609           2 :                     bimg.hole_offset = 0;
     610           2 :                     cbimg.hole_length = 0;
     611             :                 }
     612             :             }
     613             :             else
     614             :             {
     615             :                 /* Not a standard page header, don't try to eliminate "hole" */
     616         227 :                 bimg.hole_offset = 0;
     617         227 :                 cbimg.hole_length = 0;
     618             :             }
     619             : 
     620             :             /*
     621             :              * Try to compress a block image if wal_compression is enabled
     622             :              */
     623        5112 :             if (wal_compression)
     624             :             {
     625           0 :                 is_compressed =
     626           0 :                     XLogCompressBackupBlock(page, bimg.hole_offset,
     627           0 :                                             cbimg.hole_length,
     628           0 :                                             regbuf->compressed_page,
     629             :                                             &compressed_len);
     630             :             }
     631             : 
     632             :             /*
     633             :              * Fill in the remaining fields in the XLogRecordBlockHeader
     634             :              * struct
     635             :              */
     636        5112 :             bkpb.fork_flags |= BKPBLOCK_HAS_IMAGE;
     637             : 
     638             :             /*
     639             :              * Construct XLogRecData entries for the page content.
     640             :              */
     641        5112 :             rdt_datas_last->next = &regbuf->bkp_rdatas[0];
     642        5112 :             rdt_datas_last = rdt_datas_last->next;
     643             : 
     644        5112 :             bimg.bimg_info = (cbimg.hole_length == 0) ? 0 : BKPIMAGE_HAS_HOLE;
     645             : 
     646             :             /*
     647             :              * If WAL consistency checking is enabled for the resource manager
     648             :              * of this WAL record, a full-page image is included in the record
     649             :              * for the block modified. During redo, the full-page image is
     650             :              * replayed only if BKPIMAGE_APPLY is set.
     651             :              */
     652        5112 :             if (needs_backup)
     653        5112 :                 bimg.bimg_info |= BKPIMAGE_APPLY;
     654             : 
     655        5112 :             if (is_compressed)
     656             :             {
     657           0 :                 bimg.length = compressed_len;
     658           0 :                 bimg.bimg_info |= BKPIMAGE_IS_COMPRESSED;
     659             : 
     660           0 :                 rdt_datas_last->data = regbuf->compressed_page;
     661           0 :                 rdt_datas_last->len = compressed_len;
     662             :             }
     663             :             else
     664             :             {
     665        5112 :                 bimg.length = BLCKSZ - cbimg.hole_length;
     666             : 
     667        5112 :                 if (cbimg.hole_length == 0)
     668             :                 {
     669         229 :                     rdt_datas_last->data = page;
     670         229 :                     rdt_datas_last->len = BLCKSZ;
     671             :                 }
     672             :                 else
     673             :                 {
     674             :                     /* must skip the hole */
     675        4883 :                     rdt_datas_last->data = page;
     676        4883 :                     rdt_datas_last->len = bimg.hole_offset;
     677             : 
     678        4883 :                     rdt_datas_last->next = &regbuf->bkp_rdatas[1];
     679        4883 :                     rdt_datas_last = rdt_datas_last->next;
     680             : 
     681        4883 :                     rdt_datas_last->data =
     682        4883 :                         page + (bimg.hole_offset + cbimg.hole_length);
     683        4883 :                     rdt_datas_last->len =
     684        4883 :                         BLCKSZ - (bimg.hole_offset + cbimg.hole_length);
     685             :                 }
     686             :             }
     687             : 
     688        5112 :             total_len += bimg.length;
     689             :         }
     690             : 
     691     1484422 :         if (needs_data)
     692             :         {
     693             :             /*
     694             :              * Link the caller-supplied rdata chain for this buffer to the
     695             :              * overall list.
     696             :              */
     697     1162679 :             bkpb.fork_flags |= BKPBLOCK_HAS_DATA;
     698     1162679 :             bkpb.data_length = regbuf->rdata_len;
     699     1162679 :             total_len += regbuf->rdata_len;
     700             : 
     701     1162679 :             rdt_datas_last->next = regbuf->rdata_head;
     702     1162679 :             rdt_datas_last = regbuf->rdata_tail;
     703             :         }
     704             : 
     705     1484422 :         if (prev_regbuf && RelFileNodeEquals(regbuf->rnode, prev_regbuf->rnode))
     706             :         {
     707      117371 :             samerel = true;
     708      117371 :             bkpb.fork_flags |= BKPBLOCK_SAME_REL;
     709             :         }
     710             :         else
     711     1367051 :             samerel = false;
     712     1484422 :         prev_regbuf = regbuf;
     713             : 
     714             :         /* Ok, copy the header to the scratch buffer */
     715     1484422 :         memcpy(scratch, &bkpb, SizeOfXLogRecordBlockHeader);
     716     1484422 :         scratch += SizeOfXLogRecordBlockHeader;
     717     1484422 :         if (include_image)
     718             :         {
     719        5112 :             memcpy(scratch, &bimg, SizeOfXLogRecordBlockImageHeader);
     720        5112 :             scratch += SizeOfXLogRecordBlockImageHeader;
     721        5112 :             if (cbimg.hole_length != 0 && is_compressed)
     722             :             {
     723           0 :                 memcpy(scratch, &cbimg,
     724             :                        SizeOfXLogRecordBlockCompressHeader);
     725           0 :                 scratch += SizeOfXLogRecordBlockCompressHeader;
     726             :             }
     727             :         }
     728     1484422 :         if (!samerel)
     729             :         {
     730     1367051 :             memcpy(scratch, &regbuf->rnode, sizeof(RelFileNode));
     731     1367051 :             scratch += sizeof(RelFileNode);
     732             :         }
     733     1484422 :         memcpy(scratch, &regbuf->block, sizeof(BlockNumber));
     734     1484422 :         scratch += sizeof(BlockNumber);
     735             :     }
     736             : 
     737             :     /* followed by the record's origin, if any */
     738     2107911 :     if ((curinsert_flags & XLOG_INCLUDE_ORIGIN) &&
     739      717805 :         replorigin_session_origin != InvalidRepOriginId)
     740             :     {
     741           0 :         *(scratch++) = (char) XLR_BLOCK_ID_ORIGIN;
     742           0 :         memcpy(scratch, &replorigin_session_origin, sizeof(replorigin_session_origin));
     743           0 :         scratch += sizeof(replorigin_session_origin);
     744             :     }
     745             : 
     746             :     /* followed by main data, if any */
     747     1390106 :     if (mainrdata_len > 0)
     748             :     {
     749     1385953 :         if (mainrdata_len > 255)
     750             :         {
     751        3761 :             *(scratch++) = (char) XLR_BLOCK_ID_DATA_LONG;
     752        3761 :             memcpy(scratch, &mainrdata_len, sizeof(uint32));
     753        3761 :             scratch += sizeof(uint32);
     754             :         }
     755             :         else
     756             :         {
     757     1382192 :             *(scratch++) = (char) XLR_BLOCK_ID_DATA_SHORT;
     758     1382192 :             *(scratch++) = (uint8) mainrdata_len;
     759             :         }
     760     1385953 :         rdt_datas_last->next = mainrdata_head;
     761     1385953 :         rdt_datas_last = mainrdata_last;
     762     1385953 :         total_len += mainrdata_len;
     763             :     }
     764     1390106 :     rdt_datas_last->next = NULL;
     765             : 
     766     1390106 :     hdr_rdt.len = (scratch - hdr_scratch);
     767     1390106 :     total_len += hdr_rdt.len;
     768             : 
     769             :     /*
     770             :      * Calculate CRC of the data
     771             :      *
     772             :      * Note that the record header isn't added into the CRC initially since we
     773             :      * don't know the prev-link yet.  Thus, the CRC will represent the CRC of
     774             :      * the whole record in the order: rdata, then backup blocks, then record
     775             :      * header.
     776             :      */
     777     1390106 :     INIT_CRC32C(rdata_crc);
     778     1390106 :     COMP_CRC32C(rdata_crc, hdr_scratch + SizeOfXLogRecord, hdr_rdt.len - SizeOfXLogRecord);
     779     4929382 :     for (rdt = hdr_rdt.next; rdt != NULL; rdt = rdt->next)
     780     3539276 :         COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
     781             : 
     782             :     /*
     783             :      * Fill in the fields in the record header. Prev-link is filled in later,
     784             :      * once we know where in the WAL the record will be inserted. The CRC does
     785             :      * not include the record header yet.
     786             :      */
     787     1390106 :     rechdr->xl_xid = GetCurrentTransactionIdIfAny();
     788     1390106 :     rechdr->xl_tot_len = total_len;
     789     1390106 :     rechdr->xl_info = info;
     790     1390106 :     rechdr->xl_rmid = rmid;
     791     1390106 :     rechdr->xl_prev = InvalidXLogRecPtr;
     792     1390106 :     rechdr->xl_crc = rdata_crc;
     793             : 
     794     1390106 :     return &hdr_rdt;
     795             : }
     796             : 
     797             : /*
     798             :  * Create a compressed version of a backup block image.
     799             :  *
     800             :  * Returns FALSE if compression fails (i.e., compressed result is actually
     801             :  * bigger than original). Otherwise, returns TRUE and sets 'dlen' to
     802             :  * the length of compressed block image.
     803             :  */
     804             : static bool
     805           0 : XLogCompressBackupBlock(char *page, uint16 hole_offset, uint16 hole_length,
     806             :                         char *dest, uint16 *dlen)
     807             : {
     808           0 :     int32       orig_len = BLCKSZ - hole_length;
     809             :     int32       len;
     810           0 :     int32       extra_bytes = 0;
     811             :     char       *source;
     812             :     char        tmp[BLCKSZ];
     813             : 
     814           0 :     if (hole_length != 0)
     815             :     {
     816             :         /* must skip the hole */
     817           0 :         source = tmp;
     818           0 :         memcpy(source, page, hole_offset);
     819           0 :         memcpy(source + hole_offset,
     820           0 :                page + (hole_offset + hole_length),
     821           0 :                BLCKSZ - (hole_length + hole_offset));
     822             : 
     823             :         /*
     824             :          * Extra data needs to be stored in WAL record for the compressed
     825             :          * version of block image if the hole exists.
     826             :          */
     827           0 :         extra_bytes = SizeOfXLogRecordBlockCompressHeader;
     828             :     }
     829             :     else
     830           0 :         source = page;
     831             : 
     832             :     /*
     833             :      * We recheck the actual size even if pglz_compress() reports success and
     834             :      * see if the number of bytes saved by compression is larger than the
     835             :      * length of extra data needed for the compressed version of block image.
     836             :      */
     837           0 :     len = pglz_compress(source, orig_len, dest, PGLZ_strategy_default);
     838           0 :     if (len >= 0 &&
     839           0 :         len + extra_bytes < orig_len)
     840             :     {
     841           0 :         *dlen = (uint16) len;   /* successful compression */
     842           0 :         return true;
     843             :     }
     844           0 :     return false;
     845             : }
     846             : 
     847             : /*
     848             :  * Determine whether the buffer referenced has to be backed up.
     849             :  *
     850             :  * Since we don't yet have the insert lock, fullPageWrites and forcePageWrites
     851             :  * could change later, so the result should be used for optimization purposes
     852             :  * only.
     853             :  */
     854             : bool
     855        6222 : XLogCheckBufferNeedsBackup(Buffer buffer)
     856             : {
     857             :     XLogRecPtr  RedoRecPtr;
     858             :     bool        doPageWrites;
     859             :     Page        page;
     860             : 
     861        6222 :     GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
     862             : 
     863        6222 :     page = BufferGetPage(buffer);
     864             : 
     865        6222 :     if (doPageWrites && PageGetLSN(page) <= RedoRecPtr)
     866           5 :         return true;            /* buffer requires backup */
     867             : 
     868        6217 :     return false;               /* buffer does not need to be backed up */
     869             : }
     870             : 
     871             : /*
     872             :  * Write a backup block if needed when we are setting a hint. Note that
     873             :  * this may be called for a variety of page types, not just heaps.
     874             :  *
     875             :  * Callable while holding just share lock on the buffer content.
     876             :  *
     877             :  * We can't use the plain backup block mechanism since that relies on the
     878             :  * Buffer being exclusively locked. Since some modifications (setting LSN, hint
     879             :  * bits) are allowed in a sharelocked buffer, that can lead to WAL checksum
     880             :  * failures. So instead we copy the page and insert the copied data as normal
     881             :  * record data.
     882             :  *
     883             :  * We only need to do something if page has not yet been full page written in
     884             :  * this checkpoint round. The LSN of the inserted wal record is returned if we
     885             :  * had to write, InvalidXLogRecPtr otherwise.
     886             :  *
     887             :  * It is possible that multiple concurrent backends could attempt to write WAL
     888             :  * records. In that case, multiple copies of the same block would be recorded
     889             :  * in separate WAL records by different backends, though that is still OK from
     890             :  * a correctness perspective.
     891             :  */
     892             : XLogRecPtr
     893           0 : XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
     894             : {
     895           0 :     XLogRecPtr  recptr = InvalidXLogRecPtr;
     896             :     XLogRecPtr  lsn;
     897             :     XLogRecPtr  RedoRecPtr;
     898             : 
     899             :     /*
     900             :      * Ensure no checkpoint can change our view of RedoRecPtr.
     901             :      */
     902           0 :     Assert(MyPgXact->delayChkpt);
     903             : 
     904             :     /*
     905             :      * Update RedoRecPtr so that we can make the right decision
     906             :      */
     907           0 :     RedoRecPtr = GetRedoRecPtr();
     908             : 
     909             :     /*
     910             :      * We assume page LSN is first data on *every* page that can be passed to
     911             :      * XLogInsert, whether it has the standard page layout or not. Since we're
     912             :      * only holding a share-lock on the page, we must take the buffer header
     913             :      * lock when we look at the LSN.
     914             :      */
     915           0 :     lsn = BufferGetLSNAtomic(buffer);
     916             : 
     917           0 :     if (lsn <= RedoRecPtr)
     918             :     {
     919             :         int         flags;
     920             :         char        copied_buffer[BLCKSZ];
     921           0 :         char       *origdata = (char *) BufferGetBlock(buffer);
     922             :         RelFileNode rnode;
     923             :         ForkNumber  forkno;
     924             :         BlockNumber blkno;
     925             : 
     926             :         /*
     927             :          * Copy buffer so we don't have to worry about concurrent hint bit or
     928             :          * lsn updates. We assume pd_lower/upper cannot be changed without an
     929             :          * exclusive lock, so the backed-up contents are not racy.
     930             :          */
     931           0 :         if (buffer_std)
     932             :         {
     933             :             /* Assume we can omit data between pd_lower and pd_upper */
     934           0 :             Page        page = BufferGetPage(buffer);
     935           0 :             uint16      lower = ((PageHeader) page)->pd_lower;
     936           0 :             uint16      upper = ((PageHeader) page)->pd_upper;
     937             : 
     938           0 :             memcpy(copied_buffer, origdata, lower);
     939           0 :             memcpy(copied_buffer + upper, origdata + upper, BLCKSZ - upper);
     940             :         }
     941             :         else
     942           0 :             memcpy(copied_buffer, origdata, BLCKSZ);
     943             : 
     944           0 :         XLogBeginInsert();
     945             : 
     946           0 :         flags = REGBUF_FORCE_IMAGE;
     947           0 :         if (buffer_std)
     948           0 :             flags |= REGBUF_STANDARD;
     949             : 
     950           0 :         BufferGetTag(buffer, &rnode, &forkno, &blkno);
     951           0 :         XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer, flags);
     952             : 
     953           0 :         recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI_FOR_HINT);
     954             :     }
     955             : 
     956           0 :     return recptr;
     957             : }
     958             : 
     959             : /*
     960             :  * Write a WAL record containing a full image of a page. Caller is responsible
     961             :  * for writing the page to disk after calling this routine.
     962             :  *
     963             :  * Note: If you're using this function, you should be building pages in private
     964             :  * memory and writing them directly to smgr.  If you're using buffers, call
     965             :  * log_newpage_buffer instead.
     966             :  *
     967             :  * If the page follows the standard page layout, with a PageHeader and unused
     968             :  * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows
     969             :  * the unused space to be left out from the WAL record, making it smaller.
     970             :  */
     971             : XLogRecPtr
     972        3827 : log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
     973             :             Page page, bool page_std)
     974             : {
     975             :     int         flags;
     976             :     XLogRecPtr  recptr;
     977             : 
     978        3827 :     flags = REGBUF_FORCE_IMAGE;
     979        3827 :     if (page_std)
     980        3804 :         flags |= REGBUF_STANDARD;
     981             : 
     982        3827 :     XLogBeginInsert();
     983        3827 :     XLogRegisterBlock(0, rnode, forkNum, blkno, page, flags);
     984        3827 :     recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);
     985             : 
     986             :     /*
     987             :      * The page may be uninitialized. If so, we can't set the LSN because that
     988             :      * would corrupt the page.
     989             :      */
     990        3827 :     if (!PageIsNew(page))
     991             :     {
     992        3827 :         PageSetLSN(page, recptr);
     993             :     }
     994             : 
     995        3827 :     return recptr;
     996             : }
     997             : 
     998             : /*
     999             :  * Write a WAL record containing a full image of a page.
    1000             :  *
    1001             :  * Caller should initialize the buffer and mark it dirty before calling this
    1002             :  * function.  This function will set the page LSN.
    1003             :  *
    1004             :  * If the page follows the standard page layout, with a PageHeader and unused
    1005             :  * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows
    1006             :  * the unused space to be left out from the WAL record, making it smaller.
    1007             :  */
    1008             : XLogRecPtr
    1009           0 : log_newpage_buffer(Buffer buffer, bool page_std)
    1010             : {
    1011           0 :     Page        page = BufferGetPage(buffer);
    1012             :     RelFileNode rnode;
    1013             :     ForkNumber  forkNum;
    1014             :     BlockNumber blkno;
    1015             : 
    1016             :     /* Shared buffers should be modified in a critical section. */
    1017           0 :     Assert(CritSectionCount > 0);
    1018             : 
    1019           0 :     BufferGetTag(buffer, &rnode, &forkNum, &blkno);
    1020             : 
    1021           0 :     return log_newpage(&rnode, forkNum, blkno, page, page_std);
    1022             : }
    1023             : 
    1024             : /*
    1025             :  * Allocate working buffers needed for WAL record construction.
    1026             :  */
    1027             : void
    1028         359 : InitXLogInsert(void)
    1029             : {
    1030             :     /* Initialize the working areas */
    1031         359 :     if (xloginsert_cxt == NULL)
    1032             :     {
    1033         342 :         xloginsert_cxt = AllocSetContextCreate(TopMemoryContext,
    1034             :                                                "WAL record construction",
    1035             :                                                ALLOCSET_DEFAULT_SIZES);
    1036             :     }
    1037             : 
    1038         359 :     if (registered_buffers == NULL)
    1039             :     {
    1040         342 :         registered_buffers = (registered_buffer *)
    1041         342 :             MemoryContextAllocZero(xloginsert_cxt,
    1042             :                                    sizeof(registered_buffer) * (XLR_NORMAL_MAX_BLOCK_ID + 1));
    1043         342 :         max_registered_buffers = XLR_NORMAL_MAX_BLOCK_ID + 1;
    1044             :     }
    1045         359 :     if (rdatas == NULL)
    1046             :     {
    1047         342 :         rdatas = MemoryContextAlloc(xloginsert_cxt,
    1048             :                                     sizeof(XLogRecData) * XLR_NORMAL_RDATAS);
    1049         342 :         max_rdatas = XLR_NORMAL_RDATAS;
    1050             :     }
    1051             : 
    1052             :     /*
    1053             :      * Allocate a buffer to hold the header information for a WAL record.
    1054             :      */
    1055         359 :     if (hdr_scratch == NULL)
    1056         342 :         hdr_scratch = MemoryContextAllocZero(xloginsert_cxt,
    1057             :                                              HEADER_SCRATCH_SIZE);
    1058         359 : }

Generated by: LCOV version 1.11