Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bufpage.h
4 : * Standard POSTGRES buffer page definitions.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/include/storage/bufpage.h
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #ifndef BUFPAGE_H
15 : #define BUFPAGE_H
16 :
17 : #include "access/xlogdefs.h"
18 : #include "storage/block.h"
19 : #include "storage/item.h"
20 : #include "storage/off.h"
21 :
22 : /*
23 : * A postgres disk page is an abstraction layered on top of a postgres
24 : * disk block (which is simply a unit of i/o, see block.h).
25 : *
26 : * specifically, while a disk block can be unformatted, a postgres
27 : * disk page is always a slotted page of the form:
28 : *
29 : * +----------------+---------------------------------+
30 : * | PageHeaderData | linp1 linp2 linp3 ...           |
31 : * +-----------+----+---------------------------------+
32 : * | ... linpN |                                      |
33 : * +-----------+--------------------------------------+
34 : * |           ^ pd_lower                             |
35 : * |                                                  |
36 : * |             v pd_upper                           |
37 : * +-------------+------------------------------------+
38 : * |             | tupleN ...                         |
39 : * +-------------+------------------+-----------------+
40 : * |       ... tuple3 tuple2 tuple1 | "special space" |
41 : * +--------------------------------+-----------------+
42 : *                                  ^ pd_special
43 : *
44 : * a page is full when nothing can be added between pd_lower and
45 : * pd_upper.
46 : *
47 : * all blocks written out by an access method must be disk pages.
48 : *
49 : * EXCEPTIONS:
50 : *
51 : * obviously, a page is not formatted before it is initialized by
52 : * a call to PageInit.
53 : *
54 : * NOTES:
55 : *
56 : * linp1..N form an ItemId array. ItemPointers point into this array
57 : * rather than pointing directly to a tuple. Note that OffsetNumbers
58 : * conventionally start at 1, not 0.
59 : *
60 : * tuple1..N are added "backwards" on the page. because a tuple's
61 : * ItemPointer points to its ItemId entry rather than its actual
62 : * byte-offset position, tuples can be physically shuffled on a page
63 : * whenever the need arises.
64 : *
65 : * AM-generic per-page information is kept in PageHeaderData.
66 : *
67 : * AM-specific per-page data (if any) is kept in the area marked "special
68 : * space"; each AM has an "opaque" structure defined somewhere that is
69 : * stored as the page trailer. an access method should always
70 : * initialize its pages with PageInit and then set its own opaque
71 : * fields.
72 : */
73 :
74 : typedef Pointer Page; /* a page is referenced through a generic Pointer */
75 :
76 :
77 : /*
78 : * LocationIndex: a location (byte offset) within a page.
79 : *
80 : * Although the type is 16 bits wide, values are effectively limited to 2^15
81 : * because ItemIdData.lp_off and ItemIdData.lp_len are 15 bits (see itemid.h).
82 : */
83 : typedef uint16 LocationIndex;
84 :
85 :
86 : /*
87 : * For historical reasons, the 64-bit LSN value is stored as two 32-bit
88 : * values. PageXLogRecPtrGet/Set convert between the two representations.
89 : */
90 : typedef struct
91 : {
92 : uint32 xlogid; /* high 32 bits of the LSN */
93 : uint32 xrecoff; /* low 32 bits of the LSN */
94 : } PageXLogRecPtr;
95 :
96 : #define PageXLogRecPtrGet(val) \
97 : ((uint64) (val).xlogid << 32 | (val).xrecoff)
98 : #define PageXLogRecPtrSet(ptr, lsn) \
99 : ((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn))
100 :
101 : /*
102 : * disk page organization
103 : *
104 : * space management information generic to any page
105 : *
106 : * pd_lsn - identifies xlog record for last change to this page.
107 : * pd_checksum - page checksum, if set.
108 : * pd_flags - flag bits.
109 : * pd_lower - offset to start of free space.
110 : * pd_upper - offset to end of free space.
111 : * pd_special - offset to start of special space.
112 : * pd_pagesize_version - size in bytes and page layout version number.
113 : * pd_prune_xid - oldest XID among potentially prunable tuples on page.
114 : *
115 : * The LSN is used by the buffer manager to enforce the basic rule of WAL:
116 : * "thou shalt write xlog before data". A dirty buffer cannot be dumped
117 : * to disk until xlog has been flushed at least as far as the page's LSN.
118 : *
119 : * pd_checksum stores the page checksum, if it has been set for this page;
120 : * zero is a valid value for a checksum. If a checksum is not in use then
121 : * we leave the field unset. This will typically mean the field is zero
122 : * though non-zero values may also be present if databases have been
123 : * pg_upgraded from releases prior to 9.3, when the same byte offset was
124 : * used to store the current timelineid when the page was last updated.
125 : * Note that there is no indication on a page as to whether the checksum
126 : * is valid or not, a deliberate design choice which avoids the problem
127 : * of relying on the page contents to decide whether to verify it. Hence
128 : * there are no flag bits relating to checksums.
129 : *
130 : * pd_prune_xid is a hint field that helps determine whether pruning will be
131 : * useful. It is currently unused in index pages.
132 : *
133 : * The page version number and page size are packed together into a single
134 : * uint16 field. This is for historical reasons: before PostgreSQL 7.3,
135 : * there was no concept of a page version number, and doing it this way
136 : * lets us pretend that pre-7.3 databases have page version number zero.
137 : * We constrain page sizes to be multiples of 256, leaving the low eight
138 : * bits available for a version number.
139 : *
140 : * Minimum possible page size is perhaps 64B to fit page header, opaque space
141 : * and a minimal tuple; of course, in reality you want it much bigger, so
142 : * the constraint on pagesize mod 256 is not an important restriction.
143 : * On the high end, we can only support pages up to 32KB because lp_off/lp_len
144 : * are 15 bits.
145 : */
146 :
147 : typedef struct PageHeaderData
148 : {
149 : /* XXX LSN is member of *any* block, not only page-organized ones */
150 : PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
151 : * record for last change to this page */
152 : uint16 pd_checksum; /* page checksum, if set */
153 : uint16 pd_flags; /* flag bits, see below */
154 : LocationIndex pd_lower; /* offset to start of free space */
155 : LocationIndex pd_upper; /* offset to end of free space */
156 : LocationIndex pd_special; /* offset to start of special space */
157 : uint16 pd_pagesize_version; /* page size and layout version, packed */
158 : TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
159 : ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
160 : } PageHeaderData;
161 :
162 : typedef PageHeaderData *PageHeader; /* pointer to a page header */
163 :
164 : /*
165 : * pd_flags contains the following flag bits. Undefined bits are initialized
166 : * to zero and may be used in the future.
167 : *
168 : * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
169 : * pd_lower. This should be considered a hint rather than the truth, since
170 : * changes to it are not WAL-logged.
171 : *
172 : * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
173 : * page for its new tuple version; this suggests that a prune is needed.
174 : * Again, this is just a hint. PD_ALL_VISIBLE is described at its definition.
175 : */
176 : #define PD_HAS_FREE_LINES 0x0001 /* are there any unused line pointers? */
177 : #define PD_PAGE_FULL 0x0002 /* not enough free space for new tuple? */
178 : #define PD_ALL_VISIBLE 0x0004 /* all tuples on page are visible to
179 : * everyone */
180 :
181 : #define PD_VALID_FLAG_BITS 0x0007 /* OR of the three pd_flags bits above */
182 :
183 : /*
184 : * Page layout version number 0 is for pre-7.3 Postgres releases.
185 : * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
186 : * Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
187 : * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
188 : * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
189 : * added the pd_flags field (by stealing some bits from pd_tli),
190 : * as well as adding the pd_prune_xid field (which enlarges the header).
191 : *
192 : * As of Release 9.3, the checksum version must also be considered when
193 : * handling pages.
194 : */
195 : #define PG_PAGE_LAYOUT_VERSION 4 /* layout version in use since release 8.3 */
196 : #define PG_DATA_CHECKSUM_VERSION 1 /* checksum version, relevant as of 9.3 */
197 :
198 : /* ----------------------------------------------------------------
199 : * page support macros
200 : * ----------------------------------------------------------------
201 : */
202 :
203 : /*
204 : * PageIsValid
205 : * True iff the page pointer is valid (as determined by PointerIsValid).
206 : */
207 : #define PageIsValid(page) PointerIsValid(page)
208 :
209 : /*
210 : * size of the fixed header: line pointer(s) do not count as part of header
211 : */
212 : #define SizeOfPageHeaderData (offsetof(PageHeaderData, pd_linp))
213 :
214 : /*
215 : * PageIsEmpty
216 : * returns true iff no itemid has been allocated (pd_lower is within header)
217 : */
218 : #define PageIsEmpty(page) \
219 : (((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData)
220 :
221 : /*
222 : * PageIsNew
223 : * returns true iff page has not been initialized by PageInit (pd_upper == 0)
224 : */
225 : #define PageIsNew(page) (((PageHeader) (page))->pd_upper == 0)
226 :
227 : /*
228 : * PageGetItemId
229 : * Returns the line pointer (ItemId) at the given 1-based offset number.
230 : */
231 : #define PageGetItemId(page, offsetNumber) \
232 : ((ItemId) (&((PageHeader) (page))->pd_linp[(offsetNumber) - 1]))
233 :
234 : /*
235 : * PageGetContents
236 : * Returns the data area just past the header, for pages without item pointers.
237 : *
238 : * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
239 : * Now it is. Beware of old code that might think the offset to the contents
240 : * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
241 : */
242 : #define PageGetContents(page) \
243 : ((char *) (page) + MAXALIGN(SizeOfPageHeaderData))
244 :
245 : /* ----------------
246 : * macros to access page size info
247 : * ----------------
248 : */
249 :
250 : /*
251 : * PageSizeIsValid
252 : * True iff the page size is valid, i.e. equal to BLCKSZ.
253 : */
254 : #define PageSizeIsValid(pageSize) ((pageSize) == BLCKSZ)
255 :
256 : /*
257 : * PageGetPageSize
258 : * Returns the page size: pd_pagesize_version with the version bits masked off.
259 : *
260 : * this can only be called on a formatted page (unlike
261 : * BufferGetPageSize, which can be called on an unformatted page).
262 : * however, it can be called on a page that is not stored in a buffer.
263 : */
264 : #define PageGetPageSize(page) \
265 : ((Size) (((PageHeader) (page))->pd_pagesize_version & (uint16) 0xFF00))
266 :
267 : /*
268 : * PageGetPageLayoutVersion
269 : * Returns the layout version: the low eight bits of pd_pagesize_version.
270 : */
271 : #define PageGetPageLayoutVersion(page) \
272 : (((PageHeader) (page))->pd_pagesize_version & 0x00FF)
273 :
274 : /*
275 : * PageSetPageSizeAndVersion
276 : * Sets the page size and page layout version number of a page.
277 : * Asserts that size fits in mask 0xFF00 and version fits in mask 0x00FF.
278 : * We could support setting these two values separately, but there's
279 : * no real need for it at the moment.
280 : */
281 : #define PageSetPageSizeAndVersion(page, size, version) \
282 : ( \
283 : AssertMacro(((size) & 0xFF00) == (size)), \
284 : AssertMacro(((version) & 0x00FF) == (version)), \
285 : ((PageHeader) (page))->pd_pagesize_version = (size) | (version) \
286 : )
287 :
288 : /* ----------------
289 : * page special data macros
290 : * ----------------
291 : */
292 : /*
293 : * PageGetSpecialSize
294 : * Returns size of special space on a page (page size minus pd_special).
295 : */
296 : #define PageGetSpecialSize(page) \
297 : ((uint16) (PageGetPageSize(page) - ((PageHeader)(page))->pd_special))
298 :
299 : /*
300 : * Using assertions, validate that the page special pointer is OK.
301 : *
302 : * This is intended to catch use of the pointer before page initialization.
303 : * It is implemented as a function due to the limitations of the MSVC
304 : * compiler, which choked on doing all these tests within another macro. We
305 : * return true so that AssertMacro() can be used while still getting the
306 : * specifics of the failed assertion from within this function.
307 : */
308 : static inline bool
309 19750502 : PageValidateSpecialPointer(Page page)
310 : {
311 19750502 : Assert(PageIsValid(page));
312 19750502 : Assert(((PageHeader) (page))->pd_special <= BLCKSZ); /* must not point past the block */
313 19750502 : Assert(((PageHeader) (page))->pd_special >= SizeOfPageHeaderData); /* must not point into the header */
314 :
315 19750502 : return true;
316 : }
317 :
318 : /*
319 : * PageGetSpecialPointer
320 : * Returns pointer to special space on a page (page start plus pd_special).
321 : */
322 : #define PageGetSpecialPointer(page) \
323 : ( \
324 : AssertMacro(PageValidateSpecialPointer(page)), \
325 : (char *) ((char *) (page) + ((PageHeader) (page))->pd_special) \
326 : )
327 :
328 : /*
329 : * PageGetItem
330 : * Returns a pointer to the item located at itemId's offset on the page.
331 : *
332 : * Note:
333 : * This does not change the status of any of the resources passed.
334 : * The semantics may change in the future.
335 : */
336 : #define PageGetItem(page, itemId) \
337 : ( \
338 : AssertMacro(PageIsValid(page)), \
339 : AssertMacro(ItemIdHasStorage(itemId)), \
340 : (Item)(((char *)(page)) + ItemIdGetOffset(itemId)) \
341 : )
342 :
343 : /*
344 : * PageGetMaxOffsetNumber
345 : * Returns the maximum offset number used by the given page.
346 : * Since offset numbers are 1-based, this is also the number
347 : * of items on the page.
348 : *
349 : * NOTE: if the page is not initialized (pd_lower == 0), we must
350 : * return zero to ensure sane behavior, hence the explicit comparison.
351 : * Beware: the page argument may be evaluated twice as a result.
352 : */
353 : #define PageGetMaxOffsetNumber(page) \
354 : (((PageHeader) (page))->pd_lower <= SizeOfPageHeaderData ? 0 : \
355 : ((((PageHeader) (page))->pd_lower - SizeOfPageHeaderData) \
356 : / sizeof(ItemIdData)))
357 :
358 : /*
359 : * Header accessors for the LSN and pd_flags bits; the flag tests yield the
360 : * masked bit, not 0/1. (Beware multiple evaluation of the arguments!)
361 : */
362 : #define PageGetLSN(page) \
363 : PageXLogRecPtrGet(((PageHeader) (page))->pd_lsn)
364 : #define PageSetLSN(page, lsn) \
365 : PageXLogRecPtrSet(((PageHeader) (page))->pd_lsn, lsn)
366 :
367 : #define PageHasFreeLinePointers(page) \
368 : (((PageHeader) (page))->pd_flags & PD_HAS_FREE_LINES)
369 : #define PageSetHasFreeLinePointers(page) \
370 : (((PageHeader) (page))->pd_flags |= PD_HAS_FREE_LINES)
371 : #define PageClearHasFreeLinePointers(page) \
372 : (((PageHeader) (page))->pd_flags &= ~PD_HAS_FREE_LINES)
373 :
374 : #define PageIsFull(page) \
375 : (((PageHeader) (page))->pd_flags & PD_PAGE_FULL)
376 : #define PageSetFull(page) \
377 : (((PageHeader) (page))->pd_flags |= PD_PAGE_FULL)
378 : #define PageClearFull(page) \
379 : (((PageHeader) (page))->pd_flags &= ~PD_PAGE_FULL)
380 :
381 : #define PageIsAllVisible(page) \
382 : (((PageHeader) (page))->pd_flags & PD_ALL_VISIBLE)
383 : #define PageSetAllVisible(page) \
384 : (((PageHeader) (page))->pd_flags |= PD_ALL_VISIBLE)
385 : #define PageClearAllVisible(page) \
386 : (((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)
387 :
388 : #define PageIsPrunable(page, oldestxmin) \
389 : ( \
390 : AssertMacro(TransactionIdIsNormal(oldestxmin)), \
391 : TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) && \
392 : TransactionIdPrecedes(((PageHeader) (page))->pd_prune_xid, oldestxmin) \
393 : ) /* true iff pd_prune_xid is valid and precedes oldestxmin */
394 : #define PageSetPrunable(page, xid) \
395 : do { \
396 : Assert(TransactionIdIsNormal(xid)); \
397 : if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \
398 : TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \
399 : ((PageHeader) (page))->pd_prune_xid = (xid); \
400 : } while (0) /* stores xid only if pd_prune_xid is unset or xid precedes it */
401 : #define PageClearPrunable(page) \
402 : (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId) /* reset the hint */
403 :
404 :
405 : /* ----------------------------------------------------------------
406 : * extern declarations
407 : * ----------------------------------------------------------------
408 : */
409 : #define PAI_OVERWRITE (1 << 0) /* flag bit passed when "overwrite" is true */
410 : #define PAI_IS_HEAP (1 << 1) /* flag bit passed when "is_heap" is true */
411 :
412 : #define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
413 : PageAddItemExtended(page, item, size, offsetNumber, \
414 : ((overwrite) ? PAI_OVERWRITE : 0) | \
415 : ((is_heap) ? PAI_IS_HEAP : 0)) /* maps the two booleans onto PAI_* flags */
416 :
417 : extern void PageInit(Page page, Size pageSize, Size specialSize);
418 : extern bool PageIsVerified(Page page, BlockNumber blkno);
419 : extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
420 : OffsetNumber offsetNumber, int flags); /* flags: PAI_* bits, as built by PageAddItem */
421 : extern Page PageGetTempPage(Page page);
422 : extern Page PageGetTempPageCopy(Page page);
423 : extern Page PageGetTempPageCopySpecial(Page page);
424 : extern void PageRestoreTempPage(Page tempPage, Page oldPage);
425 : extern void PageRepairFragmentation(Page page);
426 : extern Size PageGetFreeSpace(Page page);
427 : extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
428 : extern Size PageGetExactFreeSpace(Page page);
429 : extern Size PageGetHeapFreeSpace(Page page);
430 : extern void PageIndexTupleDelete(Page page, OffsetNumber offset);
431 : extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
432 : extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offset);
433 : extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
434 : Item newtup, Size newsize);
435 : extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
436 : extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
437 :
438 : #endif /* BUFPAGE_H */
|