Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pruneheap.c
4 : * heap page pruning and HOT-chain management code
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/heap/pruneheap.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/heapam.h"
18 : #include "access/heapam_xlog.h"
19 : #include "access/transam.h"
20 : #include "access/htup_details.h"
21 : #include "access/xlog.h"
22 : #include "catalog/catalog.h"
23 : #include "miscadmin.h"
24 : #include "pgstat.h"
25 : #include "storage/bufmgr.h"
26 : #include "utils/snapmgr.h"
27 : #include "utils/rel.h"
28 : #include "utils/tqual.h"
29 :
30 : /* Working data for heap_page_prune and subroutines */
31 : typedef struct
32 : {
33 : TransactionId new_prune_xid; /* new prune hint value for page */
34 : TransactionId latestRemovedXid; /* latest xid to be removed by this prune */
35 : int nredirected; /* numbers of entries in arrays below */
36 : int ndead;
37 : int nunused;
38 : /* arrays that accumulate indexes of items to be changed */
39 : OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
40 : OffsetNumber nowdead[MaxHeapTuplesPerPage];
41 : OffsetNumber nowunused[MaxHeapTuplesPerPage];
42 : /* marked[i] is TRUE if item i is entered in one of the above arrays */
43 : bool marked[MaxHeapTuplesPerPage + 1];
44 : } PruneState;
45 :
46 : /* Local functions */
47 : static int heap_prune_chain(Relation relation, Buffer buffer,
48 : OffsetNumber rootoffnum,
49 : TransactionId OldestXmin,
50 : PruneState *prstate);
51 : static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
52 : static void heap_prune_record_redirect(PruneState *prstate,
53 : OffsetNumber offnum, OffsetNumber rdoffnum);
54 : static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
55 : static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
56 :
57 :
58 : /*
59 : * Optionally prune and repair fragmentation in the specified page.
60 : *
61 : * This is an opportunistic function. It will perform housekeeping
62 : * only if the page heuristically looks like a candidate for pruning and we
63 : * can acquire buffer cleanup lock without blocking.
64 : *
65 : * Note: this is called quite often. It's important that it fall out quickly
66 : * if there's not any use in pruning.
67 : *
68 : * Caller must have pin on the buffer, and must *not* have a lock on it.
69 : *
70 : * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
71 : * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
72 : */
73 : void
74 499007 : heap_page_prune_opt(Relation relation, Buffer buffer)
75 : {
76 499007 : Page page = BufferGetPage(buffer);
77 : Size minfree;
78 : TransactionId OldestXmin;
79 :
80 : /*
81 : * We can't write WAL in recovery mode, so there's no point trying to
82 : * clean the page. The master will likely issue a cleaning WAL record soon
83 : * anyway, so this is no particular loss.
84 : */
85 499007 : if (RecoveryInProgress())
86 0 : return;
87 :
88 : /*
89 : * Use the appropriate xmin horizon for this relation. If it's a proper
90 : * catalog relation or a user defined, additional, catalog relation, we
91 : * need to use the horizon that includes slots, otherwise the data-only
92 : * horizon can be used. Note that the toast relation of user defined
93 : * relations are *not* considered catalog relations.
94 : *
95 : * It is OK to apply the old snapshot limit before acquiring the cleanup
96 : * lock because the worst that can happen is that we are not quite as
97 : * aggressive about the cleanup (by however many transaction IDs are
98 : * consumed between this point and acquiring the lock). This allows us to
99 : * save significant overhead in the case where the page is found not to be
100 : * prunable.
101 : */
102 665660 : if (IsCatalogRelation(relation) ||
103 166653 : RelationIsAccessibleInLogicalDecoding(relation))
104 332354 : OldestXmin = RecentGlobalXmin;
105 : else
106 166653 : OldestXmin =
107 166653 : TransactionIdLimitedForOldSnapshots(RecentGlobalDataXmin,
108 : relation);
109 :
110 499007 : Assert(TransactionIdIsValid(OldestXmin));
111 :
112 : /*
113 : * Let's see if we really need pruning.
114 : *
115 : * Forget it if page is not hinted to contain something prunable that's
116 : * older than OldestXmin.
117 : */
118 499007 : if (!PageIsPrunable(page, OldestXmin))
119 427845 : return;
120 :
121 : /*
122 : * We prune when a previous UPDATE failed to find enough space on the page
123 : * for a new tuple version, or when free space falls below the relation's
124 : * fill-factor target (but not less than 10%).
125 : *
126 : * Checking free space here is questionable since we aren't holding any
127 : * lock on the buffer; in the worst case we could get a bogus answer. It's
128 : * unlikely to be *seriously* wrong, though, since reading either pd_lower
129 : * or pd_upper is probably atomic. Avoiding taking a lock seems more
130 : * important than sometimes getting a wrong answer in what is after all
131 : * just a heuristic estimate.
132 : */
133 71162 : minfree = RelationGetTargetPageFreeSpace(relation,
134 : HEAP_DEFAULT_FILLFACTOR);
135 71162 : minfree = Max(minfree, BLCKSZ / 10);
136 :
137 71162 : if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
138 : {
139 : /* OK, try to get exclusive buffer lock */
140 2327 : if (!ConditionalLockBufferForCleanup(buffer))
141 19 : return;
142 :
143 : /*
144 : * Now that we have buffer lock, get accurate information about the
145 : * page's free space, and recheck the heuristic about whether to
146 : * prune. (We needn't recheck PageIsPrunable, since no one else could
147 : * have pruned while we hold pin.)
148 : */
149 2308 : if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
150 : {
151 2308 : TransactionId ignore = InvalidTransactionId; /* return value not
152 : * needed */
153 :
154 : /* OK to prune */
155 2308 : (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore);
156 : }
157 :
158 : /* And release buffer lock */
159 2308 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
160 : }
161 : }
162 :
163 :
164 : /*
165 : * Prune and repair fragmentation in the specified page.
166 : *
167 : * Caller must have pin and buffer cleanup lock on the page.
168 : *
169 : * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD
170 : * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum).
171 : *
172 : * If report_stats is true then we send the number of reclaimed heap-only
173 : * tuples to pgstats. (This must be FALSE during vacuum, since vacuum will
174 : * send its own new total to pgstats, and we don't want this delta applied
175 : * on top of that.)
176 : *
177 : * Returns the number of tuples deleted from the page and sets
178 : * latestRemovedXid.
179 : */
180 : int
181 7047 : heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
182 : bool report_stats, TransactionId *latestRemovedXid)
183 : {
184 7047 : int ndeleted = 0;
185 7047 : Page page = BufferGetPage(buffer);
186 : OffsetNumber offnum,
187 : maxoff;
188 : PruneState prstate;
189 :
190 : /*
191 : * Our strategy is to scan the page and make lists of items to change,
192 : * then apply the changes within a critical section. This keeps as much
193 : * logic as possible out of the critical section, and also ensures that
194 : * WAL replay will work the same as the normal case.
195 : *
196 : * First, initialize the new pd_prune_xid value to zero (indicating no
197 : * prunable tuples). If we find any tuples which may soon become
198 : * prunable, we will save the lowest relevant XID in new_prune_xid. Also
199 : * initialize the rest of our working state.
200 : */
201 7047 : prstate.new_prune_xid = InvalidTransactionId;
202 7047 : prstate.latestRemovedXid = *latestRemovedXid;
203 7047 : prstate.nredirected = prstate.ndead = prstate.nunused = 0;
204 7047 : memset(prstate.marked, 0, sizeof(prstate.marked));
205 :
206 : /* Scan the page */
207 7047 : maxoff = PageGetMaxOffsetNumber(page);
208 690197 : for (offnum = FirstOffsetNumber;
209 : offnum <= maxoff;
210 676103 : offnum = OffsetNumberNext(offnum))
211 : {
212 : ItemId itemid;
213 :
214 : /* Ignore items already processed as part of an earlier chain */
215 676103 : if (prstate.marked[offnum])
216 4209 : continue;
217 :
218 : /* Nothing to do if slot is empty or already dead */
219 671894 : itemid = PageGetItemId(page, offnum);
220 671894 : if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
221 110198 : continue;
222 :
223 : /* Process this item or chain of items */
224 561696 : ndeleted += heap_prune_chain(relation, buffer, offnum,
225 : OldestXmin,
226 : &prstate);
227 : }
228 :
229 : /* Any error while applying the changes is critical */
230 7047 : START_CRIT_SECTION();
231 :
232 : /* Have we found any prunable items? */
233 7047 : if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
234 : {
235 : /*
236 : * Apply the planned item changes, then repair page fragmentation, and
237 : * update the page's hint bit about whether it has free line pointers.
238 : */
239 3029 : heap_page_prune_execute(buffer,
240 : prstate.redirected, prstate.nredirected,
241 : prstate.nowdead, prstate.ndead,
242 : prstate.nowunused, prstate.nunused);
243 :
244 : /*
245 : * Update the page's pd_prune_xid field to either zero, or the lowest
246 : * XID of any soon-prunable tuple.
247 : */
248 3029 : ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
249 :
250 : /*
251 : * Also clear the "page is full" flag, since there's no point in
252 : * repeating the prune/defrag process until something else happens to
253 : * the page.
254 : */
255 3029 : PageClearFull(page);
256 :
257 3029 : MarkBufferDirty(buffer);
258 :
259 : /*
260 : * Emit a WAL HEAP_CLEAN record showing what we did
261 : */
262 6058 : if (RelationNeedsWAL(relation))
263 : {
264 : XLogRecPtr recptr;
265 :
266 3028 : recptr = log_heap_clean(relation, buffer,
267 : prstate.redirected, prstate.nredirected,
268 : prstate.nowdead, prstate.ndead,
269 : prstate.nowunused, prstate.nunused,
270 : prstate.latestRemovedXid);
271 :
272 3028 : PageSetLSN(BufferGetPage(buffer), recptr);
273 : }
274 : }
275 : else
276 : {
277 : /*
278 : * If we didn't prune anything, but have found a new value for the
279 : * pd_prune_xid field, update it and mark the buffer dirty. This is
280 : * treated as a non-WAL-logged hint.
281 : *
282 : * Also clear the "page is full" flag if it is set, since there's no
283 : * point in repeating the prune/defrag process until something else
284 : * happens to the page.
285 : */
286 8019 : if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
287 4001 : PageIsFull(page))
288 : {
289 18 : ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
290 18 : PageClearFull(page);
291 18 : MarkBufferDirtyHint(buffer, true);
292 : }
293 : }
294 :
295 7047 : END_CRIT_SECTION();
296 :
297 : /*
298 : * If requested, report the number of tuples reclaimed to pgstats. This is
299 : * ndeleted minus ndead, because we don't want to count a now-DEAD root
300 : * item as a deletion for this purpose.
301 : */
302 7047 : if (report_stats && ndeleted > prstate.ndead)
303 706 : pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
304 :
305 7047 : *latestRemovedXid = prstate.latestRemovedXid;
306 :
307 : /*
308 : * XXX Should we update the FSM information of this page ?
309 : *
310 : * There are two schools of thought here. We may not want to update FSM
311 : * information so that the page is not used for unrelated UPDATEs/INSERTs
312 : * and any free space in this page will remain available for further
313 : * UPDATEs in *this* page, thus improving chances for doing HOT updates.
314 : *
315 : * But for a large table and where a page does not receive further UPDATEs
316 : * for a long time, we might waste this space by not updating the FSM
317 : * information. The relation may get extended and fragmented further.
318 : *
319 : * One possibility is to leave "fillfactor" worth of space in this page
320 : * and update FSM with the remaining space.
321 : */
322 :
323 7047 : return ndeleted;
324 : }
325 :
326 :
327 : /*
328 : * Prune specified item pointer or a HOT chain originating at that item.
329 : *
330 : * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
331 : * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
332 : * chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
333 : * This is OK because a RECENTLY_DEAD tuple preceding a DEAD tuple is really
334 : * DEAD, the OldestXmin test is just too coarse to detect it.
335 : *
336 : * The root line pointer is redirected to the tuple immediately after the
337 : * latest DEAD tuple. If all tuples in the chain are DEAD, the root line
338 : * pointer is marked LP_DEAD. (This includes the case of a DEAD simple
339 : * tuple, which we treat as a chain of length 1.)
340 : *
341 : * OldestXmin is the cutoff XID used to identify dead tuples.
342 : *
343 : * We don't actually change the page here, except perhaps for hint-bit updates
344 : * caused by HeapTupleSatisfiesVacuum. We just add entries to the arrays in
345 : * prstate showing the changes to be made. Items to be redirected are added
346 : * to the redirected[] array (two entries per redirection); items to be set to
347 : * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
348 : * state are added to nowunused[].
349 : *
350 : * Returns the number of tuples (to be) deleted from the page.
351 : */
352 : static int
353 561696 : heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
354 : TransactionId OldestXmin,
355 : PruneState *prstate)
356 : {
357 561696 : int ndeleted = 0;
358 561696 : Page dp = (Page) BufferGetPage(buffer);
359 561696 : TransactionId priorXmax = InvalidTransactionId;
360 : ItemId rootlp;
361 : HeapTupleHeader htup;
362 561696 : OffsetNumber latestdead = InvalidOffsetNumber,
363 561696 : maxoff = PageGetMaxOffsetNumber(dp),
364 : offnum;
365 : OffsetNumber chainitems[MaxHeapTuplesPerPage];
366 561696 : int nchain = 0,
367 : i;
368 : HeapTupleData tup;
369 :
370 561696 : tup.t_tableOid = RelationGetRelid(relation);
371 :
372 561696 : rootlp = PageGetItemId(dp, rootoffnum);
373 :
374 : /*
375 : * If it's a heap-only tuple, then it is not the start of a HOT chain.
376 : */
377 561696 : if (ItemIdIsNormal(rootlp))
378 : {
379 558489 : htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
380 :
381 558489 : tup.t_data = htup;
382 558489 : tup.t_len = ItemIdGetLength(rootlp);
383 558489 : ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum);
384 :
385 558489 : if (HeapTupleHeaderIsHeapOnly(htup))
386 : {
387 : /*
388 : * If the tuple is DEAD and doesn't chain to anything else, mark
389 : * it unused immediately. (If it does chain, we can only remove
390 : * it as part of pruning its chain.)
391 : *
392 : * We need this primarily to handle aborted HOT updates, that is,
393 : * XMIN_INVALID heap-only tuples. Those might not be linked to by
394 : * any chain, since the parent tuple might be re-updated before
395 : * any pruning occurs. So we have to be able to reap them
396 : * separately from chain-pruning. (Note that
397 : * HeapTupleHeaderIsHotUpdated will never return true for an
398 : * XMIN_INVALID tuple, so this code will work even when there were
399 : * sequential updates within the aborted transaction.)
400 : *
401 : * Note that we might first arrive at a dead heap-only tuple
402 : * either here or while following a chain below. Whichever path
403 : * gets there first will mark the tuple unused.
404 : */
405 3966 : if (HeapTupleSatisfiesVacuum(&tup, OldestXmin, buffer)
406 430 : == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup))
407 : {
408 285 : heap_prune_record_unused(prstate, rootoffnum);
409 285 : HeapTupleHeaderAdvanceLatestRemovedXid(htup,
410 : &prstate->latestRemovedXid);
411 285 : ndeleted++;
412 : }
413 :
414 : /* Nothing more to do */
415 3966 : return ndeleted;
416 : }
417 : }
418 :
419 : /* Start from the root tuple */
420 557730 : offnum = rootoffnum;
421 :
422 : /* while not end of the chain */
423 : for (;;)
424 : {
425 : ItemId lp;
426 : bool tupdead,
427 : recent_dead;
428 :
429 : /* Some sanity checks */
430 565468 : if (offnum < FirstOffsetNumber || offnum > maxoff)
431 : break;
432 :
433 : /* If item is already processed, stop --- it must not be same chain */
434 565468 : if (prstate->marked[offnum])
435 83 : break;
436 :
437 565385 : lp = PageGetItemId(dp, offnum);
438 :
439 : /* Unused item obviously isn't part of the chain */
440 565385 : if (!ItemIdIsUsed(lp))
441 0 : break;
442 :
443 : /*
444 : * If we are looking at the redirected root line pointer, jump to the
445 : * first normal tuple in the chain. If we find a redirect somewhere
446 : * else, stop --- it must not be same chain.
447 : */
448 565385 : if (ItemIdIsRedirected(lp))
449 : {
450 3207 : if (nchain > 0)
451 0 : break; /* not at start of chain */
452 3207 : chainitems[nchain++] = offnum;
453 3207 : offnum = ItemIdGetRedirect(rootlp);
454 3207 : continue;
455 : }
456 :
457 : /*
458 : * Likewise, a dead item pointer can't be part of the chain. (We
459 : * already eliminated the case of dead root tuple outside this
460 : * function.)
461 : */
462 562178 : if (ItemIdIsDead(lp))
463 0 : break;
464 :
465 562178 : Assert(ItemIdIsNormal(lp));
466 562178 : htup = (HeapTupleHeader) PageGetItem(dp, lp);
467 :
468 562178 : tup.t_data = htup;
469 562178 : tup.t_len = ItemIdGetLength(lp);
470 562178 : ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum);
471 :
472 : /*
473 : * Check the tuple XMIN against prior XMAX, if any
474 : */
475 566648 : if (TransactionIdIsValid(priorXmax) &&
476 4470 : !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
477 0 : break;
478 :
479 : /*
480 : * OK, this tuple is indeed a member of the chain.
481 : */
482 562178 : chainitems[nchain++] = offnum;
483 :
484 : /*
485 : * Check tuple's visibility status.
486 : */
487 562178 : tupdead = recent_dead = false;
488 :
489 562178 : switch (HeapTupleSatisfiesVacuum(&tup, OldestXmin, buffer))
490 : {
491 : case HEAPTUPLE_DEAD:
492 107106 : tupdead = true;
493 107106 : break;
494 :
495 : case HEAPTUPLE_RECENTLY_DEAD:
496 40672 : recent_dead = true;
497 :
498 : /*
499 : * This tuple may soon become DEAD. Update the hint field so
500 : * that the page is reconsidered for pruning in future.
501 : */
502 40672 : heap_prune_record_prunable(prstate,
503 81344 : HeapTupleHeaderGetUpdateXid(htup));
504 40672 : break;
505 :
506 : case HEAPTUPLE_DELETE_IN_PROGRESS:
507 :
508 : /*
509 : * This tuple may soon become DEAD. Update the hint field so
510 : * that the page is reconsidered for pruning in future.
511 : */
512 441 : heap_prune_record_prunable(prstate,
513 882 : HeapTupleHeaderGetUpdateXid(htup));
514 441 : break;
515 :
516 : case HEAPTUPLE_LIVE:
517 : case HEAPTUPLE_INSERT_IN_PROGRESS:
518 :
519 : /*
520 : * If we wanted to optimize for aborts, we might consider
521 : * marking the page prunable when we see INSERT_IN_PROGRESS.
522 : * But we don't. See related decisions about when to mark the
523 : * page prunable in heapam.c.
524 : */
525 413959 : break;
526 :
527 : default:
528 0 : elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
529 : break;
530 : }
531 :
532 : /*
533 : * Remember the last DEAD tuple seen. We will advance past
534 : * RECENTLY_DEAD tuples just in case there's a DEAD one after them;
535 : * but we can't advance past anything else. (XXX is it really worth
536 : * continuing to scan beyond RECENTLY_DEAD? The case where we will
537 : * find another DEAD tuple is a fairly unusual corner case.)
538 : */
539 562178 : if (tupdead)
540 : {
541 107106 : latestdead = offnum;
542 107106 : HeapTupleHeaderAdvanceLatestRemovedXid(htup,
543 : &prstate->latestRemovedXid);
544 : }
545 455072 : else if (!recent_dead)
546 414400 : break;
547 :
548 : /*
549 : * If the tuple is not HOT-updated, then we are at the end of this
550 : * HOT-update chain.
551 : */
552 147778 : if (!HeapTupleHeaderIsHotUpdated(htup))
553 : break;
554 :
555 : /*
556 : * Advance to next chain member.
557 : */
558 4531 : Assert(ItemPointerGetBlockNumber(&htup->t_ctid) ==
559 : BufferGetBlockNumber(buffer));
560 4531 : offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
561 4531 : priorXmax = HeapTupleHeaderGetUpdateXid(htup);
562 7738 : }
563 :
564 : /*
565 : * If we found a DEAD tuple in the chain, adjust the HOT chain so that all
566 : * the DEAD tuples at the start of the chain are removed and the root line
567 : * pointer is appropriately redirected.
568 : */
569 557730 : if (OffsetNumberIsValid(latestdead))
570 : {
571 : /*
572 : * Mark as unused each intermediate item that we are able to remove
573 : * from the chain.
574 : *
575 : * When the previous item is the last dead tuple seen, we are at the
576 : * right candidate for redirection.
577 : */
578 107674 : for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++)
579 : {
580 3106 : heap_prune_record_unused(prstate, chainitems[i]);
581 3106 : ndeleted++;
582 : }
583 :
584 : /*
585 : * If the root entry had been a normal tuple, we are deleting it, so
586 : * count it in the result. But changing a redirect (even to DEAD
587 : * state) doesn't count.
588 : */
589 104568 : if (ItemIdIsNormal(rootlp))
590 104000 : ndeleted++;
591 :
592 : /*
593 : * If the DEAD tuple is at the end of the chain, the entire chain is
594 : * dead and the root line pointer can be marked dead. Otherwise just
595 : * redirect the root to the correct chain member.
596 : */
597 104568 : if (i >= nchain)
598 103243 : heap_prune_record_dead(prstate, rootoffnum);
599 : else
600 1325 : heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
601 : }
602 453162 : else if (nchain < 2 && ItemIdIsRedirected(rootlp))
603 : {
604 : /*
605 : * We found a redirect item that doesn't point to a valid follow-on
606 : * item. This can happen if the loop in heap_page_prune caused us to
607 : * visit the dead successor of a redirect item before visiting the
608 : * redirect item. We can clean up by setting the redirect item to
609 : * DEAD state.
610 : */
611 22 : heap_prune_record_dead(prstate, rootoffnum);
612 : }
613 :
614 557730 : return ndeleted;
615 : }
616 :
617 : /* Record lowest soon-prunable XID */
618 : static void
619 41113 : heap_prune_record_prunable(PruneState *prstate, TransactionId xid)
620 : {
621 : /*
622 : * This should exactly match the PageSetPrunable macro. We can't store
623 : * directly into the page header yet, so we update working state.
624 : */
625 41113 : Assert(TransactionIdIsNormal(xid));
626 81471 : if (!TransactionIdIsValid(prstate->new_prune_xid) ||
627 40358 : TransactionIdPrecedes(xid, prstate->new_prune_xid))
628 848 : prstate->new_prune_xid = xid;
629 41113 : }
630 :
631 : /* Record item pointer to be redirected */
632 : static void
633 1325 : heap_prune_record_redirect(PruneState *prstate,
634 : OffsetNumber offnum, OffsetNumber rdoffnum)
635 : {
636 1325 : Assert(prstate->nredirected < MaxHeapTuplesPerPage);
637 1325 : prstate->redirected[prstate->nredirected * 2] = offnum;
638 1325 : prstate->redirected[prstate->nredirected * 2 + 1] = rdoffnum;
639 1325 : prstate->nredirected++;
640 1325 : Assert(!prstate->marked[offnum]);
641 1325 : prstate->marked[offnum] = true;
642 1325 : Assert(!prstate->marked[rdoffnum]);
643 1325 : prstate->marked[rdoffnum] = true;
644 1325 : }
645 :
646 : /* Record item pointer to be marked dead */
647 : static void
648 103265 : heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
649 : {
650 103265 : Assert(prstate->ndead < MaxHeapTuplesPerPage);
651 103265 : prstate->nowdead[prstate->ndead] = offnum;
652 103265 : prstate->ndead++;
653 103265 : Assert(!prstate->marked[offnum]);
654 103265 : prstate->marked[offnum] = true;
655 103265 : }
656 :
657 : /* Record item pointer to be marked unused */
658 : static void
659 3391 : heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
660 : {
661 3391 : Assert(prstate->nunused < MaxHeapTuplesPerPage);
662 3391 : prstate->nowunused[prstate->nunused] = offnum;
663 3391 : prstate->nunused++;
664 3391 : Assert(!prstate->marked[offnum]);
665 3391 : prstate->marked[offnum] = true;
666 3391 : }
667 :
668 :
669 : /*
670 : * Perform the actual page changes needed by heap_page_prune.
671 : * It is expected that the caller has suitable pin and lock on the
672 : * buffer, and is inside a critical section.
673 : *
674 : * This is split out because it is also used by heap_xlog_clean()
675 : * to replay the WAL record when needed after a crash. Note that the
676 : * arguments are identical to those of log_heap_clean().
677 : */
678 : void
679 3029 : heap_page_prune_execute(Buffer buffer,
680 : OffsetNumber *redirected, int nredirected,
681 : OffsetNumber *nowdead, int ndead,
682 : OffsetNumber *nowunused, int nunused)
683 : {
684 3029 : Page page = (Page) BufferGetPage(buffer);
685 : OffsetNumber *offnum;
686 : int i;
687 :
688 : /* Update all redirected line pointers */
689 3029 : offnum = redirected;
690 4354 : for (i = 0; i < nredirected; i++)
691 : {
692 1325 : OffsetNumber fromoff = *offnum++;
693 1325 : OffsetNumber tooff = *offnum++;
694 1325 : ItemId fromlp = PageGetItemId(page, fromoff);
695 :
696 1325 : ItemIdSetRedirect(fromlp, tooff);
697 : }
698 :
699 : /* Update all now-dead line pointers */
700 3029 : offnum = nowdead;
701 106294 : for (i = 0; i < ndead; i++)
702 : {
703 103265 : OffsetNumber off = *offnum++;
704 103265 : ItemId lp = PageGetItemId(page, off);
705 :
706 103265 : ItemIdSetDead(lp);
707 : }
708 :
709 : /* Update all now-unused line pointers */
710 3029 : offnum = nowunused;
711 6420 : for (i = 0; i < nunused; i++)
712 : {
713 3391 : OffsetNumber off = *offnum++;
714 3391 : ItemId lp = PageGetItemId(page, off);
715 :
716 3391 : ItemIdSetUnused(lp);
717 : }
718 :
719 : /*
720 : * Finally, repair any fragmentation, and update the page's hint bit about
721 : * whether it has free pointers.
722 : */
723 3029 : PageRepairFragmentation(page);
724 3029 : }
725 :
726 :
727 : /*
728 : * For all items in this page, find their respective root line pointers.
729 : * If item k is part of a HOT-chain with root at item j, then we set
730 : * root_offsets[k - 1] = j.
731 : *
732 : * The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
733 : * We zero out all unused entries.
734 : *
735 : * The function must be called with at least share lock on the buffer, to
736 : * prevent concurrent prune operations.
737 : *
738 : * Note: The information collected here is valid only as long as the caller
739 : * holds a pin on the buffer. Once pin is released, a tuple might be pruned
740 : * and reused by a completely unrelated tuple.
741 : */
742 : void
743 11603 : heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
744 : {
745 : OffsetNumber offnum,
746 : maxoff;
747 :
748 11603 : MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));
749 :
750 11603 : maxoff = PageGetMaxOffsetNumber(page);
751 765133 : for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
752 : {
753 753530 : ItemId lp = PageGetItemId(page, offnum);
754 : HeapTupleHeader htup;
755 : OffsetNumber nextoffnum;
756 : TransactionId priorXmax;
757 :
758 : /* skip unused and dead items */
759 753530 : if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
760 162 : continue;
761 :
762 753368 : if (ItemIdIsNormal(lp))
763 : {
764 753363 : htup = (HeapTupleHeader) PageGetItem(page, lp);
765 :
766 : /*
767 : * Check if this tuple is part of a HOT-chain rooted at some other
768 : * tuple. If so, skip it for now; we'll process it when we find
769 : * its root.
770 : */
771 753363 : if (HeapTupleHeaderIsHeapOnly(htup))
772 64 : continue;
773 :
774 : /*
775 : * This is either a plain tuple or the root of a HOT-chain.
776 : * Remember it in the mapping.
777 : */
778 753299 : root_offsets[offnum - 1] = offnum;
779 :
780 : /* If it's not the start of a HOT-chain, we're done with it */
781 753299 : if (!HeapTupleHeaderIsHotUpdated(htup))
782 753253 : continue;
783 :
784 : /* Set up to scan the HOT-chain */
785 46 : nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
786 46 : priorXmax = HeapTupleHeaderGetUpdateXid(htup);
787 : }
788 : else
789 : {
790 : /* Must be a redirect item. We do not set its root_offsets entry */
791 5 : Assert(ItemIdIsRedirected(lp));
792 : /* Set up to scan the HOT-chain */
793 5 : nextoffnum = ItemIdGetRedirect(lp);
794 5 : priorXmax = InvalidTransactionId;
795 : }
796 :
797 : /*
798 : * Now follow the HOT-chain and collect other tuples in the chain.
799 : *
800 : * Note: Even though this is a nested loop, the complexity of the
801 : * function is O(N) because a tuple in the page should be visited not
802 : * more than twice, once in the outer loop and once in HOT-chain
803 : * chases.
804 : */
805 : for (;;)
806 : {
807 64 : lp = PageGetItemId(page, nextoffnum);
808 :
809 : /* Check for broken chains */
810 64 : if (!ItemIdIsNormal(lp))
811 0 : break;
812 :
813 64 : htup = (HeapTupleHeader) PageGetItem(page, lp);
814 :
815 123 : if (TransactionIdIsValid(priorXmax) &&
816 59 : !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
817 0 : break;
818 :
819 : /* Remember the root line pointer for this item */
820 64 : root_offsets[nextoffnum - 1] = offnum;
821 :
822 : /* Advance to next chain member, if any */
823 64 : if (!HeapTupleHeaderIsHotUpdated(htup))
824 : break;
825 :
826 13 : nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
827 13 : priorXmax = HeapTupleHeaderGetUpdateXid(htup);
828 13 : }
829 : }
830 11603 : }
|