Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * procarray.c
4 : * POSTGRES process array code.
5 : *
6 : *
7 : * This module maintains arrays of the PGPROC and PGXACT structures for all
8 : * active backends. Although there are several uses for this, the principal
9 : * one is as a means of determining the set of currently running transactions.
10 : *
11 : * Because of various subtle race conditions it is critical that a backend
12 : * hold the correct locks while setting or clearing its MyPgXact->xid field.
13 : * See notes in src/backend/access/transam/README.
14 : *
15 : * The process arrays now also include structures representing prepared
16 : * transactions. The xid and subxids fields of these are valid, as are the
17 : * myProcLocks lists. They can be distinguished from regular backend PGPROCs
18 : * at need by checking for pid == 0.
19 : *
20 : * During hot standby, we also keep a list of XIDs representing transactions
21 : * that are known to be running in the master (or more precisely, were running
22 : * as of the current point in the WAL stream). This list is kept in the
23 : * KnownAssignedXids array, and is updated by watching the sequence of
24 : * arriving XIDs. This is necessary because if we leave those XIDs out of
25 : * snapshots taken for standby queries, then they will appear to be already
26 : * complete, leading to MVCC failures. Note that in hot standby, the PGPROC
27 : * array represents standby processes, which by definition are not running
28 : * transactions that have XIDs.
29 : *
30 : * It is perhaps possible for a backend on the master to terminate without
31 : * writing an abort record for its transaction. While that shouldn't really
32 : * happen, it would tie up KnownAssignedXids indefinitely, so we protect
33 : * ourselves by pruning the array when a valid list of running XIDs arrives.
34 : *
35 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
36 : * Portions Copyright (c) 1994, Regents of the University of California
37 : *
38 : *
39 : * IDENTIFICATION
40 : * src/backend/storage/ipc/procarray.c
41 : *
42 : *-------------------------------------------------------------------------
43 : */
44 : #include "postgres.h"
45 :
46 : #include <signal.h>
47 :
48 : #include "access/clog.h"
49 : #include "access/subtrans.h"
50 : #include "access/transam.h"
51 : #include "access/twophase.h"
52 : #include "access/xact.h"
53 : #include "access/xlog.h"
54 : #include "catalog/catalog.h"
55 : #include "miscadmin.h"
56 : #include "pgstat.h"
57 : #include "storage/proc.h"
58 : #include "storage/procarray.h"
59 : #include "storage/spin.h"
60 : #include "utils/builtins.h"
61 : #include "utils/rel.h"
62 : #include "utils/snapmgr.h"
63 :
64 :
65 : /* Our shared memory area: a fixed header followed by the pgprocnos[] array */
66 : typedef struct ProcArrayStruct
67 : {
68 : int numProcs; /* number of valid pgprocnos[] entries */
69 : int maxProcs; /* allocated size of pgprocnos[] */
70 :
71 : /*
72 : * Known assigned XIDs handling
73 : */
74 : int maxKnownAssignedXids; /* allocated size of array */
75 : int numKnownAssignedXids; /* current # of valid entries */
76 : int tailKnownAssignedXids; /* index of oldest valid element */
77 : int headKnownAssignedXids; /* index of newest element, + 1 */
78 : slock_t known_assigned_xids_lck; /* protects head/tail pointers */
79 :
80 : /*
81 : * Highest subxid that has been removed from KnownAssignedXids array to
82 : * prevent overflow; or InvalidTransactionId if none. We track this for
83 : * similar reasons to tracking overflowing cached subxids in PGXACT
84 : * entries. Must hold exclusive ProcArrayLock to change this, and shared
85 : * lock to read it.
86 : */
87 : TransactionId lastOverflowedXid;
88 :
89 : /* oldest xmin of any replication slot; InvalidTransactionId at creation */
90 : TransactionId replication_slot_xmin;
91 : /* oldest catalog xmin of any replication slot; InvalidTransactionId at creation */
92 : TransactionId replication_slot_catalog_xmin;
93 :
94 : /* pgprocno values kept in ascending order; each indexes allProcs[] and allPgXact[]; has PROCARRAY_MAXPROCS entries */
95 : int pgprocnos[FLEXIBLE_ARRAY_MEMBER];
96 : } ProcArrayStruct;
97 :
98 : static ProcArrayStruct *procArray; /* set by CreateSharedProcArray */
99 :
100 : static PGPROC *allProcs; /* copy of ProcGlobal->allProcs */
101 : static PGXACT *allPgXact; /* copy of ProcGlobal->allPgXact, indexed by pgprocno */
102 :
103 : /*
104 : * Bookkeeping for tracking emulated transactions in recovery
105 : */
106 : static TransactionId *KnownAssignedXids; /* attached only if EnableHotStandby */
107 : static bool *KnownAssignedXidsValid; /* same number of entries as KnownAssignedXids */
108 : static TransactionId latestObservedXid = InvalidTransactionId; /* seeded by ProcArrayInitRecovery */
109 :
110 : /*
111 : * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
112 : * the highest xid that might still be running that we don't have in
113 : * KnownAssignedXids. It is assigned in ProcArrayApplyRecoveryInfo().
114 : */
115 : static TransactionId standbySnapshotPendingXmin;
116 :
117 : #ifdef XIDCACHE_DEBUG
118 :
119 : /* counters for XidCache measurement; each is bumped via the matching xc_*_inc() macro below */
120 : static long xc_by_recent_xmin = 0;
121 : static long xc_by_known_xact = 0;
122 : static long xc_by_my_xact = 0;
123 : static long xc_by_latest_xid = 0;
124 : static long xc_by_main_xid = 0;
125 : static long xc_by_child_xid = 0;
126 : static long xc_by_known_assigned = 0;
127 : static long xc_no_overflow = 0;
128 : static long xc_slow_answer = 0;
129 :
130 : #define xc_by_recent_xmin_inc() (xc_by_recent_xmin++)
131 : #define xc_by_known_xact_inc() (xc_by_known_xact++)
132 : #define xc_by_my_xact_inc() (xc_by_my_xact++)
133 : #define xc_by_latest_xid_inc() (xc_by_latest_xid++)
134 : #define xc_by_main_xid_inc() (xc_by_main_xid++)
135 : #define xc_by_child_xid_inc() (xc_by_child_xid++)
136 : #define xc_by_known_assigned_inc() (xc_by_known_assigned++)
137 : #define xc_no_overflow_inc() (xc_no_overflow++)
138 : #define xc_slow_answer_inc() (xc_slow_answer++)
139 :
140 : static void DisplayXidCache(void); /* called from ProcArrayRemove when proc->pid != 0 */
141 : #else /* !XIDCACHE_DEBUG: the increment macros below expand to no-op expressions */
142 :
143 : #define xc_by_recent_xmin_inc() ((void) 0)
144 : #define xc_by_known_xact_inc() ((void) 0)
145 : #define xc_by_my_xact_inc() ((void) 0)
146 : #define xc_by_latest_xid_inc() ((void) 0)
147 : #define xc_by_main_xid_inc() ((void) 0)
148 : #define xc_by_child_xid_inc() ((void) 0)
149 : #define xc_by_known_assigned_inc() ((void) 0)
150 : #define xc_no_overflow_inc() ((void) 0)
151 : #define xc_slow_answer_inc() ((void) 0)
152 : #endif /* XIDCACHE_DEBUG */
153 :
154 : /* Primitives for KnownAssignedXids array handling for standby (forward declarations) */
155 : static void KnownAssignedXidsCompress(bool force);
156 : static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
157 : bool exclusive_lock);
158 : static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
159 : static bool KnownAssignedXidExists(TransactionId xid);
160 : static void KnownAssignedXidsRemove(TransactionId xid);
161 : static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
162 : TransactionId *subxids);
163 : static void KnownAssignedXidsRemovePreceding(TransactionId xid);
164 : static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax);
165 : static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray,
166 : TransactionId *xmin,
167 : TransactionId xmax);
168 : static TransactionId KnownAssignedXidsGetOldestXmin(void);
169 : static void KnownAssignedXidsDisplay(int trace_level);
170 : static void KnownAssignedXidsReset(void);
171 : static inline void ProcArrayEndTransactionInternal(PGPROC *proc,
172 : PGXACT *pgxact, TransactionId latestXid); /* does no locking itself; callers hold ProcArrayLock */
173 : static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid); /* used when ProcArrayLock cannot be taken immediately */
174 :
175 : /*
176 : * Report shared-memory space needed by CreateSharedProcArray: the ProcArrayStruct header plus PROCARRAY_MAXPROCS ints, and, when EnableHotStandby is set, the two KnownAssignedXids arrays. Takes no arguments and returns the total as a Size.
177 : */
178 : Size
179 5 : ProcArrayShmemSize(void)
180 : {
181 : Size size;
182 :
183 : /* Size of the ProcArray structure itself (the macro stays defined for the rest of the file) */
184 : #define PROCARRAY_MAXPROCS (MaxBackends + max_prepared_xacts)
185 :
186 5 : size = offsetof(ProcArrayStruct, pgprocnos); /* fixed header, up to the flexible array */
187 5 : size = add_size(size, mul_size(sizeof(int), PROCARRAY_MAXPROCS)); /* the pgprocnos[] entries */
188 :
189 : /*
190 : * During Hot Standby processing we have a data structure called
191 : * KnownAssignedXids, created in shared memory. Local data structures are
192 : * also created in various backends during GetSnapshotData(),
193 : * TransactionIdIsInProgress() and GetRunningTransactionData(). All of the
194 : * main structures created in those functions must be identically sized,
195 : * since we may at times copy the whole of the data structures around. We
196 : * refer to this size as TOTAL_MAX_CACHED_SUBXIDS.
197 : *
198 : * Ideally we'd only create this structure if we were actually doing hot
199 : * standby in the current run, but we don't know that yet at the time
200 : * shared memory is being set up.
201 : */
202 : #define TOTAL_MAX_CACHED_SUBXIDS \
203 : ((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)
204 :
205 5 : if (EnableHotStandby)
206 : {
207 5 : size = add_size(size,
208 : mul_size(sizeof(TransactionId),
209 5 : TOTAL_MAX_CACHED_SUBXIDS)); /* KnownAssignedXids */
210 5 : size = add_size(size,
211 5 : mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS)); /* KnownAssignedXidsValid */
212 : }
213 :
214 5 : return size;
215 : }
216 :
217 : /*
218 : * Initialize the shared PGPROC array during postmaster startup: create or attach to the Proc Array struct, initialize its header only when it was not found, cache the ProcGlobal array pointers, and attach the KnownAssignedXids arrays when EnableHotStandby is set.
219 : */
220 : void
221 5 : CreateSharedProcArray(void)
222 : {
223 : bool found;
224 :
225 : /* Create or attach to the ProcArray shared structure (same header + pgprocnos[] size as in ProcArrayShmemSize) */
226 5 : procArray = (ProcArrayStruct *)
227 5 : ShmemInitStruct("Proc Array",
228 : add_size(offsetof(ProcArrayStruct, pgprocnos),
229 : mul_size(sizeof(int),
230 5 : PROCARRAY_MAXPROCS)),
231 : &found);
232 :
233 5 : if (!found)
234 : {
235 : /*
236 : * We're the first - initialize.
237 : */
238 5 : procArray->numProcs = 0;
239 5 : procArray->maxProcs = PROCARRAY_MAXPROCS;
240 5 : procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS;
241 5 : procArray->numKnownAssignedXids = 0;
242 5 : procArray->tailKnownAssignedXids = 0;
243 5 : procArray->headKnownAssignedXids = 0;
244 5 : SpinLockInit(&procArray->known_assigned_xids_lck);
245 5 : procArray->lastOverflowedXid = InvalidTransactionId;
246 5 : procArray->replication_slot_xmin = InvalidTransactionId;
247 5 : procArray->replication_slot_catalog_xmin = InvalidTransactionId;
248 : }
249 :
250 5 : allProcs = ProcGlobal->allProcs; /* file-local shortcuts, set on every call */
251 5 : allPgXact = ProcGlobal->allPgXact;
252 :
253 : /* Create or attach to the KnownAssignedXids arrays too, if needed; found is not examined afterwards and the array contents are not initialized here */
254 5 : if (EnableHotStandby)
255 : {
256 5 : KnownAssignedXids = (TransactionId *)
257 5 : ShmemInitStruct("KnownAssignedXids",
258 : mul_size(sizeof(TransactionId),
259 5 : TOTAL_MAX_CACHED_SUBXIDS),
260 : &found);
261 5 : KnownAssignedXidsValid = (bool *)
262 5 : ShmemInitStruct("KnownAssignedXidsValid",
263 5 : mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
264 : &found);
265 : }
266 :
267 : /* Register and initialize fields of ProcLWLockTranche */
268 5 : LWLockRegisterTranche(LWTRANCHE_PROC, "proc");
269 5 : }
270 :
271 : /*
272 : * Add the specified PGPROC to the shared array: insert proc->pgprocno into procArray->pgprocnos[] at its sorted position while holding ProcArrayLock exclusively. Reports FATAL if numProcs has already reached maxProcs.
273 : */
274 : void
275 344 : ProcArrayAdd(PGPROC *proc)
276 : {
277 344 : ProcArrayStruct *arrayP = procArray;
278 : int index;
279 :
280 344 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
281 :
282 344 : if (arrayP->numProcs >= arrayP->maxProcs)
283 : {
284 : /*
285 : * Oops, no room. (This really shouldn't happen, since there is a
286 : * fixed supply of PGPROC structs too, and so we should have failed
287 : * earlier.)
288 : */
289 0 : LWLockRelease(ProcArrayLock); /* drop the lock before reporting */
290 0 : ereport(FATAL,
291 : (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
292 : errmsg("sorry, too many clients already")));
293 : }
294 :
295 : /*
296 : * Keep the procs array sorted by (PGPROC *) so that we can utilize
297 : * locality of references much better. This is useful while traversing the
298 : * ProcArray because there is an increased likelihood of finding the next
299 : * PGPROC structure in the cache.
300 : *
301 : * Since the occurrence of adding/removing a proc is much lower than the
302 : * access to the ProcArray itself, the overhead should be marginal.
303 : */
304 981 : for (index = 0; index < arrayP->numProcs; index++)
305 : {
306 : /*
307 : * If we are the first PGPROC or if we have found our right position
308 : * in the array, break
309 : */
310 972 : if ((arrayP->pgprocnos[index] == -1) || (arrayP->pgprocnos[index] > proc->pgprocno))
311 : break;
312 : }
313 :
314 344 : memmove(&arrayP->pgprocnos[index + 1], &arrayP->pgprocnos[index],
315 344 : (arrayP->numProcs - index) * sizeof(int)); /* shift entries from index onward up one slot */
316 344 : arrayP->pgprocnos[index] = proc->pgprocno;
317 344 : arrayP->numProcs++;
318 :
319 344 : LWLockRelease(ProcArrayLock);
320 344 : }
321 :
322 : /*
323 : * Remove the specified PGPROC from the shared array.
324 : *
325 : * When latestXid is a valid XID, we are removing a live 2PC gxact from the
326 : * array, and thus causing it to appear as "not running" anymore. In this
327 : * case we must advance latestCompletedXid. (This is essentially the same
328 : * as ProcArrayEndTransaction followed by removal of the PGPROC, but we take
329 : * the ProcArrayLock only once, and don't damage the content of the PGPROC;
330 : * twophase.c depends on the latter.) If proc is not found in the array, a LOG message is emitted and the array is left unchanged.
331 : */
332 : void
333 344 : ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
334 : {
335 344 : ProcArrayStruct *arrayP = procArray;
336 : int index;
337 :
338 : #ifdef XIDCACHE_DEBUG
339 : /* dump stats at backend shutdown, but not prepared-xact end */
340 : if (proc->pid != 0)
341 : DisplayXidCache();
342 : #endif
343 :
344 344 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
345 :
346 344 : if (TransactionIdIsValid(latestXid))
347 : {
348 6 : Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
349 :
350 : /* Advance global latestCompletedXid while holding the lock */
351 6 : if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
352 : latestXid))
353 1 : ShmemVariableCache->latestCompletedXid = latestXid;
354 : }
355 : else
356 : {
357 : /* Shouldn't be trying to remove a live transaction here */
358 338 : Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
359 : }
360 :
361 1055 : for (index = 0; index < arrayP->numProcs; index++)
362 : {
363 1055 : if (arrayP->pgprocnos[index] == proc->pgprocno)
364 : {
365 : /* Keep the PGPROC array sorted: close the gap by shifting later entries down one slot. See notes in ProcArrayAdd */
366 344 : memmove(&arrayP->pgprocnos[index], &arrayP->pgprocnos[index + 1],
367 344 : (arrayP->numProcs - index - 1) * sizeof(int));
368 344 : arrayP->pgprocnos[arrayP->numProcs - 1] = -1; /* for debugging */
369 344 : arrayP->numProcs--;
370 344 : LWLockRelease(ProcArrayLock);
371 688 : return;
372 : }
373 : }
374 :
375 : /* Oops: proc was not in the array; release the lock before logging */
376 0 : LWLockRelease(ProcArrayLock);
377 :
378 0 : elog(LOG, "failed to find proc %p in ProcArray", proc);
379 : }
380 :
381 :
382 : /*
383 : * ProcArrayEndTransaction -- mark a transaction as no longer running
384 : *
385 : * This is used interchangeably for commit and abort cases. The transaction
386 : * commit/abort must already be reported to WAL and pg_xact.
387 : *
388 : * proc is currently always MyProc, but we pass it explicitly for flexibility.
389 : * latestXid is the latest Xid among the transaction's main XID and
390 : * subtransactions, or InvalidTransactionId if it has no XID. (We must ask
391 : * the caller to pass latestXid, instead of computing it from the PGPROC's
392 : * contents, because the subxid information in the PGPROC might be
393 : * incomplete.)
394 : */
395 : void
396 26161 : ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
397 : {
398 26161 : PGXACT *pgxact = &allPgXact[proc->pgprocno];
399 :
400 26161 : if (TransactionIdIsValid(latestXid))
401 : {
402 : /*
403 : * We must lock ProcArrayLock while clearing our advertised XID, so
404 : * that we do not exit the set of "running" transactions while someone
405 : * else is taking a snapshot. See discussion in
406 : * src/backend/access/transam/README.
407 : */
408 10556 : Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
409 :
410 : /*
411 : * If we can immediately acquire ProcArrayLock, we clear our own XID
412 : * and release the lock. If not, use group XID clearing to improve
413 : * efficiency.
414 : */
415 10556 : if (LWLockConditionalAcquire(ProcArrayLock, LW_EXCLUSIVE))
416 : {
417 10538 : ProcArrayEndTransactionInternal(proc, pgxact, latestXid); /* lock obtained without waiting */
418 10538 : LWLockRelease(ProcArrayLock);
419 : }
420 : else
421 18 : ProcArrayGroupClearXid(proc, latestXid); /* lock busy: join the group-clear list */
422 : }
423 : else
424 : {
425 : /*
426 : * If we have no XID, we don't need to lock, since we won't affect
427 : * anyone else's calculation of a snapshot. We might change their
428 : * estimate of global xmin, but that's OK.
429 : */
430 15605 : Assert(!TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
431 :
432 15605 : proc->lxid = InvalidLocalTransactionId;
433 15605 : pgxact->xmin = InvalidTransactionId;
434 : /* must be cleared with xid/xmin: */
435 15605 : pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
436 15605 : pgxact->delayChkpt = false; /* be sure this is cleared in abort */
437 15605 : proc->recoveryConflictPending = false;
438 :
439 15605 : Assert(pgxact->nxids == 0); /* no XID, so the subxid cache is expected to be empty already */
440 15605 : Assert(pgxact->overflowed == false);
441 : }
442 26161 : }
443 :
444 : /*
445 : * Mark a write transaction as no longer running.
446 : * Resets xid, lxid, xmin, the vacuum-state flag bits, delayChkpt, recoveryConflictPending and the subxid cache of the given proc/pgxact pair, then advances ShmemVariableCache->latestCompletedXid if latestXid is later.
447 : * We don't do any locking here; caller must handle that.
448 : */
449 : static inline void
450 10556 : ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact,
451 : TransactionId latestXid)
452 : {
453 10556 : pgxact->xid = InvalidTransactionId;
454 10556 : proc->lxid = InvalidLocalTransactionId;
455 10556 : pgxact->xmin = InvalidTransactionId;
456 : /* must be cleared with xid/xmin: */
457 10556 : pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
458 10556 : pgxact->delayChkpt = false; /* be sure this is cleared in abort */
459 10556 : proc->recoveryConflictPending = false;
460 :
461 : /* Clear the subtransaction-XID cache too while holding the lock */
462 10556 : pgxact->nxids = 0;
463 10556 : pgxact->overflowed = false;
464 :
465 : /* Also advance global latestCompletedXid while holding the lock; it is never moved backwards here */
466 10556 : if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
467 : latestXid))
468 9332 : ShmemVariableCache->latestCompletedXid = latestXid;
469 10556 : }
470 :
471 : /*
472 : * ProcArrayGroupClearXid -- group XID clearing
473 : *
474 : * When we cannot immediately acquire ProcArrayLock in exclusive mode at
475 : * commit time, add ourselves to a list of processes that need their XIDs
476 : * cleared. The first process to add itself to the list will acquire
477 : * ProcArrayLock in exclusive mode and perform ProcArrayEndTransactionInternal
478 : * on behalf of all group members. This avoids a great deal of contention
479 : * around ProcArrayLock when many processes are trying to commit at once,
480 : * since the lock need not be repeatedly handed off from one committing
481 : * process to the next. The list is threaded through procArrayGroupNext, with its head in ProcGlobal->procArrayGroupFirst; entries are pgprocno values.
482 : */
483 : static void
484 18 : ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid)
485 : {
486 18 : volatile PROC_HDR *procglobal = ProcGlobal;
487 : uint32 nextidx;
488 : uint32 wakeidx;
489 :
490 : /* We should definitely have an XID to clear. */
491 18 : Assert(TransactionIdIsValid(allPgXact[proc->pgprocno].xid));
492 :
493 : /* Add ourselves to the list of processes needing a group XID clear. */
494 18 : proc->procArrayGroupMember = true; /* set back to false by the leader in the wakeup loop */
495 18 : proc->procArrayGroupMemberXid = latestXid; /* the leader passes this to ProcArrayEndTransactionInternal */
496 : while (true)
497 : {
498 18 : nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst); /* current list head */
499 18 : pg_atomic_write_u32(&proc->procArrayGroupNext, nextidx); /* link ourselves in front of it */
500 :
501 18 : if (pg_atomic_compare_exchange_u32(&procglobal->procArrayGroupFirst,
502 : &nextidx,
503 18 : (uint32) proc->pgprocno))
504 18 : break; /* we are now the head; otherwise loop and retry */
505 0 : }
506 :
507 : /*
508 : * If the list was not empty, the leader will clear our XID. It is
509 : * impossible to have followers without a leader because the first process
510 : * that has added itself to the list will always have nextidx as
511 : * INVALID_PGPROCNO.
512 : */
513 18 : if (nextidx != INVALID_PGPROCNO)
514 : {
515 2 : int extraWaits = 0; /* semaphore wakeups consumed while still a group member */
516 :
517 : /* Sleep until the leader clears our XID. */
518 2 : pgstat_report_wait_start(WAIT_EVENT_PROCARRAY_GROUP_UPDATE);
519 : for (;;)
520 : {
521 : /* acts as a read barrier */
522 2 : PGSemaphoreLock(proc->sem);
523 2 : if (!proc->procArrayGroupMember)
524 2 : break;
525 0 : extraWaits++;
526 0 : }
527 2 : pgstat_report_wait_end();
528 :
529 2 : Assert(pg_atomic_read_u32(&proc->procArrayGroupNext) == INVALID_PGPROCNO);
530 :
531 : /* Fix semaphore count for any absorbed wakeups */
532 4 : while (extraWaits-- > 0)
533 0 : PGSemaphoreUnlock(proc->sem);
534 4 : return;
535 : }
536 :
537 : /* We are the leader. Acquire the lock on behalf of everyone. */
538 16 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
539 :
540 : /*
541 : * Now that we've got the lock, clear the list of processes waiting for
542 : * group XID clearing, saving a pointer to the head of the list. Trying
543 : * to pop elements one at a time could lead to an ABA problem.
544 : */
545 : while (true)
546 : {
547 16 : nextidx = pg_atomic_read_u32(&procglobal->procArrayGroupFirst);
548 16 : if (pg_atomic_compare_exchange_u32(&procglobal->procArrayGroupFirst,
549 : &nextidx,
550 : INVALID_PGPROCNO))
551 16 : break; /* whole list detached; nextidx holds its former head */
552 0 : }
553 :
554 : /* Remember head of list so we can perform wakeups after dropping lock. */
555 16 : wakeidx = nextidx;
556 :
557 : /* Walk the list and clear all XIDs. */
558 50 : while (nextidx != INVALID_PGPROCNO)
559 : {
560 18 : PGPROC *proc = &allProcs[nextidx]; /* NOTE(review): this local shadows the proc parameter */
561 18 : PGXACT *pgxact = &allPgXact[nextidx];
562 :
563 18 : ProcArrayEndTransactionInternal(proc, pgxact, proc->procArrayGroupMemberXid); /* each member supplies its own latestXid */
564 :
565 : /* Move to next proc in list. */
566 18 : nextidx = pg_atomic_read_u32(&proc->procArrayGroupNext);
567 : }
568 :
569 : /* We're done with the lock now. */
570 16 : LWLockRelease(ProcArrayLock);
571 :
572 : /*
573 : * Now that we've released the lock, go back and wake everybody up. We
574 : * don't do this under the lock so as to keep lock hold times to a
575 : * minimum. The system calls we need to perform to wake other processes
576 : * up are probably much slower than the simple memory writes we did while
577 : * holding the lock.
578 : */
579 50 : while (wakeidx != INVALID_PGPROCNO)
580 : {
581 18 : PGPROC *proc = &allProcs[wakeidx]; /* NOTE(review): shadows the proc parameter here as well */
582 :
583 18 : wakeidx = pg_atomic_read_u32(&proc->procArrayGroupNext); /* fetch the successor before unlinking this entry */
584 18 : pg_atomic_write_u32(&proc->procArrayGroupNext, INVALID_PGPROCNO);
585 :
586 : /* ensure all previous writes are visible before follower continues. */
587 18 : pg_write_barrier();
588 :
589 18 : proc->procArrayGroupMember = false;
590 :
591 18 : if (proc != MyProc) /* compares the list entry, not the parameter; only other processes are signalled */
592 2 : PGSemaphoreUnlock(proc->sem);
593 : }
594 : }
595 :
596 : /*
597 : * ProcArrayClearTransaction -- clear the transaction fields
598 : *
599 : * This is used after successfully preparing a 2-phase transaction. We are
600 : * not actually reporting the transaction's XID as no longer running --- it
601 : * will still appear as running because the 2PC's gxact is in the ProcArray
602 : * too. We just have to clear out our own PGXACT. Unlike ProcArrayEndTransactionInternal, latestCompletedXid is not touched here.
603 : */
604 : void
605 6 : ProcArrayClearTransaction(PGPROC *proc)
606 : {
607 6 : PGXACT *pgxact = &allPgXact[proc->pgprocno];
608 :
609 : /*
610 : * We can skip locking ProcArrayLock here, because this action does not
611 : * actually change anyone's view of the set of running XIDs: our entry is
612 : * duplicate with the gxact that has already been inserted into the
613 : * ProcArray.
614 : */
615 6 : pgxact->xid = InvalidTransactionId;
616 6 : proc->lxid = InvalidLocalTransactionId;
617 6 : pgxact->xmin = InvalidTransactionId;
618 6 : proc->recoveryConflictPending = false;
619 :
620 : /* redundant, but just in case */
621 6 : pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
622 6 : pgxact->delayChkpt = false;
623 :
624 : /* Clear the subtransaction-XID cache too */
625 6 : pgxact->nxids = 0;
626 6 : pgxact->overflowed = false;
627 6 : }
628 :
629 : /*
630 : * ProcArrayInitRecovery -- initialize recovery xid mgmt environment
631 : *
632 : * Remember up to where the startup process initialized the CLOG and subtrans
633 : * so we can ensure it's initialized gaplessly up to the point where necessary
634 : * while in recovery. initializedUptoXID must be a normal XID and standbyState must be STANDBY_INITIALIZED (both asserted).
635 : */
636 : void
637 0 : ProcArrayInitRecovery(TransactionId initializedUptoXID)
638 : {
639 0 : Assert(standbyState == STANDBY_INITIALIZED);
640 0 : Assert(TransactionIdIsNormal(initializedUptoXID));
641 :
642 : /*
643 : * we set latestObservedXid to the xid SUBTRANS has been initialized up
644 : * to, so we can extend it from that point onwards in
645 : * RecordKnownAssignedTransactionIds, and when we get consistent in
646 : * ProcArrayApplyRecoveryInfo().
647 : */
648 0 : latestObservedXid = initializedUptoXID;
649 0 : TransactionIdRetreat(latestObservedXid); /* step back one xid; ProcArrayApplyRecoveryInfo advances it again before extending SUBTRANS */
650 0 : }
651 :
652 : /*
653 : * ProcArrayApplyRecoveryInfo -- apply recovery info about xids
654 : *
655 : * Takes us through 3 states: Initialized, Pending and Ready.
656 : * Normal case is to go all the way to Ready straight away, though there
657 : * are atypical cases where we need to take it in steps.
658 : *
659 : * Use the data about running transactions on master to create the initial
660 : * state of KnownAssignedXids. We also use these records to regularly prune
661 : * KnownAssignedXids because we know it is possible that some transactions
662 : * with FATAL errors fail to write abort records, which could cause eventual
663 : * overflow.
664 : *
665 : * See comments for LogStandbySnapshot().
666 : */
667 : void
668 0 : ProcArrayApplyRecoveryInfo(RunningTransactions running)
669 : {
670 : TransactionId *xids;
671 : int nxids;
672 : TransactionId nextXid;
673 : int i;
674 :
675 0 : Assert(standbyState >= STANDBY_INITIALIZED);
676 0 : Assert(TransactionIdIsValid(running->nextXid));
677 0 : Assert(TransactionIdIsValid(running->oldestRunningXid));
678 0 : Assert(TransactionIdIsNormal(running->latestCompletedXid));
679 :
680 : /*
681 : * Remove stale transactions, if any.
682 : */
683 0 : ExpireOldKnownAssignedTransactionIds(running->oldestRunningXid);
684 :
685 : /*
686 : * Remove stale locks, if any.
687 : *
688 : * Locks are always assigned to the toplevel xid so we don't need to care
689 : * about subxcnt/subxids (and by extension not about ->suboverflowed).
690 : */
691 0 : StandbyReleaseOldLocks(running->xcnt, running->xids);
692 :
693 : /*
694 : * If our snapshot is already valid, nothing else to do...
695 : */
696 0 : if (standbyState == STANDBY_SNAPSHOT_READY)
697 0 : return;
698 :
699 : /*
700 : * If our initial RunningTransactionsData had an overflowed snapshot then
701 : * we knew we were missing some subxids from our snapshot. If we continue
702 : * to see overflowed snapshots then we might never be able to start up, so
703 : * we make another test to see if our snapshot is now valid. We know that
704 : * the missing subxids are equal to or earlier than nextXid. After we
705 : * initialise we continue to apply changes during recovery, so once the
706 : * oldestRunningXid is later than the nextXid from the initial snapshot we
707 : * know that we no longer have missing information and can mark the
708 : * snapshot as valid.
709 : */
710 0 : if (standbyState == STANDBY_SNAPSHOT_PENDING)
711 : {
712 : /*
713 : * If the snapshot isn't overflowed or if it's empty we can reset our
714 : * pending state and use this snapshot instead.
715 : */
716 0 : if (!running->subxid_overflow || running->xcnt == 0)
717 : {
718 : /*
719 : * If we have already collected known assigned xids, we need to
720 : * throw them away before we apply the recovery snapshot.
721 : */
722 0 : KnownAssignedXidsReset();
723 0 : standbyState = STANDBY_INITIALIZED;
724 : }
725 : else
726 : {
727 0 : if (TransactionIdPrecedes(standbySnapshotPendingXmin,
728 : running->oldestRunningXid))
729 : {
730 0 : standbyState = STANDBY_SNAPSHOT_READY;
731 0 : elog(trace_recovery(DEBUG1),
732 : "recovery snapshots are now enabled");
733 : }
734 : else
735 0 : elog(trace_recovery(DEBUG1),
736 : "recovery snapshot waiting for non-overflowed snapshot or "
737 : "until oldest active xid on standby is at least %u (now %u)",
738 : standbySnapshotPendingXmin,
739 : running->oldestRunningXid);
740 0 : return;
741 : }
742 : }
743 :
744 0 : Assert(standbyState == STANDBY_INITIALIZED);
745 :
746 : /*
747 : * OK, we need to initialise from the RunningTransactionsData record.
748 : *
749 : * NB: this can be reached at least twice, so make sure new code can deal
750 : * with that.
751 : */
752 :
753 : /*
754 : * Nobody else is running yet, but take locks anyhow
755 : */
756 0 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
757 :
758 : /*
759 : * KnownAssignedXids is sorted so we cannot just add the xids, we have to
760 : * sort them first.
761 : *
762 : * Some of the new xids are top-level xids and some are subtransactions.
763 : * We don't call SubtransSetParent because it doesn't matter yet. If we
764 : * aren't overflowed then all xids will fit in snapshot and so we don't
765 : * need subtrans. If we later overflow, an xid assignment record will add
766 : * xids to subtrans. If RunningXacts is overflowed then we don't have
767 : * enough information to correctly update subtrans anyway.
768 : */
769 :
770 : /*
771 : * Allocate a temporary array to avoid modifying the array passed as
772 : * argument.
773 : */
774 0 : xids = palloc(sizeof(TransactionId) * (running->xcnt + running->subxcnt));
775 :
776 : /*
777 : * Add to the temp array any xids which have not already completed.
778 : */
779 0 : nxids = 0;
780 0 : for (i = 0; i < running->xcnt + running->subxcnt; i++)
781 : {
782 0 : TransactionId xid = running->xids[i];
783 :
784 : /*
785 : * The running-xacts snapshot can contain xids that were still visible
786 : * in the procarray when the snapshot was taken, but were already
787 : * WAL-logged as completed. They're not running anymore, so ignore
788 : * them.
789 : */
790 0 : if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
791 0 : continue;
792 :
793 0 : xids[nxids++] = xid;
794 : }
795 :
796 0 : if (nxids > 0)
797 : {
798 0 : if (procArray->numKnownAssignedXids != 0)
799 : {
800 0 : LWLockRelease(ProcArrayLock);
801 0 : elog(ERROR, "KnownAssignedXids is not empty");
802 : }
803 :
804 : /*
805 : * Sort the array so that we can add them safely into
806 : * KnownAssignedXids.
807 : */
808 0 : qsort(xids, nxids, sizeof(TransactionId), xidComparator);
809 :
810 : /*
811 : * Add the sorted snapshot into KnownAssignedXids
812 : */
813 0 : for (i = 0; i < nxids; i++)
814 0 : KnownAssignedXidsAdd(xids[i], xids[i], true);
815 :
816 0 : KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
817 : }
818 :
819 0 : pfree(xids);
820 :
821 : /*
822 : * latestObservedXid is at least set to the point where SUBTRANS was
823 : * started up to (c.f. ProcArrayInitRecovery()) or to the biggest xid
824 : * RecordKnownAssignedTransactionIds() was called for. Initialize
825 : * subtrans from thereon, up to nextXid - 1.
826 : *
827 : * We need to duplicate parts of RecordKnownAssignedTransactionId() here,
828 : * because we've just added xids to the known assigned xids machinery that
829 : * haven't gone through RecordKnownAssignedTransactionId().
830 : */
831 0 : Assert(TransactionIdIsNormal(latestObservedXid));
832 0 : TransactionIdAdvance(latestObservedXid);
833 0 : while (TransactionIdPrecedes(latestObservedXid, running->nextXid))
834 : {
835 0 : ExtendSUBTRANS(latestObservedXid);
836 0 : TransactionIdAdvance(latestObservedXid);
837 : }
838 0 : TransactionIdRetreat(latestObservedXid); /* = running->nextXid - 1 */
839 :
840 : /* ----------
841 : * Now we've got the running xids we need to set the global values that
842 : * are used to track snapshots as they evolve further.
843 : *
844 : * - latestCompletedXid which will be the xmax for snapshots
845 : * - lastOverflowedXid which shows whether snapshots overflow
846 : * - nextXid
847 : *
848 : * If the snapshot overflowed, then we still initialise with what we know,
849 : * but the recovery snapshot isn't fully valid yet because we know there
850 : * are some subxids missing. We don't know the specific subxids that are
851 : * missing, so conservatively assume the last one is latestObservedXid.
852 : * ----------
853 : */
854 0 : if (running->subxid_overflow)
855 : {
856 0 : standbyState = STANDBY_SNAPSHOT_PENDING;
857 :
858 0 : standbySnapshotPendingXmin = latestObservedXid;
859 0 : procArray->lastOverflowedXid = latestObservedXid;
860 : }
861 : else
862 : {
863 0 : standbyState = STANDBY_SNAPSHOT_READY;
864 :
865 0 : standbySnapshotPendingXmin = InvalidTransactionId;
866 : }
867 :
868 : /*
869 : * If a transaction wrote a commit record in the gap between taking and
870 : * logging the snapshot then latestCompletedXid may already be higher than
871 : * the value from the snapshot, so check before we use the incoming value.
872 : */
873 0 : if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
874 : running->latestCompletedXid))
875 0 : ShmemVariableCache->latestCompletedXid = running->latestCompletedXid;
876 :
877 0 : Assert(TransactionIdIsNormal(ShmemVariableCache->latestCompletedXid));
878 :
879 0 : LWLockRelease(ProcArrayLock);
880 :
881 : /*
882 : * ShmemVariableCache->nextXid must be beyond any observed xid.
883 : *
884 : * We don't expect anyone else to modify nextXid, hence we don't need to
885 : * hold a lock while examining it. We still acquire the lock to modify
886 : * it, though.
887 : */
888 0 : nextXid = latestObservedXid;
889 0 : TransactionIdAdvance(nextXid);
890 0 : if (TransactionIdFollows(nextXid, ShmemVariableCache->nextXid))
891 : {
892 0 : LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
893 0 : ShmemVariableCache->nextXid = nextXid;
894 0 : LWLockRelease(XidGenLock);
895 : }
896 :
897 0 : Assert(TransactionIdIsValid(ShmemVariableCache->nextXid));
898 :
899 0 : KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
900 0 : if (standbyState == STANDBY_SNAPSHOT_READY)
901 0 : elog(trace_recovery(DEBUG1), "recovery snapshots are now enabled");
902 : else
903 0 : elog(trace_recovery(DEBUG1),
904 : "recovery snapshot waiting for non-overflowed snapshot or "
905 : "until oldest active xid on standby is at least %u (now %u)",
906 : standbySnapshotPendingXmin,
907 : running->oldestRunningXid);
908 : }
909 :
910 : /*
911 : * ProcArrayApplyXidAssignment
912 : * Process an XLOG_XACT_ASSIGNMENT WAL record
913 : */
914 : void
915 0 : ProcArrayApplyXidAssignment(TransactionId topxid,
916 : int nsubxids, TransactionId *subxids)
917 : {
918 : TransactionId max_xid;
919 : int i;
920 :
921 0 : Assert(standbyState >= STANDBY_INITIALIZED);
922 :
923 0 : max_xid = TransactionIdLatest(topxid, nsubxids, subxids);
924 :
925 : /*
926 : * Mark all the subtransactions as observed.
927 : *
928 : * NOTE: This will fail if the subxid contains too many previously
929 : * unobserved xids to fit into known-assigned-xids. That shouldn't happen
930 : * as the code stands, because xid-assignment records should never contain
931 : * more than PGPROC_MAX_CACHED_SUBXIDS entries.
932 : */
933 0 : RecordKnownAssignedTransactionIds(max_xid);
934 :
935 : /*
936 : * Notice that we update pg_subtrans with the top-level xid, rather than
937 : * the parent xid. This is a difference between normal processing and
938 : * recovery, yet is still correct in all cases. The reason is that
939 : * subtransaction commit is not marked in clog until commit processing, so
940 : * all aborted subtransactions have already been clearly marked in clog.
941 : * As a result we are able to refer directly to the top-level
942 : * transaction's state rather than skipping through all the intermediate
943 : * states in the subtransaction tree. This should be the first time we
944 : * have attempted to SubTransSetParent().
945 : */
946 0 : for (i = 0; i < nsubxids; i++)
947 0 : SubTransSetParent(subxids[i], topxid);
948 :
949 : /* KnownAssignedXids isn't maintained yet, so we're done for now */
950 0 : if (standbyState == STANDBY_INITIALIZED)
951 0 : return;
952 :
953 : /*
954 : * Uses same locking as transaction commit
955 : */
956 0 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
957 :
958 : /*
959 : * Remove subxids from known-assigned-xacts.
960 : */
961 0 : KnownAssignedXidsRemoveTree(InvalidTransactionId, nsubxids, subxids);
962 :
963 : /*
964 : * Advance lastOverflowedXid to be at least the last of these subxids.
965 : */
966 0 : if (TransactionIdPrecedes(procArray->lastOverflowedXid, max_xid))
967 0 : procArray->lastOverflowedXid = max_xid;
968 :
969 0 : LWLockRelease(ProcArrayLock);
970 : }
971 :
972 : /*
973 : * TransactionIdIsInProgress -- is given transaction running in some backend
974 : *
975 : * Aside from some shortcuts such as checking RecentXmin and our own Xid,
976 : * there are four possibilities for finding a running transaction:
977 : *
978 : * 1. The given Xid is a main transaction Id. We will find this out cheaply
979 : * by looking at the PGXACT struct for each backend.
980 : *
981 : * 2. The given Xid is one of the cached subxact Xids in the PGPROC array.
982 : * We can find this out cheaply too.
983 : *
984 : * 3. In Hot Standby mode, we must search the KnownAssignedXids list to see
985 : * if the Xid is running on the master.
986 : *
987 : * 4. Search the SubTrans tree to find the Xid's topmost parent, and then see
988 : * if that is running according to PGXACT or KnownAssignedXids. This is the
989 : * slowest way, but sadly it has to be done always if the others failed,
990 : * unless we see that the cached subxact sets are complete (none have
991 : * overflowed).
992 : *
993 : * ProcArrayLock has to be held while we do 1, 2, 3. If we save the top Xids
994 : * while doing 1 and 3, we can release the ProcArrayLock while we do 4.
995 : * This buys back some concurrency (and we can't retrieve the main Xids from
996 : * PGXACT again anyway; see GetNewTransactionId).
997 : */
998 : bool
999 548656 : TransactionIdIsInProgress(TransactionId xid)
1000 : {
1001 : static TransactionId *xids = NULL;
1002 548656 : int nxids = 0;
1003 548656 : ProcArrayStruct *arrayP = procArray;
1004 : TransactionId topxid;
1005 : int i,
1006 : j;
1007 :
1008 : /*
1009 : * Don't bother checking a transaction older than RecentXmin; it could not
1010 : * possibly still be running. (Note: in particular, this guarantees that
1011 : * we reject InvalidTransactionId, FrozenTransactionId, etc as not
1012 : * running.)
1013 : */
1014 548656 : if (TransactionIdPrecedes(xid, RecentXmin))
1015 : {
1016 : xc_by_recent_xmin_inc();
1017 521368 : return false;
1018 : }
1019 :
1020 : /*
1021 : * We may have just checked the status of this transaction, so if it is
1022 : * already known to be completed, we can fall out without any access to
1023 : * shared memory.
1024 : */
1025 27288 : if (TransactionIdIsKnownCompleted(xid))
1026 : {
1027 : xc_by_known_xact_inc();
1028 25611 : return false;
1029 : }
1030 :
1031 : /*
1032 : * Also, we can handle our own transaction (and subtransactions) without
1033 : * any access to shared memory.
1034 : */
1035 1677 : if (TransactionIdIsCurrentTransactionId(xid))
1036 : {
1037 : xc_by_my_xact_inc();
1038 731 : return true;
1039 : }
1040 :
1041 : /*
1042 : * If first time through, get workspace to remember main XIDs in. We
1043 : * malloc it permanently to avoid repeated palloc/pfree overhead.
1044 : */
1045 946 : if (xids == NULL)
1046 : {
1047 : /*
1048 : * In hot standby mode, reserve enough space to hold all xids in the
1049 : * known-assigned list. If we later finish recovery, we no longer need
1050 : * the bigger array, but we don't bother to shrink it.
1051 : */
1052 68 : int maxxids = RecoveryInProgress() ? TOTAL_MAX_CACHED_SUBXIDS : arrayP->maxProcs;
1053 :
1054 68 : xids = (TransactionId *) malloc(maxxids * sizeof(TransactionId));
1055 68 : if (xids == NULL)
1056 0 : ereport(ERROR,
1057 : (errcode(ERRCODE_OUT_OF_MEMORY),
1058 : errmsg("out of memory")));
1059 : }
1060 :
1061 946 : LWLockAcquire(ProcArrayLock, LW_SHARED);
1062 :
1063 : /*
1064 : * Now that we have the lock, we can check latestCompletedXid; if the
1065 : * target Xid is after that, it's surely still running.
1066 : */
1067 946 : if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, xid))
1068 : {
1069 80 : LWLockRelease(ProcArrayLock);
1070 : xc_by_latest_xid_inc();
1071 80 : return true;
1072 : }
1073 :
1074 : /* No shortcuts, gotta grovel through the array */
1075 8739 : for (i = 0; i < arrayP->numProcs; i++)
1076 : {
1077 8003 : int pgprocno = arrayP->pgprocnos[i];
1078 8003 : volatile PGPROC *proc = &allProcs[pgprocno];
1079 8003 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
1080 : TransactionId pxid;
1081 :
1082 : /* Ignore my own proc --- dealt with it above */
1083 8003 : if (proc == MyProc)
1084 779 : continue;
1085 :
1086 : /* Fetch xid just once - see GetNewTransactionId */
1087 7224 : pxid = pgxact->xid;
1088 :
1089 7224 : if (!TransactionIdIsValid(pxid))
1090 5375 : continue;
1091 :
1092 : /*
1093 : * Step 1: check the main Xid
1094 : */
1095 1849 : if (TransactionIdEquals(pxid, xid))
1096 : {
1097 130 : LWLockRelease(ProcArrayLock);
1098 : xc_by_main_xid_inc();
1099 130 : return true;
1100 : }
1101 :
1102 : /*
1103 : * We can ignore main Xids that are younger than the target Xid, since
1104 : * the target could not possibly be their child.
1105 : */
1106 1719 : if (TransactionIdPrecedes(xid, pxid))
1107 602 : continue;
1108 :
1109 : /*
1110 : * Step 2: check the cached child-Xids arrays
1111 : */
1112 1129 : for (j = pgxact->nxids - 1; j >= 0; j--)
1113 : {
1114 : /* Fetch xid just once - see GetNewTransactionId */
1115 12 : TransactionId cxid = proc->subxids.xids[j];
1116 :
1117 12 : if (TransactionIdEquals(cxid, xid))
1118 : {
1119 0 : LWLockRelease(ProcArrayLock);
1120 : xc_by_child_xid_inc();
1121 0 : return true;
1122 : }
1123 : }
1124 :
1125 : /*
1126 : * Save the main Xid for step 4. We only need to remember main Xids
1127 : * that have uncached children. (Note: there is no race condition
1128 : * here because the overflowed flag cannot be cleared, only set, while
1129 : * we hold ProcArrayLock. So we can't miss an Xid that we need to
1130 : * worry about.)
1131 : */
1132 1117 : if (pgxact->overflowed)
1133 0 : xids[nxids++] = pxid;
1134 : }
1135 :
1136 : /*
1137 : * Step 3: in hot standby mode, check the known-assigned-xids list. XIDs
1138 : * in the list must be treated as running.
1139 : */
1140 736 : if (RecoveryInProgress())
1141 : {
1142 : /* none of the PGXACT entries should have XIDs in hot standby mode */
1143 0 : Assert(nxids == 0);
1144 :
1145 0 : if (KnownAssignedXidExists(xid))
1146 : {
1147 0 : LWLockRelease(ProcArrayLock);
1148 : xc_by_known_assigned_inc();
1149 0 : return true;
1150 : }
1151 :
1152 : /*
1153 : * If the KnownAssignedXids overflowed, we have to check pg_subtrans
1154 : * too. Fetch all xids from KnownAssignedXids that are lower than
1155 : * xid, since if xid is a subtransaction its parent will always have a
1156 : * lower value. Note we will collect both main and subXIDs here, but
1157 : * there's no help for it.
1158 : */
1159 0 : if (TransactionIdPrecedesOrEquals(xid, procArray->lastOverflowedXid))
1160 0 : nxids = KnownAssignedXidsGet(xids, xid);
1161 : }
1162 :
1163 736 : LWLockRelease(ProcArrayLock);
1164 :
1165 : /*
1166 : * If none of the relevant caches overflowed, we know the Xid is not
1167 : * running without even looking at pg_subtrans.
1168 : */
1169 736 : if (nxids == 0)
1170 : {
1171 : xc_no_overflow_inc();
1172 736 : return false;
1173 : }
1174 :
1175 : /*
1176 : * Step 4: have to check pg_subtrans.
1177 : *
1178 : * At this point, we know it's either a subtransaction of one of the Xids
1179 : * in xids[], or it's not running. If it's an already-failed
1180 : * subtransaction, we want to say "not running" even though its parent may
1181 : * still be running. So first, check pg_xact to see if it's been aborted.
1182 : */
1183 : xc_slow_answer_inc();
1184 :
1185 0 : if (TransactionIdDidAbort(xid))
1186 0 : return false;
1187 :
1188 : /*
1189 : * It isn't aborted, so check whether the transaction tree it belongs to
1190 : * is still running (or, more precisely, whether it was running when we
1191 : * held ProcArrayLock).
1192 : */
1193 0 : topxid = SubTransGetTopmostTransaction(xid);
1194 0 : Assert(TransactionIdIsValid(topxid));
1195 0 : if (!TransactionIdEquals(topxid, xid))
1196 : {
1197 0 : for (i = 0; i < nxids; i++)
1198 : {
1199 0 : if (TransactionIdEquals(xids[i], topxid))
1200 0 : return true;
1201 : }
1202 : }
1203 :
1204 0 : return false;
1205 : }
1206 :
1207 : /*
1208 : * TransactionIdIsActive -- is xid the top-level XID of an active backend?
1209 : *
1210 : * This differs from TransactionIdIsInProgress in that it ignores prepared
1211 : * transactions, as well as transactions running on the master if we're in
1212 : * hot standby. Also, we ignore subtransactions since that's not needed
1213 : * for current uses.
1214 : */
1215 : bool
1216 0 : TransactionIdIsActive(TransactionId xid)
1217 : {
1218 0 : bool result = false;
1219 0 : ProcArrayStruct *arrayP = procArray;
1220 : int i;
1221 :
1222 : /*
1223 : * Don't bother checking a transaction older than RecentXmin; it could not
1224 : * possibly still be running.
1225 : */
1226 0 : if (TransactionIdPrecedes(xid, RecentXmin))
1227 0 : return false;
1228 :
1229 0 : LWLockAcquire(ProcArrayLock, LW_SHARED);
1230 :
1231 0 : for (i = 0; i < arrayP->numProcs; i++)
1232 : {
1233 0 : int pgprocno = arrayP->pgprocnos[i];
1234 0 : volatile PGPROC *proc = &allProcs[pgprocno];
1235 0 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
1236 : TransactionId pxid;
1237 :
1238 : /* Fetch xid just once - see GetNewTransactionId */
1239 0 : pxid = pgxact->xid;
1240 :
1241 0 : if (!TransactionIdIsValid(pxid))
1242 0 : continue;
1243 :
1244 0 : if (proc->pid == 0)
1245 0 : continue; /* ignore prepared transactions */
1246 :
1247 0 : if (TransactionIdEquals(pxid, xid))
1248 : {
1249 0 : result = true;
1250 0 : break;
1251 : }
1252 : }
1253 :
1254 0 : LWLockRelease(ProcArrayLock);
1255 :
1256 0 : return result;
1257 : }
1258 :
1259 :
1260 : /*
1261 : * GetOldestXmin -- returns oldest transaction that was running
1262 : * when any current transaction was started.
1263 : *
1264 : * If rel is NULL or a shared relation, all backends are considered, otherwise
1265 : * only backends running in this database are considered.
1266 : *
1267 : * The flags are used to ignore the backends in calculation when any of the
1268 : * corresponding flags is set. Typically, if you want to ignore ones with
1269 : * PROC_IN_VACUUM flag, you can use PROCARRAY_FLAGS_VACUUM.
1270 : *
1271 : * PROCARRAY_SLOTS_XMIN causes GetOldestXmin to ignore the xmin and
1272 : * catalog_xmin of any replication slots that exist in the system when
1273 : * calculating the oldest xmin.
1274 : *
1275 : * This is used by VACUUM to decide which deleted tuples must be preserved in
1276 : * the passed in table. For shared relations backends in all databases must be
1277 : * considered, but for non-shared relations that's not required, since only
1278 : * backends in my own database could ever see the tuples in them. Also, we can
1279 : * ignore concurrently running lazy VACUUMs because (a) they must be working
1280 : * on other tables, and (b) they don't need to do snapshot-based lookups.
1281 : *
1282 : * This is also used to determine where to truncate pg_subtrans. For that
1283 : * backends in all databases have to be considered, so rel = NULL has to be
1284 : * passed in.
1285 : *
1286 : * Note: we include all currently running xids in the set of considered xids.
1287 : * This ensures that if a just-started xact has not yet set its snapshot,
1288 : * when it does set the snapshot it cannot set xmin less than what we compute.
1289 : * See notes in src/backend/access/transam/README.
1290 : *
1291 : * Note: despite the above, it's possible for the calculated value to move
1292 : * backwards on repeated calls. The calculated value is conservative, so that
1293 : * anything older is definitely not considered as running by anyone anymore,
1294 : * but the exact value calculated depends on a number of things. For example,
1295 : * if rel = NULL and there are no transactions running in the current
1296 : * database, GetOldestXmin() returns latestCompletedXid. If a transaction
1297 : * begins after that, its xmin will include in-progress transactions in other
1298 : * databases that started earlier, so another call will return a lower value.
1299 : * Nonetheless it is safe to vacuum a table in the current database with the
1300 : * first result. There are also replication-related effects: a walsender
1301 : * process can set its xmin based on transactions that are no longer running
1302 : * in the master but are still being replayed on the standby, thus possibly
1303 : * making the GetOldestXmin reading go backwards. In this case there is a
1304 : * possibility that we lose data that the standby would like to have, but
1305 : * unless the standby uses a replication slot to make its xmin persistent
1306 : * there is little we can do about that --- data is only protected if the
1307 : * walsender runs continuously while queries are executed on the standby.
1308 : * (The Hot Standby code deals with such cases by failing standby queries
1309 : * that needed to access already-removed data, so there's no integrity bug.)
1310 : * The return value is also adjusted with vacuum_defer_cleanup_age, so
1311 : * increasing that setting on the fly is another easy way to make
1312 : * GetOldestXmin() move backwards, with no consequences for data integrity.
1313 : */
1314 : TransactionId
1315 2050 : GetOldestXmin(Relation rel, int flags)
1316 : {
1317 2050 : ProcArrayStruct *arrayP = procArray;
1318 : TransactionId result;
1319 : int index;
1320 : bool allDbs;
1321 :
1322 2050 : volatile TransactionId replication_slot_xmin = InvalidTransactionId;
1323 2050 : volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
1324 :
1325 : /*
1326 : * If we're not computing a relation specific limit, or if a shared
1327 : * relation has been passed in, backends in all databases have to be
1328 : * considered.
1329 : */
1330 2050 : allDbs = rel == NULL || rel->rd_rel->relisshared;
1331 :
1332 : /* Cannot look for individual databases during recovery */
1333 2050 : Assert(allDbs || !RecoveryInProgress());
1334 :
1335 2050 : LWLockAcquire(ProcArrayLock, LW_SHARED);
1336 :
1337 : /*
1338 : * We initialize the MIN() calculation with latestCompletedXid + 1. This
1339 : * is a lower bound for the XIDs that might appear in the ProcArray later,
1340 : * and so protects us against overestimating the result due to future
1341 : * additions.
1342 : */
1343 2050 : result = ShmemVariableCache->latestCompletedXid;
1344 2050 : Assert(TransactionIdIsNormal(result));
1345 2050 : TransactionIdAdvance(result);
1346 :
1347 14369 : for (index = 0; index < arrayP->numProcs; index++)
1348 : {
1349 12319 : int pgprocno = arrayP->pgprocnos[index];
1350 12319 : volatile PGPROC *proc = &allProcs[pgprocno];
1351 12319 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
1352 :
1353 12319 : if (pgxact->vacuumFlags & (flags & PROCARRAY_PROC_FLAGS_MASK))
1354 434 : continue;
1355 :
1356 23331 : if (allDbs ||
1357 15048 : proc->databaseId == MyDatabaseId ||
1358 3602 : proc->databaseId == 0) /* always include WalSender */
1359 : {
1360 : /* Fetch xid just once - see GetNewTransactionId */
1361 11885 : TransactionId xid = pgxact->xid;
1362 :
1363 : /* First consider the transaction's own Xid, if any */
1364 14786 : if (TransactionIdIsNormal(xid) &&
1365 2901 : TransactionIdPrecedes(xid, result))
1366 395 : result = xid;
1367 :
1368 : /*
1369 : * Also consider the transaction's Xmin, if set.
1370 : *
1371 : * We must check both Xid and Xmin because a transaction might
1372 : * have an Xmin but not (yet) an Xid; conversely, if it has an
1373 : * Xid, that could determine some not-yet-set Xmin.
1374 : */
1375 11885 : xid = pgxact->xmin; /* Fetch just once */
1376 16138 : if (TransactionIdIsNormal(xid) &&
1377 4253 : TransactionIdPrecedes(xid, result))
1378 1448 : result = xid;
1379 : }
1380 : }
1381 :
1382 : /* fetch into volatile var while ProcArrayLock is held */
1383 2050 : replication_slot_xmin = procArray->replication_slot_xmin;
1384 2050 : replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
1385 :
1386 2050 : if (RecoveryInProgress())
1387 : {
1388 : /*
1389 : * Check to see whether KnownAssignedXids contains an xid value older
1390 : * than the main procarray.
1391 : */
1392 0 : TransactionId kaxmin = KnownAssignedXidsGetOldestXmin();
1393 :
1394 0 : LWLockRelease(ProcArrayLock);
1395 :
1396 0 : if (TransactionIdIsNormal(kaxmin) &&
1397 0 : TransactionIdPrecedes(kaxmin, result))
1398 0 : result = kaxmin;
1399 : }
1400 : else
1401 : {
1402 : /*
1403 : * No other information needed, so release the lock immediately.
1404 : */
1405 2050 : LWLockRelease(ProcArrayLock);
1406 :
1407 : /*
1408 : * Compute the cutoff XID by subtracting vacuum_defer_cleanup_age,
1409 : * being careful not to generate a "permanent" XID.
1410 : *
1411 : * vacuum_defer_cleanup_age provides some additional "slop" for the
1412 : * benefit of hot standby queries on standby servers. This is quick
1413 : * and dirty, and perhaps not all that useful unless the master has a
1414 : * predictable transaction rate, but it offers some protection when
1415 : * there's no walsender connection. Note that we are assuming
1416 : * vacuum_defer_cleanup_age isn't large enough to cause wraparound ---
1417 : * so guc.c should limit it to no more than the xidStopLimit threshold
1418 : * in varsup.c. Also note that we intentionally don't apply
1419 : * vacuum_defer_cleanup_age on standby servers.
1420 : */
1421 2050 : result -= vacuum_defer_cleanup_age;
1422 2050 : if (!TransactionIdIsNormal(result))
1423 0 : result = FirstNormalTransactionId;
1424 : }
1425 :
1426 : /*
1427 : * Check whether there are replication slots requiring an older xmin.
1428 : */
1429 4100 : if (!(flags & PROCARRAY_SLOTS_XMIN) &&
1430 2050 : TransactionIdIsValid(replication_slot_xmin) &&
1431 0 : NormalTransactionIdPrecedes(replication_slot_xmin, result))
1432 0 : result = replication_slot_xmin;
1433 :
1434 : /*
1435 : * After locks have been released and defer_cleanup_age has been applied,
1436 : * check whether we need to back up further to make logical decoding
1437 : * possible. We need to do so if we're computing the global limit (rel =
1438 : * NULL) or if the passed relation is a catalog relation of some kind.
1439 : */
1440 2050 : if (!(flags & PROCARRAY_SLOTS_XMIN) &&
1441 1991 : (rel == NULL ||
1442 2050 : RelationIsAccessibleInLogicalDecoding(rel)) &&
1443 59 : TransactionIdIsValid(replication_slot_catalog_xmin) &&
1444 0 : NormalTransactionIdPrecedes(replication_slot_catalog_xmin, result))
1445 0 : result = replication_slot_catalog_xmin;
1446 :
1447 2050 : return result;
1448 : }
1449 :
1450 : /*
1451 : * GetMaxSnapshotXidCount -- get max size for snapshot XID array
1452 : *
1453 : * We have to export this for use by snapmgr.c.
1454 : */
1455 : int
1456 837 : GetMaxSnapshotXidCount(void)
1457 : {
1458 837 : return procArray->maxProcs;
1459 : }
1460 :
1461 : /*
1462 : * GetMaxSnapshotSubxidCount -- get max size for snapshot sub-XID array
1463 : *
1464 : * We have to export this for use by snapmgr.c.
1465 : */
1466 : int
1467 837 : GetMaxSnapshotSubxidCount(void)
1468 : {
1469 837 : return TOTAL_MAX_CACHED_SUBXIDS;
1470 : }
1471 :
1472 : /*
1473 : * GetSnapshotData -- returns information about running transactions.
1474 : *
1475 : * The returned snapshot includes xmin (lowest still-running xact ID),
1476 : * xmax (highest completed xact ID + 1), and a list of running xact IDs
1477 : * in the range xmin <= xid < xmax. It is used as follows:
1478 : * All xact IDs < xmin are considered finished.
1479 : * All xact IDs >= xmax are considered still running.
1480 : * For an xact ID xmin <= xid < xmax, consult list to see whether
1481 : * it is considered running or not.
1482 : * This ensures that the set of transactions seen as "running" by the
1483 : * current xact will not change after it takes the snapshot.
1484 : *
1485 : * All running top-level XIDs are included in the snapshot, except for lazy
1486 : * VACUUM processes. We also try to include running subtransaction XIDs,
1487 : * but since PGPROC has only a limited cache area for subxact XIDs, full
1488 : * information may not be available. If we find any overflowed subxid arrays,
1489 : * we have to mark the snapshot's subxid data as overflowed, and extra work
1490 : * *may* need to be done to determine what's running (see XidInMVCCSnapshot()
1491 : * in tqual.c).
1492 : *
1493 : * We also update the following backend-global variables:
1494 : * TransactionXmin: the oldest xmin of any snapshot in use in the
1495 : * current transaction (this is the same as MyPgXact->xmin).
1496 : * RecentXmin: the xmin computed for the most recent snapshot. XIDs
1497 : * older than this are known not running any more.
1498 : * RecentGlobalXmin: the global xmin (oldest TransactionXmin across all
1499 : * running transactions, except those running LAZY VACUUM). This is
1500 : * the same computation done by
1501 : * GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM).
1502 : * RecentGlobalDataXmin: the global xmin for non-catalog tables
1503 : * >= RecentGlobalXmin
1504 : *
1505 : * Note: this function should probably not be called with an argument that's
1506 : * not statically allocated (see xip allocation below).
1507 : */
1508 : Snapshot
1509 123706 : GetSnapshotData(Snapshot snapshot)
1510 : {
1511 123706 : ProcArrayStruct *arrayP = procArray;
1512 : TransactionId xmin;
1513 : TransactionId xmax;
1514 : TransactionId globalxmin;
1515 : int index;
1516 123706 : int count = 0;
1517 123706 : int subcount = 0;
1518 123706 : bool suboverflowed = false;
1519 123706 : volatile TransactionId replication_slot_xmin = InvalidTransactionId;
1520 123706 : volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
1521 :
1522 123706 : Assert(snapshot != NULL);
1523 :
1524 : /*
1525 : * Allocating space for maxProcs xids is usually overkill; numProcs would
1526 : * be sufficient. But it seems better to do the malloc while not holding
1527 : * the lock, so we can't look at numProcs. Likewise, we allocate much
1528 : * more subxip storage than is probably needed.
1529 : *
1530 : * This does open a possibility for avoiding repeated malloc/free: since
1531 : * maxProcs does not change at runtime, we can simply reuse the previous
1532 : * xip arrays if any. (This relies on the fact that all callers pass
1533 : * static SnapshotData structs.)
1534 : */
1535 123706 : if (snapshot->xip == NULL)
1536 : {
1537 : /*
1538 : * First call for this snapshot. Snapshot is same size whether or not
1539 : * we are in recovery, see later comments.
1540 : */
1541 722 : snapshot->xip = (TransactionId *)
1542 722 : malloc(GetMaxSnapshotXidCount() * sizeof(TransactionId));
1543 722 : if (snapshot->xip == NULL)
1544 0 : ereport(ERROR,
1545 : (errcode(ERRCODE_OUT_OF_MEMORY),
1546 : errmsg("out of memory")));
1547 722 : Assert(snapshot->subxip == NULL);
1548 722 : snapshot->subxip = (TransactionId *)
1549 722 : malloc(GetMaxSnapshotSubxidCount() * sizeof(TransactionId));
1550 722 : if (snapshot->subxip == NULL)
1551 0 : ereport(ERROR,
1552 : (errcode(ERRCODE_OUT_OF_MEMORY),
1553 : errmsg("out of memory")));
1554 : }
1555 :
1556 : /*
1557 : * It is sufficient to get shared lock on ProcArrayLock, even if we are
1558 : * going to set MyPgXact->xmin.
1559 : */
1560 123706 : LWLockAcquire(ProcArrayLock, LW_SHARED);
1561 :
1562 : /* xmax is always latestCompletedXid + 1 */
1563 123706 : xmax = ShmemVariableCache->latestCompletedXid;
1564 123706 : Assert(TransactionIdIsNormal(xmax));
1565 123706 : TransactionIdAdvance(xmax);
1566 :
1567 : /* initialize xmin calculation with xmax */
1568 123706 : globalxmin = xmin = xmax;
1569 :
1570 123706 : snapshot->takenDuringRecovery = RecoveryInProgress();
1571 :
1572 123706 : if (!snapshot->takenDuringRecovery)
1573 : {
1574 123706 : int *pgprocnos = arrayP->pgprocnos;
1575 : int numProcs;
1576 :
1577 : /*
1578 : * Spin over procArray checking xid, xmin, and subxids. The goal is
1579 : * to gather all active xids, find the lowest xmin, and try to record
1580 : * subxids.
1581 : */
1582 123706 : numProcs = arrayP->numProcs;
1583 1138561 : for (index = 0; index < numProcs; index++)
1584 : {
1585 1014855 : int pgprocno = pgprocnos[index];
1586 1014855 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
1587 : TransactionId xid;
1588 :
1589 : /*
1590 : * Backend is doing logical decoding which manages xmin
1591 : * separately, check below.
1592 : */
1593 1014855 : if (pgxact->vacuumFlags & PROC_IN_LOGICAL_DECODING)
1594 0 : continue;
1595 :
1596 : /* Ignore procs running LAZY VACUUM */
1597 1014855 : if (pgxact->vacuumFlags & PROC_IN_VACUUM)
1598 1841 : continue;
1599 :
1600 : /* Update globalxmin to be the smallest valid xmin */
1601 1013014 : xid = pgxact->xmin; /* fetch just once */
1602 1333919 : if (TransactionIdIsNormal(xid) &&
1603 320905 : NormalTransactionIdPrecedes(xid, globalxmin))
1604 131587 : globalxmin = xid;
1605 :
1606 : /* Fetch xid just once - see GetNewTransactionId */
1607 1013014 : xid = pgxact->xid;
1608 :
1609 : /*
1610 : * If the transaction has no XID assigned, we can skip it; it
1611 : * won't have sub-XIDs either. If the XID is >= xmax, we can also
1612 : * skip it; such transactions will be treated as running anyway
1613 : * (and any sub-XIDs will also be >= xmax).
1614 : */
1615 1013014 : if (!TransactionIdIsNormal(xid)
1616 195935 : || !NormalTransactionIdPrecedes(xid, xmax))
1617 892035 : continue;
1618 :
1619 : /*
1620 : * We don't include our own XIDs (if any) in the snapshot, but we
1621 : * must include them in xmin.
1622 : */
1623 120979 : if (NormalTransactionIdPrecedes(xid, xmin))
1624 99997 : xmin = xid;
1625 120979 : if (pgxact == MyPgXact)
1626 12319 : continue;
1627 :
1628 : /* Add XID to snapshot. */
1629 108660 : snapshot->xip[count++] = xid;
1630 :
1631 : /*
1632 : * Save subtransaction XIDs if possible (if we've already
1633 : * overflowed, there's no point). Note that the subxact XIDs must
1634 : * be later than their parent, so no need to check them against
1635 : * xmin. We could filter against xmax, but it seems better not to
1636 : * do that much work while holding the ProcArrayLock.
1637 : *
1638 : * The other backend can add more subxids concurrently, but cannot
1639 : * remove any. Hence it's important to fetch nxids just once.
1640 : * Should be safe to use memcpy, though. (We needn't worry about
1641 : * missing any xids added concurrently, because they must postdate
1642 : * xmax.)
1643 : *
1644 : * Again, our own XIDs are not included in the snapshot.
1645 : */
1646 108660 : if (!suboverflowed)
1647 : {
1648 108660 : if (pgxact->overflowed)
1649 0 : suboverflowed = true;
1650 : else
1651 : {
1652 108660 : int nxids = pgxact->nxids;
1653 :
1654 108660 : if (nxids > 0)
1655 : {
1656 1036 : volatile PGPROC *proc = &allProcs[pgprocno];
1657 :
1658 2072 : memcpy(snapshot->subxip + subcount,
1659 1036 : (void *) proc->subxids.xids,
1660 : nxids * sizeof(TransactionId));
1661 1036 : subcount += nxids;
1662 : }
1663 : }
1664 : }
1665 : }
1666 : }
1667 : else
1668 : {
1669 : /*
1670 : * We're in hot standby, so get XIDs from KnownAssignedXids.
1671 : *
1672 : * We store all xids directly into subxip[]. Here's why:
1673 : *
1674 : * In recovery we don't know which xids are top-level and which are
1675 : * subxacts, a design choice that greatly simplifies xid processing.
1676 : *
1677 : * It seems like we would want to try to put xids into xip[] only, but
1678 : * that is fairly small. We would either need to make that bigger or
1679 : * to increase the rate at which we WAL-log xid assignment; neither is
1680 : * an appealing choice.
1681 : *
1682 : * We could try to store xids into xip[] first and then into subxip[]
1683 : * if there are too many xids. That only works if the snapshot doesn't
1684 : * overflow because we do not search subxip[] in that case. A simpler
1685 : * way is to just store all xids in the subxact array because this is
1686 : * by far the bigger array. We just leave the xip array empty.
1687 : *
1688 : * Either way we need to change the way XidInMVCCSnapshot() works
1689 : * depending upon when the snapshot was taken, or change normal
1690 : * snapshot processing so it matches.
1691 : *
1692 : * Note: It is possible for recovery to end before we finish taking
1693 : * the snapshot, and for newly assigned transaction ids to be added to
1694 : * the ProcArray. xmax cannot change while we hold ProcArrayLock, so
1695 : * those newly added transaction ids would be filtered away, so we
1696 : * need not be concerned about them.
1697 : */
1698 0 : subcount = KnownAssignedXidsGetAndSetXmin(snapshot->subxip, &xmin,
1699 : xmax);
1700 :
1701 0 : if (TransactionIdPrecedesOrEquals(xmin, procArray->lastOverflowedXid))
1702 0 : suboverflowed = true;
1703 : }
1704 :
1705 :
1706 : /* fetch into volatile var while ProcArrayLock is held */
1707 123706 : replication_slot_xmin = procArray->replication_slot_xmin;
1708 123706 : replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin;
1709 :
1710 123706 : if (!TransactionIdIsValid(MyPgXact->xmin))
1711 44896 : MyPgXact->xmin = TransactionXmin = xmin;
1712 :
1713 123706 : LWLockRelease(ProcArrayLock);
1714 :
1715 : /*
1716 : * Update globalxmin to include actual process xids. This is a slightly
1717 : * different way of computing it than GetOldestXmin uses, but should give
1718 : * the same result.
1719 : */
1720 123706 : if (TransactionIdPrecedes(xmin, globalxmin))
1721 438 : globalxmin = xmin;
1722 :
1723 : /* Update global variables too */
1724 123706 : RecentGlobalXmin = globalxmin - vacuum_defer_cleanup_age;
1725 123706 : if (!TransactionIdIsNormal(RecentGlobalXmin))
1726 0 : RecentGlobalXmin = FirstNormalTransactionId;
1727 :
1728 : /* Check whether there's a replication slot requiring an older xmin. */
1729 123706 : if (TransactionIdIsValid(replication_slot_xmin) &&
1730 0 : NormalTransactionIdPrecedes(replication_slot_xmin, RecentGlobalXmin))
1731 0 : RecentGlobalXmin = replication_slot_xmin;
1732 :
1733 : /* Non-catalog tables can be vacuumed if older than this xid */
1734 123706 : RecentGlobalDataXmin = RecentGlobalXmin;
1735 :
1736 : /*
1737 : * Check whether there's a replication slot requiring an older catalog
1738 : * xmin.
1739 : */
1740 123706 : if (TransactionIdIsNormal(replication_slot_catalog_xmin) &&
1741 0 : NormalTransactionIdPrecedes(replication_slot_catalog_xmin, RecentGlobalXmin))
1742 0 : RecentGlobalXmin = replication_slot_catalog_xmin;
1743 :
1744 123706 : RecentXmin = xmin;
1745 :
1746 123706 : snapshot->xmin = xmin;
1747 123706 : snapshot->xmax = xmax;
1748 123706 : snapshot->xcnt = count;
1749 123706 : snapshot->subxcnt = subcount;
1750 123706 : snapshot->suboverflowed = suboverflowed;
1751 :
1752 123706 : snapshot->curcid = GetCurrentCommandId(false);
1753 :
1754 : /*
1755 : * This is a new snapshot, so set both refcounts to zero, and mark it as
1756 : * not copied in persistent memory.
1757 : */
1758 123706 : snapshot->active_count = 0;
1759 123706 : snapshot->regd_count = 0;
1760 123706 : snapshot->copied = false;
1761 :
1762 123706 : if (old_snapshot_threshold < 0)
1763 : {
1764 : /*
1765 : * If not using "snapshot too old" feature, fill related fields with
1766 : * dummy values that don't require any locking.
1767 : */
1768 123706 : snapshot->lsn = InvalidXLogRecPtr;
1769 123706 : snapshot->whenTaken = 0;
1770 : }
1771 : else
1772 : {
1773 : /*
1774 : * Capture the current time and WAL stream location in case this
1775 : * snapshot becomes old enough to need to fall back on the special
1776 : * "old snapshot" logic.
1777 : */
1778 0 : snapshot->lsn = GetXLogInsertRecPtr();
1779 0 : snapshot->whenTaken = GetSnapshotCurrentTimestamp();
1780 0 : MaintainOldSnapshotTimeMapping(snapshot->whenTaken, xmin);
1781 : }
1782 :
1783 123706 : return snapshot;
1784 : }
1785 :
1786 : /*
1787 : * ProcArrayInstallImportedXmin -- install imported xmin into MyPgXact->xmin
1788 : *
1789 : * This is called when installing a snapshot imported from another
1790 : * transaction. To ensure that OldestXmin doesn't go backwards, we must
1791 : * check that the source transaction is still running, and we'd better do
1792 : * that atomically with installing the new xmin.
1793 : *
1794 : * Returns TRUE if successful, FALSE if source xact is no longer running.
1795 : */
1796 : bool
1797 0 : ProcArrayInstallImportedXmin(TransactionId xmin,
1798 : VirtualTransactionId *sourcevxid)
1799 : {
1800 0 : bool result = false;
1801 0 : ProcArrayStruct *arrayP = procArray;
1802 : int index;
1803 :
1804 0 : Assert(TransactionIdIsNormal(xmin));
1805 0 : if (!sourcevxid)
1806 0 : return false;
1807 :
1808 : /* Get lock so source xact can't end while we're doing this */
1809 0 : LWLockAcquire(ProcArrayLock, LW_SHARED);
1810 :
1811 0 : for (index = 0; index < arrayP->numProcs; index++)
1812 : {
1813 0 : int pgprocno = arrayP->pgprocnos[index];
1814 0 : volatile PGPROC *proc = &allProcs[pgprocno];
1815 0 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
1816 : TransactionId xid;
1817 :
1818 : /* Ignore procs running LAZY VACUUM */
1819 0 : if (pgxact->vacuumFlags & PROC_IN_VACUUM)
1820 0 : continue;
1821 :
1822 : /* We are only interested in the specific virtual transaction. */
1823 0 : if (proc->backendId != sourcevxid->backendId)
1824 0 : continue;
1825 0 : if (proc->lxid != sourcevxid->localTransactionId)
1826 0 : continue;
1827 :
1828 : /*
1829 : * We check the transaction's database ID for paranoia's sake: if it's
1830 : * in another DB then its xmin does not cover us. Caller should have
1831 : * detected this already, so we just treat any funny cases as
1832 : * "transaction not found".
1833 : */
1834 0 : if (proc->databaseId != MyDatabaseId)
1835 0 : continue;
1836 :
1837 : /*
1838 : * Likewise, let's just make real sure its xmin does cover us.
1839 : */
1840 0 : xid = pgxact->xmin; /* fetch just once */
1841 0 : if (!TransactionIdIsNormal(xid) ||
1842 0 : !TransactionIdPrecedesOrEquals(xid, xmin))
1843 0 : continue;
1844 :
1845 : /*
1846 : * We're good. Install the new xmin. As in GetSnapshotData, set
1847 : * TransactionXmin too. (Note that because snapmgr.c called
1848 : * GetSnapshotData first, we'll be overwriting a valid xmin here, so
1849 : * we don't check that.)
1850 : */
1851 0 : MyPgXact->xmin = TransactionXmin = xmin;
1852 :
1853 0 : result = true;
1854 0 : break;
1855 : }
1856 :
1857 0 : LWLockRelease(ProcArrayLock);
1858 :
1859 0 : return result;
1860 : }
1861 :
1862 : /*
1863 : * ProcArrayInstallRestoredXmin -- install restored xmin into MyPgXact->xmin
1864 : *
1865 : * This is like ProcArrayInstallImportedXmin, but we have a pointer to the
1866 : * PGPROC of the transaction from which we imported the snapshot, rather than
1867 : * an XID.
1868 : *
1869 : * Returns TRUE if successful, FALSE if source xact is no longer running.
1870 : */
1871 : bool
1872 115 : ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc)
1873 : {
1874 115 : bool result = false;
1875 : TransactionId xid;
1876 : volatile PGXACT *pgxact;
1877 :
1878 115 : Assert(TransactionIdIsNormal(xmin));
1879 115 : Assert(proc != NULL);
1880 :
1881 : /* Get lock so source xact can't end while we're doing this */
1882 115 : LWLockAcquire(ProcArrayLock, LW_SHARED);
1883 :
1884 115 : pgxact = &allPgXact[proc->pgprocno];
1885 :
1886 : /*
1887 : * Be certain that the referenced PGPROC has an advertised xmin which is
1888 : * no later than the one we're installing, so that the system-wide xmin
1889 : * can't go backwards. Also, make sure it's running in the same database,
1890 : * so that the per-database xmin cannot go backwards.
1891 : */
1892 115 : xid = pgxact->xmin; /* fetch just once */
1893 115 : if (proc->databaseId == MyDatabaseId &&
1894 115 : TransactionIdIsNormal(xid) &&
1895 115 : TransactionIdPrecedesOrEquals(xid, xmin))
1896 : {
1897 115 : MyPgXact->xmin = TransactionXmin = xmin;
1898 115 : result = true;
1899 : }
1900 :
1901 115 : LWLockRelease(ProcArrayLock);
1902 :
1903 115 : return result;
1904 : }
1905 :
1906 : /*
1907 : * GetRunningTransactionData -- returns information about running transactions.
1908 : *
1909 : * Similar to GetSnapshotData but returns more information. We include
1910 : * all PGXACTs with an assigned TransactionId, even VACUUM processes.
1911 : *
1912 : * We acquire XidGenLock and ProcArrayLock, but the caller is responsible for
1913 : * releasing them. Acquiring XidGenLock ensures that no new XIDs enter the proc
1914 : * array until the caller has WAL-logged this snapshot, and releases the
1915 : * lock. Acquiring ProcArrayLock ensures that no transactions commit until the
1916 : * lock is released.
1917 : *
1918 : * The returned data structure is statically allocated; caller should not
1919 : * modify it, and must not assume it is valid past the next call.
1920 : *
1921 : * This is never executed during recovery so there is no need to look at
1922 : * KnownAssignedXids.
1923 : *
1924 : * We don't worry about updating other counters, we want to keep this as
1925 : * simple as possible and leave GetSnapshotData() as the primary code for
1926 : * that bookkeeping.
1927 : *
1928 : * Note that if any transaction has overflowed its cached subtransactions
1929 : * then there is no real need include any subtransactions. That isn't a
1930 : * common enough case to worry about optimising the size of the WAL record,
1931 : * and we may wish to see that data for diagnostic purposes anyway.
1932 : */
1933 : RunningTransactions
1934 14 : GetRunningTransactionData(void)
1935 : {
1936 : /* result workspace */
1937 : static RunningTransactionsData CurrentRunningXactsData;
1938 :
1939 14 : ProcArrayStruct *arrayP = procArray;
1940 14 : RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData;
1941 : TransactionId latestCompletedXid;
1942 : TransactionId oldestRunningXid;
1943 : TransactionId *xids;
1944 : int index;
1945 : int count;
1946 : int subcount;
1947 : bool suboverflowed;
1948 :
1949 14 : Assert(!RecoveryInProgress());
1950 :
1951 : /*
1952 : * Allocating space for maxProcs xids is usually overkill; numProcs would
1953 : * be sufficient. But it seems better to do the malloc while not holding
1954 : * the lock, so we can't look at numProcs. Likewise, we allocate much
1955 : * more subxip storage than is probably needed.
1956 : *
1957 : * Should only be allocated in bgwriter, since only ever executed during
1958 : * checkpoints.
1959 : */
1960 14 : if (CurrentRunningXacts->xids == NULL)
1961 : {
1962 : /*
1963 : * First call
1964 : */
1965 3 : CurrentRunningXacts->xids = (TransactionId *)
1966 3 : malloc(TOTAL_MAX_CACHED_SUBXIDS * sizeof(TransactionId));
1967 3 : if (CurrentRunningXacts->xids == NULL)
1968 0 : ereport(ERROR,
1969 : (errcode(ERRCODE_OUT_OF_MEMORY),
1970 : errmsg("out of memory")));
1971 : }
1972 :
1973 14 : xids = CurrentRunningXacts->xids;
1974 :
1975 14 : count = subcount = 0;
1976 14 : suboverflowed = false;
1977 :
1978 : /*
1979 : * Ensure that no xids enter or leave the procarray while we obtain
1980 : * snapshot.
1981 : */
1982 14 : LWLockAcquire(ProcArrayLock, LW_SHARED);
1983 14 : LWLockAcquire(XidGenLock, LW_SHARED);
1984 :
1985 14 : latestCompletedXid = ShmemVariableCache->latestCompletedXid;
1986 :
1987 14 : oldestRunningXid = ShmemVariableCache->nextXid;
1988 :
1989 : /*
1990 : * Spin over procArray collecting all xids
1991 : */
1992 57 : for (index = 0; index < arrayP->numProcs; index++)
1993 : {
1994 43 : int pgprocno = arrayP->pgprocnos[index];
1995 43 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
1996 : TransactionId xid;
1997 :
1998 : /* Fetch xid just once - see GetNewTransactionId */
1999 43 : xid = pgxact->xid;
2000 :
2001 : /*
2002 : * We don't need to store transactions that don't have a TransactionId
2003 : * yet because they will not show as running on a standby server.
2004 : */
2005 43 : if (!TransactionIdIsValid(xid))
2006 33 : continue;
2007 :
2008 10 : xids[count++] = xid;
2009 :
2010 10 : if (TransactionIdPrecedes(xid, oldestRunningXid))
2011 10 : oldestRunningXid = xid;
2012 :
2013 10 : if (pgxact->overflowed)
2014 0 : suboverflowed = true;
2015 : }
2016 :
2017 : /*
2018 : * Spin over procArray collecting all subxids, but only if there hasn't
2019 : * been a suboverflow.
2020 : */
2021 14 : if (!suboverflowed)
2022 : {
2023 57 : for (index = 0; index < arrayP->numProcs; index++)
2024 : {
2025 43 : int pgprocno = arrayP->pgprocnos[index];
2026 43 : volatile PGPROC *proc = &allProcs[pgprocno];
2027 43 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
2028 : int nxids;
2029 :
2030 : /*
2031 : * Save subtransaction XIDs. Other backends can't add or remove
2032 : * entries while we're holding XidGenLock.
2033 : */
2034 43 : nxids = pgxact->nxids;
2035 43 : if (nxids > 0)
2036 : {
2037 0 : memcpy(&xids[count], (void *) proc->subxids.xids,
2038 : nxids * sizeof(TransactionId));
2039 0 : count += nxids;
2040 0 : subcount += nxids;
2041 :
2042 : /*
2043 : * Top-level XID of a transaction is always less than any of
2044 : * its subxids, so we don't need to check if any of the
2045 : * subxids are smaller than oldestRunningXid
2046 : */
2047 : }
2048 : }
2049 : }
2050 :
2051 : /*
2052 : * It's important *not* to include the limits set by slots here because
2053 : * snapbuild.c uses oldestRunningXid to manage its xmin horizon. If those
2054 : * were to be included here the initial value could never increase because
2055 : * of a circular dependency where slots only increase their limits when
2056 : * running xacts increases oldestRunningXid and running xacts only
2057 : * increases if slots do.
2058 : */
2059 :
2060 14 : CurrentRunningXacts->xcnt = count - subcount;
2061 14 : CurrentRunningXacts->subxcnt = subcount;
2062 14 : CurrentRunningXacts->subxid_overflow = suboverflowed;
2063 14 : CurrentRunningXacts->nextXid = ShmemVariableCache->nextXid;
2064 14 : CurrentRunningXacts->oldestRunningXid = oldestRunningXid;
2065 14 : CurrentRunningXacts->latestCompletedXid = latestCompletedXid;
2066 :
2067 14 : Assert(TransactionIdIsValid(CurrentRunningXacts->nextXid));
2068 14 : Assert(TransactionIdIsValid(CurrentRunningXacts->oldestRunningXid));
2069 14 : Assert(TransactionIdIsNormal(CurrentRunningXacts->latestCompletedXid));
2070 :
2071 : /* We don't release the locks here, the caller is responsible for that */
2072 :
2073 14 : return CurrentRunningXacts;
2074 : }
2075 :
2076 : /*
2077 : * GetOldestActiveTransactionId()
2078 : *
2079 : * Similar to GetSnapshotData but returns just oldestActiveXid. We include
2080 : * all PGXACTs with an assigned TransactionId, even VACUUM processes.
2081 : * We look at all databases, though there is no need to include WALSender
2082 : * since this has no effect on hot standby conflicts.
2083 : *
2084 : * This is never executed during recovery so there is no need to look at
2085 : * KnownAssignedXids.
2086 : *
2087 : * We don't worry about updating other counters, we want to keep this as
2088 : * simple as possible and leave GetSnapshotData() as the primary code for
2089 : * that bookkeeping.
2090 : */
2091 : TransactionId
2092 8 : GetOldestActiveTransactionId(void)
2093 : {
2094 8 : ProcArrayStruct *arrayP = procArray;
2095 : TransactionId oldestRunningXid;
2096 : int index;
2097 :
2098 8 : Assert(!RecoveryInProgress());
2099 :
2100 : /*
2101 : * Read nextXid, as the upper bound of what's still active.
2102 : *
2103 : * Reading a TransactionId is atomic, but we must grab the lock to make
2104 : * sure that all XIDs < nextXid are already present in the proc array (or
2105 : * have already completed), when we spin over it.
2106 : */
2107 8 : LWLockAcquire(XidGenLock, LW_SHARED);
2108 8 : oldestRunningXid = ShmemVariableCache->nextXid;
2109 8 : LWLockRelease(XidGenLock);
2110 :
2111 : /*
2112 : * Spin over procArray collecting all xids and subxids.
2113 : */
2114 8 : LWLockAcquire(ProcArrayLock, LW_SHARED);
2115 24 : for (index = 0; index < arrayP->numProcs; index++)
2116 : {
2117 16 : int pgprocno = arrayP->pgprocnos[index];
2118 16 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
2119 : TransactionId xid;
2120 :
2121 : /* Fetch xid just once - see GetNewTransactionId */
2122 16 : xid = pgxact->xid;
2123 :
2124 16 : if (!TransactionIdIsNormal(xid))
2125 8 : continue;
2126 :
2127 8 : if (TransactionIdPrecedes(xid, oldestRunningXid))
2128 8 : oldestRunningXid = xid;
2129 :
2130 : /*
2131 : * Top-level XID of a transaction is always less than any of its
2132 : * subxids, so we don't need to check if any of the subxids are
2133 : * smaller than oldestRunningXid
2134 : */
2135 : }
2136 8 : LWLockRelease(ProcArrayLock);
2137 :
2138 8 : return oldestRunningXid;
2139 : }
2140 :
2141 : /*
2142 : * GetOldestSafeDecodingTransactionId -- lowest xid not affected by vacuum
2143 : *
2144 : * Returns the oldest xid that we can guarantee not to have been affected by
2145 : * vacuum, i.e. no rows >= that xid have been vacuumed away unless the
2146 : * transaction aborted. Note that the value can (and most of the time will) be
2147 : * much more conservative than what really has been affected by vacuum, but we
2148 : * currently don't have better data available.
2149 : *
2150 : * This is useful to initialize the cutoff xid after which a new changeset
2151 : * extraction replication slot can start decoding changes.
2152 : *
2153 : * Must be called with ProcArrayLock held either shared or exclusively,
2154 : * although most callers will want to use exclusive mode since it is expected
2155 : * that the caller will immediately use the xid to peg the xmin horizon.
2156 : */
2157 : TransactionId
2158 0 : GetOldestSafeDecodingTransactionId(bool catalogOnly)
2159 : {
2160 0 : ProcArrayStruct *arrayP = procArray;
2161 : TransactionId oldestSafeXid;
2162 : int index;
2163 0 : bool recovery_in_progress = RecoveryInProgress();
2164 :
2165 0 : Assert(LWLockHeldByMe(ProcArrayLock));
2166 :
2167 : /*
2168 : * Acquire XidGenLock, so no transactions can acquire an xid while we're
2169 : * running. If no transaction with xid were running concurrently a new xid
2170 : * could influence the RecentXmin et al.
2171 : *
2172 : * We initialize the computation to nextXid since that's guaranteed to be
2173 : * a safe, albeit pessimal, value.
2174 : */
2175 0 : LWLockAcquire(XidGenLock, LW_SHARED);
2176 0 : oldestSafeXid = ShmemVariableCache->nextXid;
2177 :
2178 : /*
2179 : * If there's already a slot pegging the xmin horizon, we can start with
2180 : * that value, it's guaranteed to be safe since it's computed by this
2181 : * routine initially and has been enforced since. We can always use the
2182 : * slot's general xmin horizon, but the catalog horizon is only usable
2183 : * when we only catalog data is going to be looked at.
2184 : */
2185 0 : if (TransactionIdIsValid(procArray->replication_slot_xmin) &&
2186 0 : TransactionIdPrecedes(procArray->replication_slot_xmin,
2187 : oldestSafeXid))
2188 0 : oldestSafeXid = procArray->replication_slot_xmin;
2189 :
2190 0 : if (catalogOnly &&
2191 0 : TransactionIdIsValid(procArray->replication_slot_catalog_xmin) &&
2192 0 : TransactionIdPrecedes(procArray->replication_slot_catalog_xmin,
2193 : oldestSafeXid))
2194 0 : oldestSafeXid = procArray->replication_slot_catalog_xmin;
2195 :
2196 : /*
2197 : * If we're not in recovery, we walk over the procarray and collect the
2198 : * lowest xid. Since we're called with ProcArrayLock held and have
2199 : * acquired XidGenLock, no entries can vanish concurrently, since
2200 : * PGXACT->xid is only set with XidGenLock held and only cleared with
2201 : * ProcArrayLock held.
2202 : *
2203 : * In recovery we can't lower the safe value besides what we've computed
2204 : * above, so we'll have to wait a bit longer there. We unfortunately can
2205 : * *not* use KnownAssignedXidsGetOldestXmin() since the KnownAssignedXids
2206 : * machinery can miss values and return an older value than is safe.
2207 : */
2208 0 : if (!recovery_in_progress)
2209 : {
2210 : /*
2211 : * Spin over procArray collecting all min(PGXACT->xid)
2212 : */
2213 0 : for (index = 0; index < arrayP->numProcs; index++)
2214 : {
2215 0 : int pgprocno = arrayP->pgprocnos[index];
2216 0 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
2217 : TransactionId xid;
2218 :
2219 : /* Fetch xid just once - see GetNewTransactionId */
2220 0 : xid = pgxact->xid;
2221 :
2222 0 : if (!TransactionIdIsNormal(xid))
2223 0 : continue;
2224 :
2225 0 : if (TransactionIdPrecedes(xid, oldestSafeXid))
2226 0 : oldestSafeXid = xid;
2227 : }
2228 : }
2229 :
2230 0 : LWLockRelease(XidGenLock);
2231 :
2232 0 : return oldestSafeXid;
2233 : }
2234 :
2235 : /*
2236 : * GetVirtualXIDsDelayingChkpt -- Get the VXIDs of transactions that are
2237 : * delaying checkpoint because they have critical actions in progress.
2238 : *
2239 : * Constructs an array of VXIDs of transactions that are currently in commit
2240 : * critical sections, as shown by having delayChkpt set in their PGXACT.
2241 : *
2242 : * Returns a palloc'd array that should be freed by the caller.
2243 : * *nvxids is the number of valid entries.
2244 : *
2245 : * Note that because backends set or clear delayChkpt without holding any lock,
2246 : * the result is somewhat indeterminate, but we don't really care. Even in
2247 : * a multiprocessor with delayed writes to shared memory, it should be certain
2248 : * that setting of delayChkpt will propagate to shared memory when the backend
2249 : * takes a lock, so we cannot fail to see a virtual xact as delayChkpt if
2250 : * it's already inserted its commit record. Whether it takes a little while
2251 : * for clearing of delayChkpt to propagate is unimportant for correctness.
2252 : */
2253 : VirtualTransactionId *
2254 11 : GetVirtualXIDsDelayingChkpt(int *nvxids)
2255 : {
2256 : VirtualTransactionId *vxids;
2257 11 : ProcArrayStruct *arrayP = procArray;
2258 11 : int count = 0;
2259 : int index;
2260 :
2261 : /* allocate what's certainly enough result space */
2262 11 : vxids = (VirtualTransactionId *)
2263 11 : palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
2264 :
2265 11 : LWLockAcquire(ProcArrayLock, LW_SHARED);
2266 :
2267 29 : for (index = 0; index < arrayP->numProcs; index++)
2268 : {
2269 18 : int pgprocno = arrayP->pgprocnos[index];
2270 18 : volatile PGPROC *proc = &allProcs[pgprocno];
2271 18 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
2272 :
2273 18 : if (pgxact->delayChkpt)
2274 : {
2275 : VirtualTransactionId vxid;
2276 :
2277 0 : GET_VXID_FROM_PGPROC(vxid, *proc);
2278 0 : if (VirtualTransactionIdIsValid(vxid))
2279 0 : vxids[count++] = vxid;
2280 : }
2281 : }
2282 :
2283 11 : LWLockRelease(ProcArrayLock);
2284 :
2285 11 : *nvxids = count;
2286 11 : return vxids;
2287 : }
2288 :
2289 : /*
2290 : * HaveVirtualXIDsDelayingChkpt -- Are any of the specified VXIDs delaying?
2291 : *
2292 : * This is used with the results of GetVirtualXIDsDelayingChkpt to see if any
2293 : * of the specified VXIDs are still in critical sections of code.
2294 : *
2295 : * Note: this is O(N^2) in the number of vxacts that are/were delaying, but
2296 : * those numbers should be small enough for it not to be a problem.
2297 : */
2298 : bool
2299 0 : HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
2300 : {
2301 0 : bool result = false;
2302 0 : ProcArrayStruct *arrayP = procArray;
2303 : int index;
2304 :
2305 0 : LWLockAcquire(ProcArrayLock, LW_SHARED);
2306 :
2307 0 : for (index = 0; index < arrayP->numProcs; index++)
2308 : {
2309 0 : int pgprocno = arrayP->pgprocnos[index];
2310 0 : volatile PGPROC *proc = &allProcs[pgprocno];
2311 0 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
2312 : VirtualTransactionId vxid;
2313 :
2314 0 : GET_VXID_FROM_PGPROC(vxid, *proc);
2315 :
2316 0 : if (pgxact->delayChkpt && VirtualTransactionIdIsValid(vxid))
2317 : {
2318 : int i;
2319 :
2320 0 : for (i = 0; i < nvxids; i++)
2321 : {
2322 0 : if (VirtualTransactionIdEquals(vxid, vxids[i]))
2323 : {
2324 0 : result = true;
2325 0 : break;
2326 : }
2327 : }
2328 0 : if (result)
2329 0 : break;
2330 : }
2331 : }
2332 :
2333 0 : LWLockRelease(ProcArrayLock);
2334 :
2335 0 : return result;
2336 : }
2337 :
2338 : /*
2339 : * BackendPidGetProc -- get a backend's PGPROC given its PID
2340 : *
2341 : * Returns NULL if not found. Note that it is up to the caller to be
2342 : * sure that the question remains meaningful for long enough for the
2343 : * answer to be used ...
2344 : */
2345 : PGPROC *
2346 0 : BackendPidGetProc(int pid)
2347 : {
2348 : PGPROC *result;
2349 :
2350 0 : if (pid == 0) /* never match dummy PGPROCs */
2351 0 : return NULL;
2352 :
2353 0 : LWLockAcquire(ProcArrayLock, LW_SHARED);
2354 :
2355 0 : result = BackendPidGetProcWithLock(pid);
2356 :
2357 0 : LWLockRelease(ProcArrayLock);
2358 :
2359 0 : return result;
2360 : }
2361 :
2362 : /*
2363 : * BackendPidGetProcWithLock -- get a backend's PGPROC given its PID
2364 : *
2365 : * Same as above, except caller must be holding ProcArrayLock. The found
2366 : * entry, if any, can be assumed to be valid as long as the lock remains held.
2367 : */
2368 : PGPROC *
2369 0 : BackendPidGetProcWithLock(int pid)
2370 : {
2371 0 : PGPROC *result = NULL;
2372 0 : ProcArrayStruct *arrayP = procArray;
2373 : int index;
2374 :
2375 0 : if (pid == 0) /* never match dummy PGPROCs */
2376 0 : return NULL;
2377 :
2378 0 : for (index = 0; index < arrayP->numProcs; index++)
2379 : {
2380 0 : PGPROC *proc = &allProcs[arrayP->pgprocnos[index]];
2381 :
2382 0 : if (proc->pid == pid)
2383 : {
2384 0 : result = proc;
2385 0 : break;
2386 : }
2387 : }
2388 :
2389 0 : return result;
2390 : }
2391 :
2392 : /*
2393 : * BackendXidGetPid -- get a backend's pid given its XID
2394 : *
2395 : * Returns 0 if not found or it's a prepared transaction. Note that
2396 : * it is up to the caller to be sure that the question remains
2397 : * meaningful for long enough for the answer to be used ...
2398 : *
2399 : * Only main transaction Ids are considered. This function is mainly
2400 : * useful for determining what backend owns a lock.
2401 : *
2402 : * Beware that not every xact has an XID assigned. However, as long as you
2403 : * only call this using an XID found on disk, you're safe.
2404 : */
2405 : int
2406 0 : BackendXidGetPid(TransactionId xid)
2407 : {
2408 0 : int result = 0;
2409 0 : ProcArrayStruct *arrayP = procArray;
2410 : int index;
2411 :
2412 0 : if (xid == InvalidTransactionId) /* never match invalid xid */
2413 0 : return 0;
2414 :
2415 0 : LWLockAcquire(ProcArrayLock, LW_SHARED);
2416 :
2417 0 : for (index = 0; index < arrayP->numProcs; index++)
2418 : {
2419 0 : int pgprocno = arrayP->pgprocnos[index];
2420 0 : volatile PGPROC *proc = &allProcs[pgprocno];
2421 0 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
2422 :
2423 0 : if (pgxact->xid == xid)
2424 : {
2425 0 : result = proc->pid;
2426 0 : break;
2427 : }
2428 : }
2429 :
2430 0 : LWLockRelease(ProcArrayLock);
2431 :
2432 0 : return result;
2433 : }
2434 :
2435 : /*
2436 : * IsBackendPid -- is a given pid a running backend
2437 : *
2438 : * This is not called by the backend, but is called by external modules.
2439 : */
2440 : bool
2441 0 : IsBackendPid(int pid)
2442 : {
2443 0 : return (BackendPidGetProc(pid) != NULL);
2444 : }
2445 :
2446 :
2447 : /*
2448 : * GetCurrentVirtualXIDs -- returns an array of currently active VXIDs.
2449 : *
2450 : * The array is palloc'd. The number of valid entries is returned into *nvxids.
2451 : *
2452 : * The arguments allow filtering the set of VXIDs returned. Our own process
2453 : * is always skipped. In addition:
2454 : * If limitXmin is not InvalidTransactionId, skip processes with
2455 : * xmin > limitXmin.
2456 : * If excludeXmin0 is true, skip processes with xmin = 0.
2457 : * If allDbs is false, skip processes attached to other databases.
2458 : * If excludeVacuum isn't zero, skip processes for which
2459 : * (vacuumFlags & excludeVacuum) is not zero.
2460 : *
2461 : * Note: the purpose of the limitXmin and excludeXmin0 parameters is to
2462 : * allow skipping backends whose oldest live snapshot is no older than
2463 : * some snapshot we have. Since we examine the procarray with only shared
2464 : * lock, there are race conditions: a backend could set its xmin just after
2465 : * we look. Indeed, on multiprocessors with weak memory ordering, the
2466 : * other backend could have set its xmin *before* we look. We know however
2467 : * that such a backend must have held shared ProcArrayLock overlapping our
2468 : * own hold of ProcArrayLock, else we would see its xmin update. Therefore,
2469 : * any snapshot the other backend is taking concurrently with our scan cannot
2470 : * consider any transactions as still running that we think are committed
2471 : * (since backends must hold ProcArrayLock exclusive to commit).
2472 : */
2473 : VirtualTransactionId *
2474 5 : GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0,
2475 : bool allDbs, int excludeVacuum,
2476 : int *nvxids)
2477 : {
2478 : VirtualTransactionId *vxids;
2479 5 : ProcArrayStruct *arrayP = procArray;
2480 5 : int count = 0;
2481 : int index;
2482 :
2483 : /* allocate what's certainly enough result space */
2484 5 : vxids = (VirtualTransactionId *)
2485 5 : palloc(sizeof(VirtualTransactionId) * arrayP->maxProcs);
2486 :
2487 5 : LWLockAcquire(ProcArrayLock, LW_SHARED);
2488 :
2489 20 : for (index = 0; index < arrayP->numProcs; index++)
2490 : {
2491 15 : int pgprocno = arrayP->pgprocnos[index];
2492 15 : volatile PGPROC *proc = &allProcs[pgprocno];
2493 15 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
2494 :
2495 15 : if (proc == MyProc)
2496 5 : continue;
2497 :
2498 10 : if (excludeVacuum & pgxact->vacuumFlags)
2499 0 : continue;
2500 :
2501 10 : if (allDbs || proc->databaseId == MyDatabaseId)
2502 : {
2503 : /* Fetch xmin just once - might change on us */
2504 0 : TransactionId pxmin = pgxact->xmin;
2505 :
2506 0 : if (excludeXmin0 && !TransactionIdIsValid(pxmin))
2507 0 : continue;
2508 :
2509 : /*
2510 : * InvalidTransactionId precedes all other XIDs, so a proc that
2511 : * hasn't set xmin yet will not be rejected by this test.
2512 : */
2513 0 : if (!TransactionIdIsValid(limitXmin) ||
2514 0 : TransactionIdPrecedesOrEquals(pxmin, limitXmin))
2515 : {
2516 : VirtualTransactionId vxid;
2517 :
2518 0 : GET_VXID_FROM_PGPROC(vxid, *proc);
2519 0 : if (VirtualTransactionIdIsValid(vxid))
2520 0 : vxids[count++] = vxid;
2521 : }
2522 : }
2523 : }
2524 :
2525 5 : LWLockRelease(ProcArrayLock);
2526 :
2527 5 : *nvxids = count;
2528 5 : return vxids;
2529 : }
2530 :
2531 : /*
2532 : * GetConflictingVirtualXIDs -- returns an array of currently active VXIDs.
2533 : *
2534 : * Usage is limited to conflict resolution during recovery on standby servers.
2535 : * limitXmin is supplied as either latestRemovedXid, or InvalidTransactionId
2536 : * in cases where we cannot accurately determine a value for latestRemovedXid.
2537 : *
2538 : * If limitXmin is InvalidTransactionId then we want to kill everybody,
2539 : * so we're not worried if they have a snapshot or not, nor does it really
2540 : * matter what type of lock we hold.
2541 : *
2542 : * All callers that are checking xmins always now supply a valid and useful
2543 : * value for limitXmin. The limitXmin is always lower than the lowest
2544 : * numbered KnownAssignedXid that is not already a FATAL error. This is
2545 : * because we only care about cleanup records that are cleaning up tuple
2546 : * versions from committed transactions. In that case they will only occur
2547 : * at the point where the record is less than the lowest running xid. That
2548 : * allows us to say that if any backend takes a snapshot concurrently with
2549 : * us then the conflict assessment made here would never include the snapshot
2550 : * that is being derived. So we take LW_SHARED on the ProcArray and allow
2551 : * concurrent snapshots when limitXmin is valid. We might think about adding
2552 : * Assert(limitXmin < lowest(KnownAssignedXids))
2553 : * but that would not be true in the case of FATAL errors lagging in array,
2554 : * but we already know those are bogus anyway, so we skip that test.
2555 : *
2556 : * If dbOid is valid we skip backends attached to other databases.
2557 : *
2558 : * Be careful to *not* pfree the result from this function. We reuse
2559 : * this array sufficiently often that we use malloc for the result.
2560 : */
2561 : VirtualTransactionId *
2562 0 : GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid)
2563 : {
2564 : static VirtualTransactionId *vxids;
2565 0 : ProcArrayStruct *arrayP = procArray;
2566 0 : int count = 0;
2567 : int index;
2568 :
2569 : /*
2570 : * If first time through, get workspace to remember main XIDs in. We
2571 : * malloc it permanently to avoid repeated palloc/pfree overhead. Allow
2572 : * result space, remembering room for a terminator.
2573 : */
2574 0 : if (vxids == NULL)
2575 : {
2576 0 : vxids = (VirtualTransactionId *)
2577 0 : malloc(sizeof(VirtualTransactionId) * (arrayP->maxProcs + 1));
2578 0 : if (vxids == NULL)
2579 0 : ereport(ERROR,
2580 : (errcode(ERRCODE_OUT_OF_MEMORY),
2581 : errmsg("out of memory")));
2582 : }
2583 :
2584 0 : LWLockAcquire(ProcArrayLock, LW_SHARED);
2585 :
2586 0 : for (index = 0; index < arrayP->numProcs; index++)
2587 : {
2588 0 : int pgprocno = arrayP->pgprocnos[index];
2589 0 : volatile PGPROC *proc = &allProcs[pgprocno];
2590 0 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
2591 :
2592 : /* Exclude prepared transactions */
2593 0 : if (proc->pid == 0)
2594 0 : continue;
2595 :
2596 0 : if (!OidIsValid(dbOid) ||
2597 0 : proc->databaseId == dbOid)
2598 : {
2599 : /* Fetch xmin just once - can't change on us, but good coding */
2600 0 : TransactionId pxmin = pgxact->xmin;
2601 :
2602 : /*
2603 : * We ignore an invalid pxmin because this means that backend has
2604 : * no snapshot currently. We hold a Share lock to avoid contention
2605 : * with users taking snapshots. That is not a problem because the
2606 : * current xmin is always at least one higher than the latest
2607 : * removed xid, so any new snapshot would never conflict with the
2608 : * test here.
2609 : */
2610 0 : if (!TransactionIdIsValid(limitXmin) ||
2611 0 : (TransactionIdIsValid(pxmin) && !TransactionIdFollows(pxmin, limitXmin)))
2612 : {
2613 : VirtualTransactionId vxid;
2614 :
2615 0 : GET_VXID_FROM_PGPROC(vxid, *proc);
2616 0 : if (VirtualTransactionIdIsValid(vxid))
2617 0 : vxids[count++] = vxid;
2618 : }
2619 : }
2620 : }
2621 :
2622 0 : LWLockRelease(ProcArrayLock);
2623 :
2624 : /* add the terminator */
2625 0 : vxids[count].backendId = InvalidBackendId;
2626 0 : vxids[count].localTransactionId = InvalidLocalTransactionId;
2627 :
2628 0 : return vxids;
2629 : }
2630 :
2631 : /*
2632 : * CancelVirtualTransaction - used in recovery conflict processing
2633 : *
2634 : * Returns pid of the process signaled, or 0 if not found.
2635 : */
2636 : pid_t
2637 0 : CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReason sigmode)
2638 : {
2639 0 : ProcArrayStruct *arrayP = procArray;
2640 : int index;
2641 0 : pid_t pid = 0;
2642 :
2643 0 : LWLockAcquire(ProcArrayLock, LW_SHARED);
2644 :
2645 0 : for (index = 0; index < arrayP->numProcs; index++)
2646 : {
2647 0 : int pgprocno = arrayP->pgprocnos[index];
2648 0 : volatile PGPROC *proc = &allProcs[pgprocno];
2649 : VirtualTransactionId procvxid;
2650 :
2651 0 : GET_VXID_FROM_PGPROC(procvxid, *proc);
2652 :
2653 0 : if (procvxid.backendId == vxid.backendId &&
2654 0 : procvxid.localTransactionId == vxid.localTransactionId)
2655 : {
2656 0 : proc->recoveryConflictPending = true;
2657 0 : pid = proc->pid;
2658 0 : if (pid != 0)
2659 : {
2660 : /*
2661 : * Kill the pid if it's still here. If not, that's what we
2662 : * wanted so ignore any errors.
2663 : */
2664 0 : (void) SendProcSignal(pid, sigmode, vxid.backendId);
2665 : }
2666 0 : break;
2667 : }
2668 : }
2669 :
2670 0 : LWLockRelease(ProcArrayLock);
2671 :
2672 0 : return pid;
2673 : }
2674 :
2675 : /*
2676 : * MinimumActiveBackends --- count backends (other than myself) that are
2677 : * in active transactions. Return true if the count exceeds the
2678 : * minimum threshold passed. This is used as a heuristic to decide if
2679 : * a pre-XLOG-flush delay is worthwhile during commit.
2680 : *
2681 : * Do not count backends that are blocked waiting for locks, since they are
2682 : * not going to get to run until someone else commits.
2683 : */
2684 : bool
2685 0 : MinimumActiveBackends(int min)
2686 : {
2687 0 : ProcArrayStruct *arrayP = procArray;
2688 0 : int count = 0;
2689 : int index;
2690 :
2691 : /* Quick short-circuit if no minimum is specified */
2692 0 : if (min == 0)
2693 0 : return true;
2694 :
2695 : /*
2696 : * Note: for speed, we don't acquire ProcArrayLock. This is a little bit
2697 : * bogus, but since we are only testing fields for zero or nonzero, it
2698 : * should be OK. The result is only used for heuristic purposes anyway...
2699 : */
2700 0 : for (index = 0; index < arrayP->numProcs; index++)
2701 : {
2702 0 : int pgprocno = arrayP->pgprocnos[index];
2703 0 : volatile PGPROC *proc = &allProcs[pgprocno];
2704 0 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
2705 :
2706 : /*
2707 : * Since we're not holding a lock, need to be prepared to deal with
2708 : * garbage, as someone could have incremented numProcs but not yet
2709 : * filled the structure.
2710 : *
2711 : * If someone just decremented numProcs, 'proc' could also point to a
2712 : * PGPROC entry that's no longer in the array. It still points to a
2713 : * PGPROC struct, though, because freed PGPROC entries just go to the
2714 : * free list and are recycled. Its contents are nonsense in that case,
2715 : * but that's acceptable for this function.
2716 : */
2717 0 : if (pgprocno == -1)
2718 0 : continue; /* do not count deleted entries */
2719 0 : if (proc == MyProc)
2720 0 : continue; /* do not count myself */
2721 0 : if (pgxact->xid == InvalidTransactionId)
2722 0 : continue; /* do not count if no XID assigned */
2723 0 : if (proc->pid == 0)
2724 0 : continue; /* do not count prepared xacts */
2725 0 : if (proc->waitLock != NULL)
2726 0 : continue; /* do not count if blocked on a lock */
2727 0 : count++;
2728 0 : if (count >= min)
2729 0 : break;
2730 : }
2731 :
2732 0 : return count >= min;
2733 : }
2734 :
2735 : /*
2736 : * CountDBBackends --- count backends that are using specified database
2737 : */
2738 : int
2739 0 : CountDBBackends(Oid databaseid)
2740 : {
2741 0 : ProcArrayStruct *arrayP = procArray;
2742 0 : int count = 0;
2743 : int index;
2744 :
2745 0 : LWLockAcquire(ProcArrayLock, LW_SHARED);
2746 :
2747 0 : for (index = 0; index < arrayP->numProcs; index++)
2748 : {
2749 0 : int pgprocno = arrayP->pgprocnos[index];
2750 0 : volatile PGPROC *proc = &allProcs[pgprocno];
2751 :
2752 0 : if (proc->pid == 0)
2753 0 : continue; /* do not count prepared xacts */
2754 0 : if (!OidIsValid(databaseid) ||
2755 0 : proc->databaseId == databaseid)
2756 0 : count++;
2757 : }
2758 :
2759 0 : LWLockRelease(ProcArrayLock);
2760 :
2761 0 : return count;
2762 : }
2763 :
2764 : /*
2765 : * CountDBConnections --- counts database backends ignoring any background
2766 : * worker processes
2767 : */
2768 : int
2769 0 : CountDBConnections(Oid databaseid)
2770 : {
2771 0 : ProcArrayStruct *arrayP = procArray;
2772 0 : int count = 0;
2773 : int index;
2774 :
2775 0 : LWLockAcquire(ProcArrayLock, LW_SHARED);
2776 :
2777 0 : for (index = 0; index < arrayP->numProcs; index++)
2778 : {
2779 0 : int pgprocno = arrayP->pgprocnos[index];
2780 0 : volatile PGPROC *proc = &allProcs[pgprocno];
2781 :
2782 0 : if (proc->pid == 0)
2783 0 : continue; /* do not count prepared xacts */
2784 0 : if (proc->isBackgroundWorker)
2785 0 : continue; /* do not count background workers */
2786 0 : if (!OidIsValid(databaseid) ||
2787 0 : proc->databaseId == databaseid)
2788 0 : count++;
2789 : }
2790 :
2791 0 : LWLockRelease(ProcArrayLock);
2792 :
2793 0 : return count;
2794 : }
2795 :
2796 : /*
2797 : * CancelDBBackends --- cancel backends that are using specified database
2798 : */
2799 : void
2800 0 : CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending)
2801 : {
2802 0 : ProcArrayStruct *arrayP = procArray;
2803 : int index;
2804 0 : pid_t pid = 0;
2805 :
2806 : /* tell all backends to die */
2807 0 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2808 :
2809 0 : for (index = 0; index < arrayP->numProcs; index++)
2810 : {
2811 0 : int pgprocno = arrayP->pgprocnos[index];
2812 0 : volatile PGPROC *proc = &allProcs[pgprocno];
2813 :
2814 0 : if (databaseid == InvalidOid || proc->databaseId == databaseid)
2815 : {
2816 : VirtualTransactionId procvxid;
2817 :
2818 0 : GET_VXID_FROM_PGPROC(procvxid, *proc);
2819 :
2820 0 : proc->recoveryConflictPending = conflictPending;
2821 0 : pid = proc->pid;
2822 0 : if (pid != 0)
2823 : {
2824 : /*
2825 : * Kill the pid if it's still here. If not, that's what we
2826 : * wanted so ignore any errors.
2827 : */
2828 0 : (void) SendProcSignal(pid, sigmode, procvxid.backendId);
2829 : }
2830 : }
2831 : }
2832 :
2833 0 : LWLockRelease(ProcArrayLock);
2834 0 : }
2835 :
2836 : /*
2837 : * CountUserBackends --- count backends that are used by specified user
2838 : */
2839 : int
2840 0 : CountUserBackends(Oid roleid)
2841 : {
2842 0 : ProcArrayStruct *arrayP = procArray;
2843 0 : int count = 0;
2844 : int index;
2845 :
2846 0 : LWLockAcquire(ProcArrayLock, LW_SHARED);
2847 :
2848 0 : for (index = 0; index < arrayP->numProcs; index++)
2849 : {
2850 0 : int pgprocno = arrayP->pgprocnos[index];
2851 0 : volatile PGPROC *proc = &allProcs[pgprocno];
2852 :
2853 0 : if (proc->pid == 0)
2854 0 : continue; /* do not count prepared xacts */
2855 0 : if (proc->isBackgroundWorker)
2856 0 : continue; /* do not count background workers */
2857 0 : if (proc->roleId == roleid)
2858 0 : count++;
2859 : }
2860 :
2861 0 : LWLockRelease(ProcArrayLock);
2862 :
2863 0 : return count;
2864 : }
2865 :
2866 : /*
2867 : * CountOtherDBBackends -- check for other backends running in the given DB
2868 : *
2869 : * If there are other backends in the DB, we will wait a maximum of 5 seconds
2870 : * for them to exit. Autovacuum backends are encouraged to exit early by
2871 : * sending them SIGTERM, but normal user backends are just waited for.
2872 : *
2873 : * The current backend is always ignored; it is caller's responsibility to
2874 : * check whether the current backend uses the given DB, if it's important.
2875 : *
2876 : * Returns TRUE if there are (still) other backends in the DB, FALSE if not.
2877 : * Also, *nbackends and *nprepared are set to the number of other backends
2878 : * and prepared transactions in the DB, respectively.
2879 : *
2880 : * This function is used to interlock DROP DATABASE and related commands
2881 : * against there being any active backends in the target DB --- dropping the
2882 : * DB while active backends remain would be a Bad Thing. Note that we cannot
2883 : * detect here the possibility of a newly-started backend that is trying to
2884 : * connect to the doomed database, so additional interlocking is needed during
2885 : * backend startup. The caller should normally hold an exclusive lock on the
2886 : * target DB before calling this, which is one reason we mustn't wait
2887 : * indefinitely.
2888 : */
2889 : bool
2890 3 : CountOtherDBBackends(Oid databaseId, int *nbackends, int *nprepared)
2891 : {
2892 3 : ProcArrayStruct *arrayP = procArray;
2893 :
2894 : #define MAXAUTOVACPIDS 10 /* max autovacs to SIGTERM per iteration */
2895 : int autovac_pids[MAXAUTOVACPIDS];
2896 : int tries;
2897 :
2898 : /* 50 tries with 100ms sleep between tries makes 5 sec total wait */
2899 3 : for (tries = 0; tries < 50; tries++)
2900 : {
2901 3 : int nautovacs = 0;
2902 3 : bool found = false;
2903 : int index;
2904 :
2905 3 : CHECK_FOR_INTERRUPTS();
2906 :
2907 3 : *nbackends = *nprepared = 0;
2908 :
2909 3 : LWLockAcquire(ProcArrayLock, LW_SHARED);
2910 :
2911 8 : for (index = 0; index < arrayP->numProcs; index++)
2912 : {
2913 5 : int pgprocno = arrayP->pgprocnos[index];
2914 5 : volatile PGPROC *proc = &allProcs[pgprocno];
2915 5 : volatile PGXACT *pgxact = &allPgXact[pgprocno];
2916 :
2917 5 : if (proc->databaseId != databaseId)
2918 3 : continue;
2919 2 : if (proc == MyProc)
2920 2 : continue;
2921 :
2922 0 : found = true;
2923 :
2924 0 : if (proc->pid == 0)
2925 0 : (*nprepared)++;
2926 : else
2927 : {
2928 0 : (*nbackends)++;
2929 0 : if ((pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) &&
2930 : nautovacs < MAXAUTOVACPIDS)
2931 0 : autovac_pids[nautovacs++] = proc->pid;
2932 : }
2933 : }
2934 :
2935 3 : LWLockRelease(ProcArrayLock);
2936 :
2937 3 : if (!found)
2938 3 : return false; /* no conflicting backends, so done */
2939 :
2940 : /*
2941 : * Send SIGTERM to any conflicting autovacuums before sleeping. We
2942 : * postpone this step until after the loop because we don't want to
2943 : * hold ProcArrayLock while issuing kill(). We have no idea what might
2944 : * block kill() inside the kernel...
2945 : */
2946 0 : for (index = 0; index < nautovacs; index++)
2947 0 : (void) kill(autovac_pids[index], SIGTERM); /* ignore any error */
2948 :
2949 : /* sleep, then try again */
2950 0 : pg_usleep(100 * 1000L); /* 100ms */
2951 : }
2952 :
2953 0 : return true; /* timed out, still conflicts */
2954 : }
2955 :
2956 : /*
2957 : * ProcArraySetReplicationSlotXmin
2958 : *
2959 : * Install limits to future computations of the xmin horizon to prevent vacuum
2960 : * and HOT pruning from removing affected rows still needed by clients with
2961 : * replicaton slots.
2962 : */
2963 : void
2964 3 : ProcArraySetReplicationSlotXmin(TransactionId xmin, TransactionId catalog_xmin,
2965 : bool already_locked)
2966 : {
2967 3 : Assert(!already_locked || LWLockHeldByMe(ProcArrayLock));
2968 :
2969 3 : if (!already_locked)
2970 3 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
2971 :
2972 3 : procArray->replication_slot_xmin = xmin;
2973 3 : procArray->replication_slot_catalog_xmin = catalog_xmin;
2974 :
2975 3 : if (!already_locked)
2976 3 : LWLockRelease(ProcArrayLock);
2977 3 : }
2978 :
2979 : /*
2980 : * ProcArrayGetReplicationSlotXmin
2981 : *
2982 : * Return the current slot xmin limits. That's useful to be able to remove
2983 : * data that's older than those limits.
2984 : */
2985 : void
2986 0 : ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
2987 : TransactionId *catalog_xmin)
2988 : {
2989 0 : LWLockAcquire(ProcArrayLock, LW_SHARED);
2990 :
2991 0 : if (xmin != NULL)
2992 0 : *xmin = procArray->replication_slot_xmin;
2993 :
2994 0 : if (catalog_xmin != NULL)
2995 0 : *catalog_xmin = procArray->replication_slot_catalog_xmin;
2996 :
2997 0 : LWLockRelease(ProcArrayLock);
2998 0 : }
2999 :
3000 :
3001 : #define XidCacheRemove(i) \
3002 : do { \
3003 : MyProc->subxids.xids[i] = MyProc->subxids.xids[MyPgXact->nxids - 1]; \
3004 : MyPgXact->nxids--; \
3005 : } while (0)
3006 :
3007 : /*
3008 : * XidCacheRemoveRunningXids
3009 : *
3010 : * Remove a bunch of TransactionIds from the list of known-running
3011 : * subtransactions for my backend. Both the specified xid and those in
3012 : * the xids[] array (of length nxids) are removed from the subxids cache.
3013 : * latestXid must be the latest XID among the group.
3014 : */
3015 : void
3016 40 : XidCacheRemoveRunningXids(TransactionId xid,
3017 : int nxids, const TransactionId *xids,
3018 : TransactionId latestXid)
3019 : {
3020 : int i,
3021 : j;
3022 :
3023 40 : Assert(TransactionIdIsValid(xid));
3024 :
3025 : /*
3026 : * We must hold ProcArrayLock exclusively in order to remove transactions
3027 : * from the PGPROC array. (See src/backend/access/transam/README.) It's
3028 : * possible this could be relaxed since we know this routine is only used
3029 : * to abort subtransactions, but pending closer analysis we'd best be
3030 : * conservative.
3031 : */
3032 40 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3033 :
3034 : /*
3035 : * Under normal circumstances xid and xids[] will be in increasing order,
3036 : * as will be the entries in subxids. Scan backwards to avoid O(N^2)
3037 : * behavior when removing a lot of xids.
3038 : */
3039 41 : for (i = nxids - 1; i >= 0; i--)
3040 : {
3041 1 : TransactionId anxid = xids[i];
3042 :
3043 1 : for (j = MyPgXact->nxids - 1; j >= 0; j--)
3044 : {
3045 1 : if (TransactionIdEquals(MyProc->subxids.xids[j], anxid))
3046 : {
3047 1 : XidCacheRemove(j);
3048 1 : break;
3049 : }
3050 : }
3051 :
3052 : /*
3053 : * Ordinarily we should have found it, unless the cache has
3054 : * overflowed. However it's also possible for this routine to be
3055 : * invoked multiple times for the same subtransaction, in case of an
3056 : * error during AbortSubTransaction. So instead of Assert, emit a
3057 : * debug warning.
3058 : */
3059 1 : if (j < 0 && !MyPgXact->overflowed)
3060 0 : elog(WARNING, "did not find subXID %u in MyProc", anxid);
3061 : }
3062 :
3063 40 : for (j = MyPgXact->nxids - 1; j >= 0; j--)
3064 : {
3065 40 : if (TransactionIdEquals(MyProc->subxids.xids[j], xid))
3066 : {
3067 40 : XidCacheRemove(j);
3068 40 : break;
3069 : }
3070 : }
3071 : /* Ordinarily we should have found it, unless the cache has overflowed */
3072 40 : if (j < 0 && !MyPgXact->overflowed)
3073 0 : elog(WARNING, "did not find subXID %u in MyProc", xid);
3074 :
3075 : /* Also advance global latestCompletedXid while holding the lock */
3076 40 : if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
3077 : latestXid))
3078 22 : ShmemVariableCache->latestCompletedXid = latestXid;
3079 :
3080 40 : LWLockRelease(ProcArrayLock);
3081 40 : }
3082 :
#ifdef XIDCACHE_DEBUG

/*
 * DisplayXidCache
 *
 * Print stats about effectiveness of XID cache: one line on stderr listing
 * the current value of each xc_* counter.  Only compiled when
 * XIDCACHE_DEBUG is defined.
 */
static void
DisplayXidCache(void)
{
	fprintf(stderr,
			"XidCache: xmin: %ld, known: %ld, myxact: %ld, latest: %ld, mainxid: %ld, childxid: %ld, knownassigned: %ld, nooflo: %ld, slow: %ld\n",
			xc_by_recent_xmin, xc_by_known_xact, xc_by_my_xact,
			xc_by_latest_xid, xc_by_main_xid, xc_by_child_xid,
			xc_by_known_assigned, xc_no_overflow, xc_slow_answer);
}
#endif							/* XIDCACHE_DEBUG */
3104 :
3105 :
3106 : /* ----------------------------------------------
3107 : * KnownAssignedTransactions sub-module
3108 : * ----------------------------------------------
3109 : */
3110 :
3111 : /*
3112 : * In Hot Standby mode, we maintain a list of transactions that are (or were)
3113 : * running in the master at the current point in WAL. These XIDs must be
3114 : * treated as running by standby transactions, even though they are not in
3115 : * the standby server's PGXACT array.
3116 : *
3117 : * We record all XIDs that we know have been assigned. That includes all the
3118 : * XIDs seen in WAL records, plus all unobserved XIDs that we can deduce have
3119 : * been assigned. We can deduce the existence of unobserved XIDs because we
3120 : * know XIDs are assigned in sequence, with no gaps. The KnownAssignedXids
3121 : * list expands as new XIDs are observed or inferred, and contracts when
3122 : * transaction completion records arrive.
3123 : *
3124 : * During hot standby we do not fret too much about the distinction between
3125 : * top-level XIDs and subtransaction XIDs. We store both together in the
3126 : * KnownAssignedXids list. In backends, this is copied into snapshots in
3127 : * GetSnapshotData(), taking advantage of the fact that XidInMVCCSnapshot()
3128 : * doesn't care about the distinction either. Subtransaction XIDs are
3129 : * effectively treated as top-level XIDs and in the typical case pg_subtrans
3130 : * links are *not* maintained (which does not affect visibility).
3131 : *
3132 : * We have room in KnownAssignedXids and in snapshots to hold maxProcs *
3133 : * (1 + PGPROC_MAX_CACHED_SUBXIDS) XIDs, so every master transaction must
3134 : * report its subtransaction XIDs in a WAL XLOG_XACT_ASSIGNMENT record at
3135 : * least every PGPROC_MAX_CACHED_SUBXIDS. When we receive one of these
3136 : * records, we mark the subXIDs as children of the top XID in pg_subtrans,
3137 : * and then remove them from KnownAssignedXids. This prevents overflow of
3138 : * KnownAssignedXids and snapshots, at the cost that status checks for these
3139 : * subXIDs will take a slower path through TransactionIdIsInProgress().
3140 : * This means that KnownAssignedXids is not necessarily complete for subXIDs,
3141 : * though it should be complete for top-level XIDs; this is the same situation
3142 : * that holds with respect to the PGPROC entries in normal running.
3143 : *
3144 : * When we throw away subXIDs from KnownAssignedXids, we need to keep track of
3145 : * that, similarly to tracking overflow of a PGPROC's subxids array. We do
3146 : * that by remembering the lastOverflowedXID, ie the last thrown-away subXID.
3147 : * As long as that is within the range of interesting XIDs, we have to assume
3148 : * that subXIDs are missing from snapshots. (Note that subXID overflow occurs
3149 : * on primary when 65th subXID arrives, whereas on standby it occurs when 64th
3150 : * subXID arrives - that is not an error.)
3151 : *
3152 : * Should a backend on primary somehow disappear before it can write an abort
3153 : * record, then we just leave those XIDs in KnownAssignedXids. They actually
3154 : * aborted but we think they were running; the distinction is irrelevant
3155 : * because either way any changes done by the transaction are not visible to
3156 : * backends in the standby. We prune KnownAssignedXids when
3157 : * XLOG_RUNNING_XACTS arrives, to forestall possible overflow of the
3158 : * array due to such dead XIDs.
3159 : */
3160 :
3161 : /*
3162 : * RecordKnownAssignedTransactionIds
3163 : * Record the given XID in KnownAssignedXids, as well as any preceding
3164 : * unobserved XIDs.
3165 : *
3166 : * RecordKnownAssignedTransactionIds() should be run for *every* WAL record
3167 : * associated with a transaction. Must be called for each record after we
3168 : * have executed StartupCLOG() et al, since we must ExtendCLOG() etc..
3169 : *
3170 : * Called during recovery in analogy with and in place of GetNewTransactionId()
3171 : */
3172 : void
3173 0 : RecordKnownAssignedTransactionIds(TransactionId xid)
3174 : {
3175 0 : Assert(standbyState >= STANDBY_INITIALIZED);
3176 0 : Assert(TransactionIdIsValid(xid));
3177 0 : Assert(TransactionIdIsValid(latestObservedXid));
3178 :
3179 0 : elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u",
3180 : xid, latestObservedXid);
3181 :
3182 : /*
3183 : * When a newly observed xid arrives, it is frequently the case that it is
3184 : * *not* the next xid in sequence. When this occurs, we must treat the
3185 : * intervening xids as running also.
3186 : */
3187 0 : if (TransactionIdFollows(xid, latestObservedXid))
3188 : {
3189 : TransactionId next_expected_xid;
3190 :
3191 : /*
3192 : * Extend subtrans like we do in GetNewTransactionId() during normal
3193 : * operation using individual extend steps. Note that we do not need
3194 : * to extend clog since its extensions are WAL logged.
3195 : *
3196 : * This part has to be done regardless of standbyState since we
3197 : * immediately start assigning subtransactions to their toplevel
3198 : * transactions.
3199 : */
3200 0 : next_expected_xid = latestObservedXid;
3201 0 : while (TransactionIdPrecedes(next_expected_xid, xid))
3202 : {
3203 0 : TransactionIdAdvance(next_expected_xid);
3204 0 : ExtendSUBTRANS(next_expected_xid);
3205 : }
3206 0 : Assert(next_expected_xid == xid);
3207 :
3208 : /*
3209 : * If the KnownAssignedXids machinery isn't up yet, there's nothing
3210 : * more to do since we don't track assigned xids yet.
3211 : */
3212 0 : if (standbyState <= STANDBY_INITIALIZED)
3213 : {
3214 0 : latestObservedXid = xid;
3215 0 : return;
3216 : }
3217 :
3218 : /*
3219 : * Add (latestObservedXid, xid] onto the KnownAssignedXids array.
3220 : */
3221 0 : next_expected_xid = latestObservedXid;
3222 0 : TransactionIdAdvance(next_expected_xid);
3223 0 : KnownAssignedXidsAdd(next_expected_xid, xid, false);
3224 :
3225 : /*
3226 : * Now we can advance latestObservedXid
3227 : */
3228 0 : latestObservedXid = xid;
3229 :
3230 : /* ShmemVariableCache->nextXid must be beyond any observed xid */
3231 0 : next_expected_xid = latestObservedXid;
3232 0 : TransactionIdAdvance(next_expected_xid);
3233 0 : LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
3234 0 : ShmemVariableCache->nextXid = next_expected_xid;
3235 0 : LWLockRelease(XidGenLock);
3236 : }
3237 : }
3238 :
3239 : /*
3240 : * ExpireTreeKnownAssignedTransactionIds
3241 : * Remove the given XIDs from KnownAssignedXids.
3242 : *
3243 : * Called during recovery in analogy with and in place of ProcArrayEndTransaction()
3244 : */
3245 : void
3246 0 : ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids,
3247 : TransactionId *subxids, TransactionId max_xid)
3248 : {
3249 0 : Assert(standbyState >= STANDBY_INITIALIZED);
3250 :
3251 : /*
3252 : * Uses same locking as transaction commit
3253 : */
3254 0 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3255 :
3256 0 : KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
3257 :
3258 : /* As in ProcArrayEndTransaction, advance latestCompletedXid */
3259 0 : if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid,
3260 : max_xid))
3261 0 : ShmemVariableCache->latestCompletedXid = max_xid;
3262 :
3263 0 : LWLockRelease(ProcArrayLock);
3264 0 : }
3265 :
3266 : /*
3267 : * ExpireAllKnownAssignedTransactionIds
3268 : * Remove all entries in KnownAssignedXids
3269 : */
3270 : void
3271 0 : ExpireAllKnownAssignedTransactionIds(void)
3272 : {
3273 0 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3274 0 : KnownAssignedXidsRemovePreceding(InvalidTransactionId);
3275 0 : LWLockRelease(ProcArrayLock);
3276 0 : }
3277 :
3278 : /*
3279 : * ExpireOldKnownAssignedTransactionIds
3280 : * Remove KnownAssignedXids entries preceding the given XID
3281 : */
3282 : void
3283 0 : ExpireOldKnownAssignedTransactionIds(TransactionId xid)
3284 : {
3285 0 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3286 0 : KnownAssignedXidsRemovePreceding(xid);
3287 0 : LWLockRelease(ProcArrayLock);
3288 0 : }
3289 :
3290 :
3291 : /*
3292 : * Private module functions to manipulate KnownAssignedXids
3293 : *
3294 : * There are 5 main uses of the KnownAssignedXids data structure:
3295 : *
3296 : * * backends taking snapshots - all valid XIDs need to be copied out
3297 : * * backends seeking to determine presence of a specific XID
3298 : * * startup process adding new known-assigned XIDs
3299 : * * startup process removing specific XIDs as transactions end
3300 : * * startup process pruning array when special WAL records arrive
3301 : *
3302 : * This data structure is known to be a hot spot during Hot Standby, so we
3303 : * go to some lengths to make these operations as efficient and as concurrent
3304 : * as possible.
3305 : *
3306 : * The XIDs are stored in an array in sorted order --- TransactionIdPrecedes
3307 : * order, to be exact --- to allow binary search for specific XIDs. Note:
3308 : * in general TransactionIdPrecedes would not provide a total order, but
3309 : * we know that the entries present at any instant should not extend across
3310 : * a large enough fraction of XID space to wrap around (the master would
3311 : * shut down for fear of XID wrap long before that happens). So it's OK to
3312 : * use TransactionIdPrecedes as a binary-search comparator.
3313 : *
3314 : * It's cheap to maintain the sortedness during insertions, since new known
3315 : * XIDs are always reported in XID order; we just append them at the right.
3316 : *
3317 : * To keep individual deletions cheap, we need to allow gaps in the array.
3318 : * This is implemented by marking array elements as valid or invalid using
3319 : * the parallel boolean array KnownAssignedXidsValid[]. A deletion is done
3320 : * by setting KnownAssignedXidsValid[i] to false, *without* clearing the
3321 : * XID entry itself. This preserves the property that the XID entries are
3322 : * sorted, so we can do binary searches easily. Periodically we compress
3323 : * out the unused entries; that's much cheaper than having to compress the
3324 : * array immediately on every deletion.
3325 : *
3326 : * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[]
3327 : * are those with indexes tail <= i < head; items outside this subscript range
3328 : * have unspecified contents. When head reaches the end of the array, we
3329 : * force compression of unused entries rather than wrapping around, since
3330 : * allowing wraparound would greatly complicate the search logic. We maintain
3331 : * an explicit tail pointer so that pruning of old XIDs can be done without
3332 : * immediately moving the array contents. In most cases only a small fraction
3333 : * of the array contains valid entries at any instant.
3334 : *
3335 : * Although only the startup process can ever change the KnownAssignedXids
3336 : * data structure, we still need interlocking so that standby backends will
3337 : * not observe invalid intermediate states. The convention is that backends
3338 : * must hold shared ProcArrayLock to examine the array. To remove XIDs from
3339 : * the array, the startup process must hold ProcArrayLock exclusively, for
3340 : * the usual transactional reasons (compare commit/abort of a transaction
3341 : * during normal running). Compressing unused entries out of the array
3342 : * likewise requires exclusive lock. To add XIDs to the array, we just insert
3343 : * them into slots to the right of the head pointer and then advance the head
3344 : * pointer. This wouldn't require any lock at all, except that on machines
3345 : * with weak memory ordering we need to be careful that other processors
3346 : * see the array element changes before they see the head pointer change.
3347 : * We handle this by using a spinlock to protect reads and writes of the
3348 : * head/tail pointers. (We could dispense with the spinlock if we were to
3349 : * create suitable memory access barrier primitives and use those instead.)
3350 : * The spinlock must be taken to read or write the head/tail pointers unless
3351 : * the caller holds ProcArrayLock exclusively.
3352 : *
3353 : * Algorithmic analysis:
3354 : *
3355 : * If we have a maximum of M slots, with N XIDs currently spread across
3356 : * S elements then we have N <= S <= M always.
3357 : *
3358 : * * Adding a new XID is O(1) and needs little locking (unless compression
3359 : * must happen)
3360 : * * Compressing the array is O(S) and requires exclusive lock
3361 : * * Removing an XID is O(logS) and requires exclusive lock
3362 : * * Taking a snapshot is O(S) and requires shared lock
3363 : * * Checking for an XID is O(logS) and requires shared lock
3364 : *
3365 : * In comparison, using a hash table for KnownAssignedXids would mean that
3366 : * taking snapshots would be O(M). If we can maintain S << M then the
3367 : * sorted array technique will deliver significantly faster snapshots.
3368 : * If we try to keep S too small then we will spend too much time compressing,
3369 : * so there is an optimal point for any workload mix. We use a heuristic to
3370 : * decide when to compress the array, though trimming also helps reduce
3371 : * frequency of compressing. The heuristic requires us to track the number of
3372 : * currently valid XIDs in the array.
3373 : */
3374 :
3375 :
3376 : /*
3377 : * Compress KnownAssignedXids by shifting valid data down to the start of the
3378 : * array, removing any gaps.
3379 : *
3380 : * A compression step is forced if "force" is true, otherwise we do it
3381 : * only if a heuristic indicates it's a good time to do it.
3382 : *
3383 : * Caller must hold ProcArrayLock in exclusive mode.
3384 : */
3385 : static void
3386 0 : KnownAssignedXidsCompress(bool force)
3387 : {
3388 : /* use volatile pointer to prevent code rearrangement */
3389 0 : volatile ProcArrayStruct *pArray = procArray;
3390 : int head,
3391 : tail;
3392 : int compress_index;
3393 : int i;
3394 :
3395 : /* no spinlock required since we hold ProcArrayLock exclusively */
3396 0 : head = pArray->headKnownAssignedXids;
3397 0 : tail = pArray->tailKnownAssignedXids;
3398 :
3399 0 : if (!force)
3400 : {
3401 : /*
3402 : * If we can choose how much to compress, use a heuristic to avoid
3403 : * compressing too often or not often enough.
3404 : *
3405 : * Heuristic is if we have a large enough current spread and less than
3406 : * 50% of the elements are currently in use, then compress. This
3407 : * should ensure we compress fairly infrequently. We could compress
3408 : * less often though the virtual array would spread out more and
3409 : * snapshots would become more expensive.
3410 : */
3411 0 : int nelements = head - tail;
3412 :
3413 0 : if (nelements < 4 * PROCARRAY_MAXPROCS ||
3414 0 : nelements < 2 * pArray->numKnownAssignedXids)
3415 0 : return;
3416 : }
3417 :
3418 : /*
3419 : * We compress the array by reading the valid values from tail to head,
3420 : * re-aligning data to 0th element.
3421 : */
3422 0 : compress_index = 0;
3423 0 : for (i = tail; i < head; i++)
3424 : {
3425 0 : if (KnownAssignedXidsValid[i])
3426 : {
3427 0 : KnownAssignedXids[compress_index] = KnownAssignedXids[i];
3428 0 : KnownAssignedXidsValid[compress_index] = true;
3429 0 : compress_index++;
3430 : }
3431 : }
3432 :
3433 0 : pArray->tailKnownAssignedXids = 0;
3434 0 : pArray->headKnownAssignedXids = compress_index;
3435 : }
3436 :
3437 : /*
3438 : * Add xids into KnownAssignedXids at the head of the array.
3439 : *
3440 : * xids from from_xid to to_xid, inclusive, are added to the array.
3441 : *
3442 : * If exclusive_lock is true then caller already holds ProcArrayLock in
3443 : * exclusive mode, so we need no extra locking here. Else caller holds no
3444 : * lock, so we need to be sure we maintain sufficient interlocks against
3445 : * concurrent readers. (Only the startup process ever calls this, so no need
3446 : * to worry about concurrent writers.)
3447 : */
3448 : static void
3449 0 : KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
3450 : bool exclusive_lock)
3451 : {
3452 : /* use volatile pointer to prevent code rearrangement */
3453 0 : volatile ProcArrayStruct *pArray = procArray;
3454 : TransactionId next_xid;
3455 : int head,
3456 : tail;
3457 : int nxids;
3458 : int i;
3459 :
3460 0 : Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid));
3461 :
3462 : /*
3463 : * Calculate how many array slots we'll need. Normally this is cheap; in
3464 : * the unusual case where the XIDs cross the wrap point, we do it the hard
3465 : * way.
3466 : */
3467 0 : if (to_xid >= from_xid)
3468 0 : nxids = to_xid - from_xid + 1;
3469 : else
3470 : {
3471 0 : nxids = 1;
3472 0 : next_xid = from_xid;
3473 0 : while (TransactionIdPrecedes(next_xid, to_xid))
3474 : {
3475 0 : nxids++;
3476 0 : TransactionIdAdvance(next_xid);
3477 : }
3478 : }
3479 :
3480 : /*
3481 : * Since only the startup process modifies the head/tail pointers, we
3482 : * don't need a lock to read them here.
3483 : */
3484 0 : head = pArray->headKnownAssignedXids;
3485 0 : tail = pArray->tailKnownAssignedXids;
3486 :
3487 0 : Assert(head >= 0 && head <= pArray->maxKnownAssignedXids);
3488 0 : Assert(tail >= 0 && tail < pArray->maxKnownAssignedXids);
3489 :
3490 : /*
3491 : * Verify that insertions occur in TransactionId sequence. Note that even
3492 : * if the last existing element is marked invalid, it must still have a
3493 : * correctly sequenced XID value.
3494 : */
3495 0 : if (head > tail &&
3496 0 : TransactionIdFollowsOrEquals(KnownAssignedXids[head - 1], from_xid))
3497 : {
3498 0 : KnownAssignedXidsDisplay(LOG);
3499 0 : elog(ERROR, "out-of-order XID insertion in KnownAssignedXids");
3500 : }
3501 :
3502 : /*
3503 : * If our xids won't fit in the remaining space, compress out free space
3504 : */
3505 0 : if (head + nxids > pArray->maxKnownAssignedXids)
3506 : {
3507 : /* must hold lock to compress */
3508 0 : if (!exclusive_lock)
3509 0 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3510 :
3511 0 : KnownAssignedXidsCompress(true);
3512 :
3513 0 : head = pArray->headKnownAssignedXids;
3514 : /* note: we no longer care about the tail pointer */
3515 :
3516 0 : if (!exclusive_lock)
3517 0 : LWLockRelease(ProcArrayLock);
3518 :
3519 : /*
3520 : * If it still won't fit then we're out of memory
3521 : */
3522 0 : if (head + nxids > pArray->maxKnownAssignedXids)
3523 0 : elog(ERROR, "too many KnownAssignedXids");
3524 : }
3525 :
3526 : /* Now we can insert the xids into the space starting at head */
3527 0 : next_xid = from_xid;
3528 0 : for (i = 0; i < nxids; i++)
3529 : {
3530 0 : KnownAssignedXids[head] = next_xid;
3531 0 : KnownAssignedXidsValid[head] = true;
3532 0 : TransactionIdAdvance(next_xid);
3533 0 : head++;
3534 : }
3535 :
3536 : /* Adjust count of number of valid entries */
3537 0 : pArray->numKnownAssignedXids += nxids;
3538 :
3539 : /*
3540 : * Now update the head pointer. We use a spinlock to protect this
3541 : * pointer, not because the update is likely to be non-atomic, but to
3542 : * ensure that other processors see the above array updates before they
3543 : * see the head pointer change.
3544 : *
3545 : * If we're holding ProcArrayLock exclusively, there's no need to take the
3546 : * spinlock.
3547 : */
3548 0 : if (exclusive_lock)
3549 0 : pArray->headKnownAssignedXids = head;
3550 : else
3551 : {
3552 0 : SpinLockAcquire(&pArray->known_assigned_xids_lck);
3553 0 : pArray->headKnownAssignedXids = head;
3554 0 : SpinLockRelease(&pArray->known_assigned_xids_lck);
3555 : }
3556 0 : }
3557 :
3558 : /*
3559 : * KnownAssignedXidsSearch
3560 : *
3561 : * Searches KnownAssignedXids for a specific xid and optionally removes it.
3562 : * Returns true if it was found, false if not.
3563 : *
3564 : * Caller must hold ProcArrayLock in shared or exclusive mode.
3565 : * Exclusive lock must be held for remove = true.
3566 : */
3567 : static bool
3568 0 : KnownAssignedXidsSearch(TransactionId xid, bool remove)
3569 : {
3570 : /* use volatile pointer to prevent code rearrangement */
3571 0 : volatile ProcArrayStruct *pArray = procArray;
3572 : int first,
3573 : last;
3574 : int head;
3575 : int tail;
3576 0 : int result_index = -1;
3577 :
3578 0 : if (remove)
3579 : {
3580 : /* we hold ProcArrayLock exclusively, so no need for spinlock */
3581 0 : tail = pArray->tailKnownAssignedXids;
3582 0 : head = pArray->headKnownAssignedXids;
3583 : }
3584 : else
3585 : {
3586 : /* take spinlock to ensure we see up-to-date array contents */
3587 0 : SpinLockAcquire(&pArray->known_assigned_xids_lck);
3588 0 : tail = pArray->tailKnownAssignedXids;
3589 0 : head = pArray->headKnownAssignedXids;
3590 0 : SpinLockRelease(&pArray->known_assigned_xids_lck);
3591 : }
3592 :
3593 : /*
3594 : * Standard binary search. Note we can ignore the KnownAssignedXidsValid
3595 : * array here, since even invalid entries will contain sorted XIDs.
3596 : */
3597 0 : first = tail;
3598 0 : last = head - 1;
3599 0 : while (first <= last)
3600 : {
3601 : int mid_index;
3602 : TransactionId mid_xid;
3603 :
3604 0 : mid_index = (first + last) / 2;
3605 0 : mid_xid = KnownAssignedXids[mid_index];
3606 :
3607 0 : if (xid == mid_xid)
3608 : {
3609 0 : result_index = mid_index;
3610 0 : break;
3611 : }
3612 0 : else if (TransactionIdPrecedes(xid, mid_xid))
3613 0 : last = mid_index - 1;
3614 : else
3615 0 : first = mid_index + 1;
3616 : }
3617 :
3618 0 : if (result_index < 0)
3619 0 : return false; /* not in array */
3620 :
3621 0 : if (!KnownAssignedXidsValid[result_index])
3622 0 : return false; /* in array, but invalid */
3623 :
3624 0 : if (remove)
3625 : {
3626 0 : KnownAssignedXidsValid[result_index] = false;
3627 :
3628 0 : pArray->numKnownAssignedXids--;
3629 0 : Assert(pArray->numKnownAssignedXids >= 0);
3630 :
3631 : /*
3632 : * If we're removing the tail element then advance tail pointer over
3633 : * any invalid elements. This will speed future searches.
3634 : */
3635 0 : if (result_index == tail)
3636 : {
3637 0 : tail++;
3638 0 : while (tail < head && !KnownAssignedXidsValid[tail])
3639 0 : tail++;
3640 0 : if (tail >= head)
3641 : {
3642 : /* Array is empty, so we can reset both pointers */
3643 0 : pArray->headKnownAssignedXids = 0;
3644 0 : pArray->tailKnownAssignedXids = 0;
3645 : }
3646 : else
3647 : {
3648 0 : pArray->tailKnownAssignedXids = tail;
3649 : }
3650 : }
3651 : }
3652 :
3653 0 : return true;
3654 : }
3655 :
3656 : /*
3657 : * Is the specified XID present in KnownAssignedXids[]?
3658 : *
3659 : * Caller must hold ProcArrayLock in shared or exclusive mode.
3660 : */
3661 : static bool
3662 0 : KnownAssignedXidExists(TransactionId xid)
3663 : {
3664 0 : Assert(TransactionIdIsValid(xid));
3665 :
3666 0 : return KnownAssignedXidsSearch(xid, false);
3667 : }
3668 :
3669 : /*
3670 : * Remove the specified XID from KnownAssignedXids[].
3671 : *
3672 : * Caller must hold ProcArrayLock in exclusive mode.
3673 : */
3674 : static void
3675 0 : KnownAssignedXidsRemove(TransactionId xid)
3676 : {
3677 0 : Assert(TransactionIdIsValid(xid));
3678 :
3679 0 : elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
3680 :
3681 : /*
3682 : * Note: we cannot consider it an error to remove an XID that's not
3683 : * present. We intentionally remove subxact IDs while processing
3684 : * XLOG_XACT_ASSIGNMENT, to avoid array overflow. Then those XIDs will be
3685 : * removed again when the top-level xact commits or aborts.
3686 : *
3687 : * It might be possible to track such XIDs to distinguish this case from
3688 : * actual errors, but it would be complicated and probably not worth it.
3689 : * So, just ignore the search result.
3690 : */
3691 0 : (void) KnownAssignedXidsSearch(xid, true);
3692 0 : }
3693 :
3694 : /*
3695 : * KnownAssignedXidsRemoveTree
3696 : * Remove xid (if it's not InvalidTransactionId) and all the subxids.
3697 : *
3698 : * Caller must hold ProcArrayLock in exclusive mode.
3699 : */
3700 : static void
3701 0 : KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
3702 : TransactionId *subxids)
3703 : {
3704 : int i;
3705 :
3706 0 : if (TransactionIdIsValid(xid))
3707 0 : KnownAssignedXidsRemove(xid);
3708 :
3709 0 : for (i = 0; i < nsubxids; i++)
3710 0 : KnownAssignedXidsRemove(subxids[i]);
3711 :
3712 : /* Opportunistically compress the array */
3713 0 : KnownAssignedXidsCompress(false);
3714 0 : }
3715 :
3716 : /*
3717 : * Prune KnownAssignedXids up to, but *not* including xid. If xid is invalid
3718 : * then clear the whole table.
3719 : *
3720 : * Caller must hold ProcArrayLock in exclusive mode.
3721 : */
3722 : static void
3723 0 : KnownAssignedXidsRemovePreceding(TransactionId removeXid)
3724 : {
3725 : /* use volatile pointer to prevent code rearrangement */
3726 0 : volatile ProcArrayStruct *pArray = procArray;
3727 0 : int count = 0;
3728 : int head,
3729 : tail,
3730 : i;
3731 :
3732 0 : if (!TransactionIdIsValid(removeXid))
3733 : {
3734 0 : elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
3735 0 : pArray->numKnownAssignedXids = 0;
3736 0 : pArray->headKnownAssignedXids = pArray->tailKnownAssignedXids = 0;
3737 0 : return;
3738 : }
3739 :
3740 0 : elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid);
3741 :
3742 : /*
3743 : * Mark entries invalid starting at the tail. Since array is sorted, we
3744 : * can stop as soon as we reach an entry >= removeXid.
3745 : */
3746 0 : tail = pArray->tailKnownAssignedXids;
3747 0 : head = pArray->headKnownAssignedXids;
3748 :
3749 0 : for (i = tail; i < head; i++)
3750 : {
3751 0 : if (KnownAssignedXidsValid[i])
3752 : {
3753 0 : TransactionId knownXid = KnownAssignedXids[i];
3754 :
3755 0 : if (TransactionIdFollowsOrEquals(knownXid, removeXid))
3756 0 : break;
3757 :
3758 0 : if (!StandbyTransactionIdIsPrepared(knownXid))
3759 : {
3760 0 : KnownAssignedXidsValid[i] = false;
3761 0 : count++;
3762 : }
3763 : }
3764 : }
3765 :
3766 0 : pArray->numKnownAssignedXids -= count;
3767 0 : Assert(pArray->numKnownAssignedXids >= 0);
3768 :
3769 : /*
3770 : * Advance the tail pointer if we've marked the tail item invalid.
3771 : */
3772 0 : for (i = tail; i < head; i++)
3773 : {
3774 0 : if (KnownAssignedXidsValid[i])
3775 0 : break;
3776 : }
3777 0 : if (i >= head)
3778 : {
3779 : /* Array is empty, so we can reset both pointers */
3780 0 : pArray->headKnownAssignedXids = 0;
3781 0 : pArray->tailKnownAssignedXids = 0;
3782 : }
3783 : else
3784 : {
3785 0 : pArray->tailKnownAssignedXids = i;
3786 : }
3787 :
3788 : /* Opportunistically compress the array */
3789 0 : KnownAssignedXidsCompress(false);
3790 : }
3791 :
3792 : /*
3793 : * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids.
3794 : * We filter out anything >= xmax.
3795 : *
3796 : * Returns the number of XIDs stored into xarray[]. Caller is responsible
3797 : * that array is large enough.
3798 : *
3799 : * Caller must hold ProcArrayLock in (at least) shared mode.
3800 : */
3801 : static int
3802 0 : KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax)
3803 : {
3804 0 : TransactionId xtmp = InvalidTransactionId;
3805 :
3806 0 : return KnownAssignedXidsGetAndSetXmin(xarray, &xtmp, xmax);
3807 : }
3808 :
3809 : /*
3810 : * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus
3811 : * we reduce *xmin to the lowest xid value seen if not already lower.
3812 : *
3813 : * Caller must hold ProcArrayLock in (at least) shared mode.
3814 : */
3815 : static int
3816 0 : KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
3817 : TransactionId xmax)
3818 : {
3819 0 : int count = 0;
3820 : int head,
3821 : tail;
3822 : int i;
3823 :
3824 : /*
3825 : * Fetch head just once, since it may change while we loop. We can stop
3826 : * once we reach the initially seen head, since we are certain that an xid
3827 : * cannot enter and then leave the array while we hold ProcArrayLock. We
3828 : * might miss newly-added xids, but they should be >= xmax so irrelevant
3829 : * anyway.
3830 : *
3831 : * Must take spinlock to ensure we see up-to-date array contents.
3832 : */
3833 0 : SpinLockAcquire(&procArray->known_assigned_xids_lck);
3834 0 : tail = procArray->tailKnownAssignedXids;
3835 0 : head = procArray->headKnownAssignedXids;
3836 0 : SpinLockRelease(&procArray->known_assigned_xids_lck);
3837 :
3838 0 : for (i = tail; i < head; i++)
3839 : {
3840 : /* Skip any gaps in the array */
3841 0 : if (KnownAssignedXidsValid[i])
3842 : {
3843 0 : TransactionId knownXid = KnownAssignedXids[i];
3844 :
3845 : /*
3846 : * Update xmin if required. Only the first XID need be checked,
3847 : * since the array is sorted.
3848 : */
3849 0 : if (count == 0 &&
3850 0 : TransactionIdPrecedes(knownXid, *xmin))
3851 0 : *xmin = knownXid;
3852 :
3853 : /*
3854 : * Filter out anything >= xmax, again relying on sorted property
3855 : * of array.
3856 : */
3857 0 : if (TransactionIdIsValid(xmax) &&
3858 0 : TransactionIdFollowsOrEquals(knownXid, xmax))
3859 0 : break;
3860 :
3861 : /* Add knownXid into output array */
3862 0 : xarray[count++] = knownXid;
3863 : }
3864 : }
3865 :
3866 0 : return count;
3867 : }
3868 :
3869 : /*
3870 : * Get oldest XID in the KnownAssignedXids array, or InvalidTransactionId
3871 : * if nothing there.
3872 : */
3873 : static TransactionId
3874 0 : KnownAssignedXidsGetOldestXmin(void)
3875 : {
3876 : int head,
3877 : tail;
3878 : int i;
3879 :
3880 : /*
3881 : * Fetch head just once, since it may change while we loop.
3882 : */
3883 0 : SpinLockAcquire(&procArray->known_assigned_xids_lck);
3884 0 : tail = procArray->tailKnownAssignedXids;
3885 0 : head = procArray->headKnownAssignedXids;
3886 0 : SpinLockRelease(&procArray->known_assigned_xids_lck);
3887 :
3888 0 : for (i = tail; i < head; i++)
3889 : {
3890 : /* Skip any gaps in the array */
3891 0 : if (KnownAssignedXidsValid[i])
3892 0 : return KnownAssignedXids[i];
3893 : }
3894 :
3895 0 : return InvalidTransactionId;
3896 : }
3897 :
3898 : /*
3899 : * Display KnownAssignedXids to provide debug trail
3900 : *
3901 : * Currently this is only called within startup process, so we need no
3902 : * special locking.
3903 : *
3904 : * Note this is pretty expensive, and much of the expense will be incurred
3905 : * even if the elog message will get discarded. It's not currently called
3906 : * in any performance-critical places, however, so no need to be tenser.
3907 : */
3908 : static void
3909 0 : KnownAssignedXidsDisplay(int trace_level)
3910 : {
3911 : /* use volatile pointer to prevent code rearrangement */
3912 0 : volatile ProcArrayStruct *pArray = procArray;
3913 : StringInfoData buf;
3914 : int head,
3915 : tail,
3916 : i;
3917 0 : int nxids = 0;
3918 :
3919 0 : tail = pArray->tailKnownAssignedXids;
3920 0 : head = pArray->headKnownAssignedXids;
3921 :
3922 0 : initStringInfo(&buf);
3923 :
3924 0 : for (i = tail; i < head; i++)
3925 : {
3926 0 : if (KnownAssignedXidsValid[i])
3927 : {
3928 0 : nxids++;
3929 0 : appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
3930 : }
3931 : }
3932 :
3933 0 : elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s",
3934 : nxids,
3935 : pArray->numKnownAssignedXids,
3936 : pArray->tailKnownAssignedXids,
3937 : pArray->headKnownAssignedXids,
3938 : buf.data);
3939 :
3940 0 : pfree(buf.data);
3941 0 : }
3942 :
3943 : /*
3944 : * KnownAssignedXidsReset
3945 : * Resets KnownAssignedXids to be empty
3946 : */
3947 : static void
3948 0 : KnownAssignedXidsReset(void)
3949 : {
3950 : /* use volatile pointer to prevent code rearrangement */
3951 0 : volatile ProcArrayStruct *pArray = procArray;
3952 :
3953 0 : LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3954 :
3955 0 : pArray->numKnownAssignedXids = 0;
3956 0 : pArray->tailKnownAssignedXids = 0;
3957 0 : pArray->headKnownAssignedXids = 0;
3958 :
3959 0 : LWLockRelease(ProcArrayLock);
3960 0 : }
|