Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * predicate.c
4 : * POSTGRES predicate locking
5 : * to support full serializable transaction isolation
6 : *
7 : *
8 : * The approach taken is to implement Serializable Snapshot Isolation (SSI)
9 : * as initially described in this paper:
10 : *
11 : * Michael J. Cahill, Uwe Röhm, and Alan D. Fekete. 2008.
12 : * Serializable isolation for snapshot databases.
13 : * In SIGMOD '08: Proceedings of the 2008 ACM SIGMOD
14 : * international conference on Management of data,
15 : * pages 729-738, New York, NY, USA. ACM.
16 : * http://doi.acm.org/10.1145/1376616.1376690
17 : *
18 : * and further elaborated in Cahill's doctoral thesis:
19 : *
20 : * Michael James Cahill. 2009.
21 : * Serializable Isolation for Snapshot Databases.
22 : * Sydney Digital Theses.
23 : * University of Sydney, School of Information Technologies.
24 : * http://hdl.handle.net/2123/5353
25 : *
26 : *
27 : * Predicate locks for Serializable Snapshot Isolation (SSI) are SIREAD
28 : * locks, which are so different from normal locks that a distinct set of
29 : * structures is required to handle them. They are needed to detect
30 : * rw-conflicts when the read happens before the write. (When the write
31 : * occurs first, the reading transaction can check for a conflict by
32 : * examining the MVCC data.)
33 : *
34 : * (1) Besides tuples actually read, they must cover ranges of tuples
35 : * which would have been read based on the predicate. This will
36 : * require modelling the predicates through locks against database
37 : * objects such as pages, index ranges, or entire tables.
38 : *
39 : * (2) They must be kept in RAM for quick access. Because of this, it
40 : * isn't possible to always maintain tuple-level granularity -- when
41 : * the space allocated to store these approaches exhaustion, a
42 : * request for a lock may need to scan for situations where a single
43 : * transaction holds many fine-grained locks which can be coalesced
44 : * into a single coarser-grained lock.
45 : *
46 : * (3) They never block anything; they are more like flags than locks
47 : * in that regard, although they refer to database objects and are
48 : * used to identify rw-conflicts with normal write locks.
49 : *
50 : * (4) While they are associated with a transaction, they must survive
51 : * a successful COMMIT of that transaction, and remain until all
52 : * overlapping transactions complete. This even means that they
53 : * must survive termination of the transaction's process. If a
54 : * top level transaction is rolled back, however, it is immediately
55 : * flagged so that it can be ignored, and its SIREAD locks can be
56 : * released any time after that.
57 : *
58 : * (5) The only transactions which create SIREAD locks or check for
59 : * conflicts with them are serializable transactions.
60 : *
61 : * (6) When a write lock for a top level transaction is found to cover
62 : * an existing SIREAD lock for the same transaction, the SIREAD lock
63 : * can be deleted.
64 : *
65 : * (7) A write from a serializable transaction must ensure that an xact
66 : * record exists for the transaction, with the same lifespan (until
67 : * all concurrent transactions complete or the transaction is rolled
68 : * back) so that rw-dependencies to that transaction can be
69 : * detected.
70 : *
71 : * We use an optimization for read-only transactions. Under certain
72 : * circumstances, a read-only transaction's snapshot can be shown to
73 : * never have conflicts with other transactions. This is referred to
74 : * as a "safe" snapshot (and one known not to be is "unsafe").
75 : * However, it can't be determined whether a snapshot is safe until
76 : * all concurrent read/write transactions complete.
77 : *
78 : * Once a read-only transaction is known to have a safe snapshot, it
79 : * can release its predicate locks and exempt itself from further
80 : * predicate lock tracking. READ ONLY DEFERRABLE transactions run only
81 : * on safe snapshots, waiting as necessary for one to be available.
82 : *
83 : *
84 : * Lightweight locks to manage access to the predicate locking shared
85 : * memory objects must be taken in this order, and should be released in
86 : * reverse order:
87 : *
88 : * SerializableFinishedListLock
89 : * - Protects the list of transactions which have completed but which
90 : * may yet matter because they overlap still-active transactions.
91 : *
92 : * SerializablePredicateLockListLock
93 : * - Protects the linked list of locks held by a transaction. Note
94 : * that the locks themselves are also covered by the partition
95 : * locks of their respective lock targets; this lock only affects
96 : * the linked list connecting the locks related to a transaction.
97 : * - All transactions share this single lock (with no partitioning).
98 : * - There is never a need for a process other than the one running
99 : * an active transaction to walk the list of locks held by that
100 : * transaction.
101 : * - It is relatively infrequent that another process needs to
102 : * modify the list for a transaction, but it does happen for such
103 : * things as index page splits for pages with predicate locks and
104 : * freeing of predicate locked pages by a vacuum process. When
105 : * removing a lock in such cases, the lock itself contains the
106 : * pointers needed to remove it from the list. When adding a
107 : * lock in such cases, the lock can be added using the anchor in
108 : * the transaction structure. Neither requires walking the list.
109 : * - Cleaning up the list for a terminated transaction is sometimes
110 : * not done on a retail basis, in which case no lock is required.
111 : * - Due to the above, a process accessing its active transaction's
112 : * list always uses a shared lock, regardless of whether it is
113 : * walking or maintaining the list. This improves concurrency
114 : * for the common access patterns.
115 : * - A process which needs to alter the list of a transaction other
116 : * than its own active transaction must acquire an exclusive
117 : * lock.
118 : *
119 : * PredicateLockHashPartitionLock(hashcode)
120 : * - The same lock protects a target, all locks on that target, and
121 : * the linked list of locks on the target.
122 : * - When more than one is needed, acquire in ascending address order.
123 : * - When all are needed (rare), acquire in ascending index order with
124 : * PredicateLockHashPartitionLockByIndex(index).
125 : *
126 : * SerializableXactHashLock
127 : * - Protects both PredXact and SerializableXidHash.
128 : *
129 : *
130 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
131 : * Portions Copyright (c) 1994, Regents of the University of California
132 : *
133 : *
134 : * IDENTIFICATION
135 : * src/backend/storage/lmgr/predicate.c
136 : *
137 : *-------------------------------------------------------------------------
138 : */
139 : /*
140 : * INTERFACE ROUTINES
141 : *
142 : * housekeeping for setting up shared memory predicate lock structures
143 : * InitPredicateLocks(void)
144 : * PredicateLockShmemSize(void)
145 : *
146 : * predicate lock reporting
147 : * GetPredicateLockStatusData(void)
148 : * PageIsPredicateLocked(Relation relation, BlockNumber blkno)
149 : *
150 : * predicate lock maintenance
151 : * GetSerializableTransactionSnapshot(Snapshot snapshot)
152 : * SetSerializableTransactionSnapshot(Snapshot snapshot,
153 : * VirtualTransactionId *sourcevxid)
154 : * RegisterPredicateLockingXid(void)
155 : * PredicateLockRelation(Relation relation, Snapshot snapshot)
156 : * PredicateLockPage(Relation relation, BlockNumber blkno,
157 : * Snapshot snapshot)
158 : * PredicateLockTuple(Relation relation, HeapTuple tuple,
159 : * Snapshot snapshot)
160 : * PredicateLockPageSplit(Relation relation, BlockNumber oldblkno,
161 : * BlockNumber newblkno)
162 : * PredicateLockPageCombine(Relation relation, BlockNumber oldblkno,
163 : * BlockNumber newblkno)
164 : * TransferPredicateLocksToHeapRelation(Relation relation)
165 : * ReleasePredicateLocks(bool isCommit)
166 : *
167 : * conflict detection (may also trigger rollback)
168 : * CheckForSerializableConflictOut(bool visible, Relation relation,
169 : * HeapTupleData *tup, Buffer buffer,
170 : * Snapshot snapshot)
171 : * CheckForSerializableConflictIn(Relation relation, HeapTupleData *tup,
172 : * Buffer buffer)
173 : * CheckTableForSerializableConflictIn(Relation relation)
174 : *
175 : * final rollback checking
176 : * PreCommit_CheckForSerializationFailure(void)
177 : *
178 : * two-phase commit support
179 : * AtPrepare_PredicateLocks(void);
180 : * PostPrepare_PredicateLocks(TransactionId xid);
181 : * PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit);
182 : * predicatelock_twophase_recover(TransactionId xid, uint16 info,
183 : * void *recdata, uint32 len);
184 : */
185 :
186 : #include "postgres.h"
187 :
188 : #include "access/htup_details.h"
189 : #include "access/slru.h"
190 : #include "access/subtrans.h"
191 : #include "access/transam.h"
192 : #include "access/twophase.h"
193 : #include "access/twophase_rmgr.h"
194 : #include "access/xact.h"
195 : #include "access/xlog.h"
196 : #include "miscadmin.h"
197 : #include "pgstat.h"
198 : #include "storage/bufmgr.h"
199 : #include "storage/predicate.h"
200 : #include "storage/predicate_internals.h"
201 : #include "storage/proc.h"
202 : #include "storage/procarray.h"
203 : #include "utils/rel.h"
204 : #include "utils/snapmgr.h"
205 : #include "utils/tqual.h"
206 :
207 : /* Uncomment the next line to test the graceful degradation code. */
208 : /* #define TEST_OLDSERXID */
209 :
210 : /*
211 : * Test the most selective fields first, for performance.
212 : *
213 : * a is covered by b if all of the following hold:
214 : * 1) a.database = b.database
215 : * 2) a.relation = b.relation
216 : * 3) b.offset is invalid (b is page-granularity or higher)
217 : * 4) either of the following:
218 : * 4a) a.offset is valid (a is tuple-granularity) and a.page = b.page
219 : * or 4b) a.offset is invalid and b.page is invalid (a is
220 : * page-granularity and b is relation-granularity)
221 : */
222 : #define TargetTagIsCoveredBy(covered_target, covering_target) \
223 : ((GET_PREDICATELOCKTARGETTAG_RELATION(covered_target) == /* (2) */ \
224 : GET_PREDICATELOCKTARGETTAG_RELATION(covering_target)) \
225 : && (GET_PREDICATELOCKTARGETTAG_OFFSET(covering_target) == \
226 : InvalidOffsetNumber) /* (3) */ \
227 : && (((GET_PREDICATELOCKTARGETTAG_OFFSET(covered_target) != \
228 : InvalidOffsetNumber) /* (4a) */ \
229 : && (GET_PREDICATELOCKTARGETTAG_PAGE(covering_target) == \
230 : GET_PREDICATELOCKTARGETTAG_PAGE(covered_target))) \
231 : || ((GET_PREDICATELOCKTARGETTAG_PAGE(covering_target) == \
232 : InvalidBlockNumber) /* (4b) */ \
233 : && (GET_PREDICATELOCKTARGETTAG_PAGE(covered_target) \
234 : != InvalidBlockNumber))) \
235 : && (GET_PREDICATELOCKTARGETTAG_DB(covered_target) == /* (1) */ \
236 : GET_PREDICATELOCKTARGETTAG_DB(covering_target)))
237 :
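/*
 * Minimal usage sketch of the coverage rules above (not part of the original
 * file; the function name and the tag values are hypothetical illustrations):
 * a tuple-level target is covered by a page-level target on the same page, a
 * page-level target is covered by a relation-level target, and no target is
 * covered by one of equal or finer granularity.
 */
#ifdef NOT_USED
static void
TargetTagCoverageExample(void)
{
	PREDICATELOCKTARGETTAG tupletag;
	PREDICATELOCKTARGETTAG pagetag;
	PREDICATELOCKTARGETTAG reltag;

	/* Arbitrary database OID 1, relation OID 2, block 3, offset 4. */
	SET_PREDICATELOCKTARGETTAG_TUPLE(tupletag, 1, 2, 3, 4);
	SET_PREDICATELOCKTARGETTAG_PAGE(pagetag, 1, 2, 3);
	SET_PREDICATELOCKTARGETTAG_RELATION(reltag, 1, 2);

	Assert(TargetTagIsCoveredBy(tupletag, pagetag));	/* case (4a) */
	Assert(TargetTagIsCoveredBy(pagetag, reltag));		/* case (4b) */
	Assert(!TargetTagIsCoveredBy(pagetag, tupletag));	/* (3) fails */
	Assert(!TargetTagIsCoveredBy(reltag, reltag));		/* neither (4a) nor (4b) */
}
#endif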
238 : /*
239 : * The predicate locking target and lock shared hash tables are partitioned to
240 : * reduce contention. To determine which partition a given target belongs to,
241 : * compute the tag's hash code with PredicateLockTargetTagHashCode(), then
242 : * apply one of these macros.
243 : * NB: NUM_PREDICATELOCK_PARTITIONS must be a power of 2!
244 : */
245 : #define PredicateLockHashPartition(hashcode) \
246 : ((hashcode) % NUM_PREDICATELOCK_PARTITIONS)
247 : #define PredicateLockHashPartitionLock(hashcode) \
248 : (&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + \
249 : PredicateLockHashPartition(hashcode)].lock)
250 : #define PredicateLockHashPartitionLockByIndex(i) \
251 : (&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)
252 :
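/*
 * A minimal sketch (not part of the original file; the function name is
 * hypothetical) of the locking rule documented in the file header: when all
 * partition locks are needed, take them in ascending index order and release
 * them in the reverse order.  GetPredicateLockStatusData() below follows
 * this same pattern.
 */
#ifdef NOT_USED
static void
AcquireAllPartitionLocksExample(void)
{
	int			i;

	for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
		LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_SHARED);

	/* ... inspect the shared predicate lock hash tables here ... */

	for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
		LWLockRelease(PredicateLockHashPartitionLockByIndex(i));
}
#endif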
253 : #define NPREDICATELOCKTARGETENTS() \
254 : mul_size(max_predicate_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
255 :
256 : #define SxactIsOnFinishedList(sxact) (!SHMQueueIsDetached(&((sxact)->finishedLink)))
257 :
258 : /*
259 : * Note that a sxact is marked "prepared" once it has passed
260 : * PreCommit_CheckForSerializationFailure, even if it isn't using
261 : * 2PC. This is the point at which it can no longer be aborted.
262 : *
263 : * The PREPARED flag remains set after commit, so SxactIsCommitted
264 : * implies SxactIsPrepared.
265 : */
266 : #define SxactIsCommitted(sxact) (((sxact)->flags & SXACT_FLAG_COMMITTED) != 0)
267 : #define SxactIsPrepared(sxact) (((sxact)->flags & SXACT_FLAG_PREPARED) != 0)
268 : #define SxactIsRolledBack(sxact) (((sxact)->flags & SXACT_FLAG_ROLLED_BACK) != 0)
269 : #define SxactIsDoomed(sxact) (((sxact)->flags & SXACT_FLAG_DOOMED) != 0)
270 : #define SxactIsReadOnly(sxact) (((sxact)->flags & SXACT_FLAG_READ_ONLY) != 0)
271 : #define SxactHasSummaryConflictIn(sxact) (((sxact)->flags & SXACT_FLAG_SUMMARY_CONFLICT_IN) != 0)
272 : #define SxactHasSummaryConflictOut(sxact) (((sxact)->flags & SXACT_FLAG_SUMMARY_CONFLICT_OUT) != 0)
273 : /*
274 : * The following macro actually means that the specified transaction has a
275 : * conflict out *to a transaction which committed ahead of it*. It's hard
276 : * to get that into a name of a reasonable length.
277 : */
278 : #define SxactHasConflictOut(sxact) (((sxact)->flags & SXACT_FLAG_CONFLICT_OUT) != 0)
279 : #define SxactIsDeferrableWaiting(sxact) (((sxact)->flags & SXACT_FLAG_DEFERRABLE_WAITING) != 0)
280 : #define SxactIsROSafe(sxact) (((sxact)->flags & SXACT_FLAG_RO_SAFE) != 0)
281 : #define SxactIsROUnsafe(sxact) (((sxact)->flags & SXACT_FLAG_RO_UNSAFE) != 0)
282 :
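/*
 * Tiny sketch (not part of the original file; the function name is
 * hypothetical) of the invariant noted above: because SXACT_FLAG_PREPARED
 * stays set after commit, any sxact that tests as committed must also test
 * as prepared.
 */
#ifdef NOT_USED
static void
SxactFlagInvariantExample(const SERIALIZABLEXACT *sxact)
{
	if (SxactIsCommitted(sxact))
		Assert(SxactIsPrepared(sxact));
}
#endif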
283 : /*
284 : * Compute the hash code associated with a PREDICATELOCKTARGETTAG.
285 : *
286 : * To avoid unnecessary recomputations of the hash code, we try to do this
287 : * just once per function, and then pass it around as needed. Aside from
288 : * passing the hashcode to hash_search_with_hash_value(), we can extract
289 : * the lock partition number from the hashcode.
290 : */
291 : #define PredicateLockTargetTagHashCode(predicatelocktargettag) \
292 : get_hash_value(PredicateLockTargetHash, predicatelocktargettag)
293 :
294 : /*
295 : * Given a predicate lock tag, and the hash for its target,
296 : * compute the lock hash.
297 : *
298 : * To make the hash code also depend on the transaction, we xor the sxid
299 : * struct's address into the hash code, left-shifted so that the
300 : * partition-number bits don't change. Since this is only a hash, we
301 : * don't care if we lose high-order bits of the address; use an
302 : * intermediate variable to suppress cast-pointer-to-int warnings.
303 : */
304 : #define PredicateLockHashCodeFromTargetHashCode(predicatelocktag, targethash) \
305 : ((targethash) ^ ((uint32) PointerGetDatum((predicatelocktag)->myXact)) \
306 : << LOG2_NUM_PREDICATELOCK_PARTITIONS)
307 :
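/*
 * Minimal sketch (not part of the original file; the function name is
 * hypothetical) of the property the shifted xor above preserves: the
 * per-lock hash code must fall in the same partition as its target's hash
 * code, because both hash tables share one set of partition locks.
 */
#ifdef NOT_USED
static void
LockHashPartitionExample(const PREDICATELOCKTAG *locktag, uint32 targethash)
{
	uint32		lockhash;

	lockhash = PredicateLockHashCodeFromTargetHashCode(locktag, targethash);

	/* The low-order partition bits are untouched by the shifted xor. */
	Assert(PredicateLockHashPartition(lockhash) ==
		   PredicateLockHashPartition(targethash));
}
#endif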
308 :
309 : /*
310 : * The SLRU buffer area through which we access the old xids.
311 : */
312 : static SlruCtlData OldSerXidSlruCtlData;
313 :
314 : #define OldSerXidSlruCtl (&OldSerXidSlruCtlData)
315 :
316 : #define OLDSERXID_PAGESIZE BLCKSZ
317 : #define OLDSERXID_ENTRYSIZE sizeof(SerCommitSeqNo)
318 : #define OLDSERXID_ENTRIESPERPAGE (OLDSERXID_PAGESIZE / OLDSERXID_ENTRYSIZE)
319 :
320 : /*
321 : * Set maximum pages based on the lesser of the number needed to track all
322 : * transactions and the maximum that SLRU supports.
323 : */
324 : #define OLDSERXID_MAX_PAGE Min(SLRU_PAGES_PER_SEGMENT * 0x10000 - 1, \
325 : (MaxTransactionId) / OLDSERXID_ENTRIESPERPAGE)
326 :
327 : #define OldSerXidNextPage(page) (((page) >= OLDSERXID_MAX_PAGE) ? 0 : (page) + 1)
328 :
329 : #define OldSerXidValue(slotno, xid) (*((SerCommitSeqNo *) \
330 : (OldSerXidSlruCtl->shared->page_buffer[slotno] + \
331 : ((((uint32) (xid)) % OLDSERXID_ENTRIESPERPAGE) * OLDSERXID_ENTRYSIZE))))
332 :
333 : #define OldSerXidPage(xid) ((((uint32) (xid)) / OLDSERXID_ENTRIESPERPAGE) % (OLDSERXID_MAX_PAGE + 1))
334 : #define OldSerXidSegment(page) ((page) / SLRU_PAGES_PER_SEGMENT)
335 :
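/*
 * Minimal sketch (not part of the original file; the function name is
 * hypothetical) of the xid-to-page mapping above: consecutive xids share an
 * SLRU page (1024 entries per page with the default 8kB block size), and
 * advancing past the last page wraps back to page zero.
 */
#ifdef NOT_USED
static void
OldSerXidPageMappingExample(void)
{
	/* Small xids land on page zero... */
	Assert(OldSerXidPage(FirstNormalTransactionId) == 0);

	/* ...and the page counter wraps after OLDSERXID_MAX_PAGE. */
	Assert(OldSerXidNextPage(OLDSERXID_MAX_PAGE) == 0);
}
#endif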
336 : typedef struct OldSerXidControlData
337 : {
338 : int headPage; /* newest initialized page */
339 : TransactionId headXid; /* newest valid Xid in the SLRU */
340 : TransactionId tailXid; /* oldest xmin we might be interested in */
341 : bool warningIssued; /* have we issued SLRU wrap-around warning? */
342 : } OldSerXidControlData;
343 :
344 : typedef struct OldSerXidControlData *OldSerXidControl;
345 :
346 : static OldSerXidControl oldSerXidControl;
347 :
348 : /*
349 : * When the oldest committed transaction on the "finished" list is moved to
350 : * SLRU, its predicate locks will be moved to this "dummy" transaction,
351 : * collapsing duplicate targets. When a duplicate is found, the later
352 : * commitSeqNo is used.
353 : */
354 : static SERIALIZABLEXACT *OldCommittedSxact;
355 :
356 :
357 : /*
358 : * These configuration variables are used to set the predicate lock table size
359 : * and to control promotion of predicate locks to coarser granularity in an
360 : * attempt to degrade performance (mostly as false positive serialization
361 : * failures) gracefully in the face of memory pressure.
362 : */
363 : int max_predicate_locks_per_xact; /* set by guc.c */
364 : int max_predicate_locks_per_relation; /* set by guc.c */
365 : int max_predicate_locks_per_page; /* set by guc.c */
366 :
367 : /*
368 : * This provides a list of objects in order to track transactions
369 : * participating in predicate locking. Entries in the list are fixed size,
370 : * and reside in shared memory. The memory address of an entry must remain
371 : * fixed during its lifetime. The list will be protected from concurrent
372 : * update externally; no provision is made in this code to manage that. The
373 : * number of entries in the list, and the size allowed for each entry, are
374 : * fixed upon creation.
375 : */
376 : static PredXactList PredXact;
377 :
378 : /*
379 : * This provides a pool of RWConflict data elements to use in conflict lists
380 : * between transactions.
381 : */
382 : static RWConflictPoolHeader RWConflictPool;
383 :
384 : /*
385 : * The predicate locking hash tables are in shared memory.
386 : * Each backend keeps pointers to them.
387 : */
388 : static HTAB *SerializableXidHash;
389 : static HTAB *PredicateLockTargetHash;
390 : static HTAB *PredicateLockHash;
391 : static SHM_QUEUE *FinishedSerializableTransactions;
392 :
393 : /*
394 : * Tag for a dummy entry in PredicateLockTargetHash. By temporarily removing
395 : * this entry, you can ensure that there's enough scratch space available for
396 : * inserting one entry in the hash table. This is an otherwise-invalid tag.
397 : */
398 : static const PREDICATELOCKTARGETTAG ScratchTargetTag = {0, 0, 0, 0};
399 : static uint32 ScratchTargetTagHash;
400 : static LWLock *ScratchPartitionLock;
401 :
402 : /*
403 : * The local hash table used to determine when to combine multiple fine-
404 : * grained locks into a single coarser-grained lock.
405 : */
406 : static HTAB *LocalPredicateLockHash = NULL;
407 :
408 : /*
409 : * Keep a pointer to the currently-running serializable transaction (if any)
410 : * for quick reference. Also, remember if we have written anything that could
411 : * cause a rw-conflict.
412 : */
413 : static SERIALIZABLEXACT *MySerializableXact = InvalidSerializableXact;
414 : static bool MyXactDidWrite = false;
415 :
416 : /* local functions */
417 :
418 : static SERIALIZABLEXACT *CreatePredXact(void);
419 : static void ReleasePredXact(SERIALIZABLEXACT *sxact);
420 : static SERIALIZABLEXACT *FirstPredXact(void);
421 : static SERIALIZABLEXACT *NextPredXact(SERIALIZABLEXACT *sxact);
422 :
423 : static bool RWConflictExists(const SERIALIZABLEXACT *reader, const SERIALIZABLEXACT *writer);
424 : static void SetRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer);
425 : static void SetPossibleUnsafeConflict(SERIALIZABLEXACT *roXact, SERIALIZABLEXACT *activeXact);
426 : static void ReleaseRWConflict(RWConflict conflict);
427 : static void FlagSxactUnsafe(SERIALIZABLEXACT *sxact);
428 :
429 : static bool OldSerXidPagePrecedesLogically(int p, int q);
430 : static void OldSerXidInit(void);
431 : static void OldSerXidAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo);
432 : static SerCommitSeqNo OldSerXidGetMinConflictCommitSeqNo(TransactionId xid);
433 : static void OldSerXidSetActiveSerXmin(TransactionId xid);
434 :
435 : static uint32 predicatelock_hash(const void *key, Size keysize);
436 : static void SummarizeOldestCommittedSxact(void);
437 : static Snapshot GetSafeSnapshot(Snapshot snapshot);
438 : static Snapshot GetSerializableTransactionSnapshotInt(Snapshot snapshot,
439 : VirtualTransactionId *sourcevxid,
440 : int sourcepid);
441 : static bool PredicateLockExists(const PREDICATELOCKTARGETTAG *targettag);
442 : static bool GetParentPredicateLockTag(const PREDICATELOCKTARGETTAG *tag,
443 : PREDICATELOCKTARGETTAG *parent);
444 : static bool CoarserLockCovers(const PREDICATELOCKTARGETTAG *newtargettag);
445 : static void RemoveScratchTarget(bool lockheld);
446 : static void RestoreScratchTarget(bool lockheld);
447 : static void RemoveTargetIfNoLongerUsed(PREDICATELOCKTARGET *target,
448 : uint32 targettaghash);
449 : static void DeleteChildTargetLocks(const PREDICATELOCKTARGETTAG *newtargettag);
450 : static int MaxPredicateChildLocks(const PREDICATELOCKTARGETTAG *tag);
451 : static bool CheckAndPromotePredicateLockRequest(const PREDICATELOCKTARGETTAG *reqtag);
452 : static void DecrementParentLocks(const PREDICATELOCKTARGETTAG *targettag);
453 : static void CreatePredicateLock(const PREDICATELOCKTARGETTAG *targettag,
454 : uint32 targettaghash,
455 : SERIALIZABLEXACT *sxact);
456 : static void DeleteLockTarget(PREDICATELOCKTARGET *target, uint32 targettaghash);
457 : static bool TransferPredicateLocksToNewTarget(PREDICATELOCKTARGETTAG oldtargettag,
458 : PREDICATELOCKTARGETTAG newtargettag,
459 : bool removeOld);
460 : static void PredicateLockAcquire(const PREDICATELOCKTARGETTAG *targettag);
461 : static void DropAllPredicateLocksFromTable(Relation relation,
462 : bool transfer);
463 : static void SetNewSxactGlobalXmin(void);
464 : static void ClearOldPredicateLocks(void);
465 : static void ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial,
466 : bool summarize);
467 : static bool XidIsConcurrent(TransactionId xid);
468 : static void CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag);
469 : static void FlagRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer);
470 : static void OnConflict_CheckForSerializationFailure(const SERIALIZABLEXACT *reader,
471 : SERIALIZABLEXACT *writer);
472 :
473 :
474 : /*------------------------------------------------------------------------*/
475 :
476 : /*
477 : * Does this relation participate in predicate locking? Temporary and system
478 : * relations are exempt, as are materialized views.
479 : */
480 : static inline bool
481 1036 : PredicateLockingNeededForRelation(Relation relation)
482 : {
483 1573 : return !(relation->rd_id < FirstBootstrapObjectId ||
484 273 : RelationUsesLocalBuffers(relation) ||
485 264 : relation->rd_rel->relkind == RELKIND_MATVIEW);
486 : }
487 :
488 : /*
489 : * When a public interface method is called for a read, this is the test to
490 : * see if we should do a quick return.
491 : *
492 : * Note: this function has side-effects! If this transaction has been flagged
493 : * as RO-safe since the last call, we release all predicate locks and reset
494 : * MySerializableXact. That lets subsequent calls return quickly.
495 : *
496 : * This is marked as 'inline' to eliminate the function call overhead
497 : * in the common case that serialization is not needed.
498 : */
499 : static inline bool
500 7039425 : SerializationNeededForRead(Relation relation, Snapshot snapshot)
501 : {
502 : /* Nothing to do if this is not a serializable transaction */
503 7039425 : if (MySerializableXact == InvalidSerializableXact)
504 7038685 : return false;
505 :
506 : /*
507 : * Don't acquire locks or conflict when scanning with a special snapshot.
508 : * This excludes things like CLUSTER and REINDEX. They use the wholesale
509 : * functions TransferPredicateLocksToHeapRelation() and
510 : * CheckTableForSerializableConflictIn() to participate in serialization,
511 : * but the scans involved don't need serialization.
512 : */
513 740 : if (!IsMVCCSnapshot(snapshot))
514 19 : return false;
515 :
516 : /*
517 : * Check if we have just become "RO-safe". If we have, immediately release
518 : * all locks as they're not needed anymore. This also resets
519 : * MySerializableXact, so that subsequent calls to this function can exit
520 : * quickly.
521 : *
522 : * A transaction is flagged as RO_SAFE if all concurrent R/W transactions
523 : * commit without having conflicts out to an earlier snapshot, thus
524 : * ensuring that no conflicts are possible for this transaction.
525 : */
526 721 : if (SxactIsROSafe(MySerializableXact))
527 : {
528 0 : ReleasePredicateLocks(false);
529 0 : return false;
530 : }
531 :
532 : /* Check if the relation doesn't participate in predicate locking */
533 721 : if (!PredicateLockingNeededForRelation(relation))
534 623 : return false;
535 :
536 98 : return true; /* no excuse to skip predicate locking */
537 : }
538 :
539 : /*
540 : * Like SerializationNeededForRead(), but called on writes.
541 : * The logic is the same, but there is no snapshot and we can't be RO-safe.
542 : */
543 : static inline bool
544 1063207 : SerializationNeededForWrite(Relation relation)
545 : {
546 : /* Nothing to do if this is not a serializable transaction */
547 1063207 : if (MySerializableXact == InvalidSerializableXact)
548 1063055 : return false;
549 :
550 : /* Check if the relation doesn't participate in predicate locking */
551 152 : if (!PredicateLockingNeededForRelation(relation))
552 133 : return false;
553 :
554 19 : return true; /* no excuse to skip predicate locking */
555 : }
556 :
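/*
 * A sketch (not part of the original file; the function name is
 * hypothetical) of the quick-return pattern the two tests above support,
 * modeled on the page-level public entry point later in this file: bail out
 * cheaply when serialization is not in play, otherwise build a target tag
 * and acquire the SIREAD lock.
 */
#ifdef NOT_USED
static void
ExamplePredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
{
	PREDICATELOCKTARGETTAG tag;

	if (!SerializationNeededForRead(relation, snapshot))
		return;

	SET_PREDICATELOCKTARGETTAG_PAGE(tag,
									relation->rd_node.dbNode,
									relation->rd_id,
									blkno);
	PredicateLockAcquire(&tag);
}
#endif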
557 :
558 : /*------------------------------------------------------------------------*/
559 :
560 : /*
561 : * These functions are a simple implementation of a list for this specific
562 : * type of struct. If there is ever a generalized shared memory list, we
563 : * should probably switch to that.
564 : */
565 : static SERIALIZABLEXACT *
566 18 : CreatePredXact(void)
567 : {
568 : PredXactListElement ptle;
569 :
570 18 : ptle = (PredXactListElement)
571 18 : SHMQueueNext(&PredXact->availableList,
572 18 : &PredXact->availableList,
573 : offsetof(PredXactListElementData, link));
574 18 : if (!ptle)
575 0 : return NULL;
576 :
577 18 : SHMQueueDelete(&ptle->link);
578 18 : SHMQueueInsertBefore(&PredXact->activeList, &ptle->link);
579 18 : return &ptle->sxact;
580 : }
581 :
582 : static void
583 13 : ReleasePredXact(SERIALIZABLEXACT *sxact)
584 : {
585 : PredXactListElement ptle;
586 :
587 13 : Assert(ShmemAddrIsValid(sxact));
588 :
589 13 : ptle = (PredXactListElement)
590 : (((char *) sxact)
591 : - offsetof(PredXactListElementData, sxact)
592 : + offsetof(PredXactListElementData, link));
593 13 : SHMQueueDelete(&ptle->link);
594 13 : SHMQueueInsertBefore(&PredXact->availableList, &ptle->link);
595 13 : }
596 :
597 : static SERIALIZABLEXACT *
598 8 : FirstPredXact(void)
599 : {
600 : PredXactListElement ptle;
601 :
602 8 : ptle = (PredXactListElement)
603 8 : SHMQueueNext(&PredXact->activeList,
604 8 : &PredXact->activeList,
605 : offsetof(PredXactListElementData, link));
606 8 : if (!ptle)
607 0 : return NULL;
608 :
609 8 : return &ptle->sxact;
610 : }
611 :
612 : static SERIALIZABLEXACT *
613 18 : NextPredXact(SERIALIZABLEXACT *sxact)
614 : {
615 : PredXactListElement ptle;
616 :
617 18 : Assert(ShmemAddrIsValid(sxact));
618 :
619 18 : ptle = (PredXactListElement)
620 : (((char *) sxact)
621 : - offsetof(PredXactListElementData, sxact)
622 : + offsetof(PredXactListElementData, link));
623 18 : ptle = (PredXactListElement)
624 18 : SHMQueueNext(&PredXact->activeList,
625 18 : &ptle->link,
626 : offsetof(PredXactListElementData, link));
627 18 : if (!ptle)
628 8 : return NULL;
629 :
630 10 : return &ptle->sxact;
631 : }
632 :
633 : /*------------------------------------------------------------------------*/
634 :
635 : /*
636 : * These functions manage primitive access to the RWConflict pool and lists.
637 : */
638 : static bool
639 8 : RWConflictExists(const SERIALIZABLEXACT *reader, const SERIALIZABLEXACT *writer)
640 : {
641 : RWConflict conflict;
642 :
643 8 : Assert(reader != writer);
644 :
645 : /* Check the ends of the purported conflict first. */
646 8 : if (SxactIsDoomed(reader)
647 8 : || SxactIsDoomed(writer)
648 8 : || SHMQueueEmpty(&reader->outConflicts)
649 1 : || SHMQueueEmpty(&writer->inConflicts))
650 7 : return false;
651 :
652 : /* A conflict is possible; walk the list to find out. */
653 1 : conflict = (RWConflict)
654 1 : SHMQueueNext(&reader->outConflicts,
655 : &reader->outConflicts,
656 : offsetof(RWConflictData, outLink));
657 2 : while (conflict)
658 : {
659 1 : if (conflict->sxactIn == writer)
660 1 : return true;
661 0 : conflict = (RWConflict)
662 0 : SHMQueueNext(&reader->outConflicts,
663 0 : &conflict->outLink,
664 : offsetof(RWConflictData, outLink));
665 : }
666 :
667 : /* No conflict found. */
668 0 : return false;
669 : }
670 :
671 : static void
672 2 : SetRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
673 : {
674 : RWConflict conflict;
675 :
676 2 : Assert(reader != writer);
677 2 : Assert(!RWConflictExists(reader, writer));
678 :
679 2 : conflict = (RWConflict)
680 2 : SHMQueueNext(&RWConflictPool->availableList,
681 2 : &RWConflictPool->availableList,
682 : offsetof(RWConflictData, outLink));
683 2 : if (!conflict)
684 0 : ereport(ERROR,
685 : (errcode(ERRCODE_OUT_OF_MEMORY),
686 : errmsg("not enough elements in RWConflictPool to record a read/write conflict"),
687 : errhint("You might need to run fewer transactions at a time or increase max_connections.")));
688 :
689 2 : SHMQueueDelete(&conflict->outLink);
690 :
691 2 : conflict->sxactOut = reader;
692 2 : conflict->sxactIn = writer;
693 2 : SHMQueueInsertBefore(&reader->outConflicts, &conflict->outLink);
694 2 : SHMQueueInsertBefore(&writer->inConflicts, &conflict->inLink);
695 2 : }
696 :
697 : static void
698 0 : SetPossibleUnsafeConflict(SERIALIZABLEXACT *roXact,
699 : SERIALIZABLEXACT *activeXact)
700 : {
701 : RWConflict conflict;
702 :
703 0 : Assert(roXact != activeXact);
704 0 : Assert(SxactIsReadOnly(roXact));
705 0 : Assert(!SxactIsReadOnly(activeXact));
706 :
707 0 : conflict = (RWConflict)
708 0 : SHMQueueNext(&RWConflictPool->availableList,
709 0 : &RWConflictPool->availableList,
710 : offsetof(RWConflictData, outLink));
711 0 : if (!conflict)
712 0 : ereport(ERROR,
713 : (errcode(ERRCODE_OUT_OF_MEMORY),
714 : errmsg("not enough elements in RWConflictPool to record a potential read/write conflict"),
715 : errhint("You might need to run fewer transactions at a time or increase max_connections.")));
716 :
717 0 : SHMQueueDelete(&conflict->outLink);
718 :
719 0 : conflict->sxactOut = activeXact;
720 0 : conflict->sxactIn = roXact;
721 0 : SHMQueueInsertBefore(&activeXact->possibleUnsafeConflicts,
722 : &conflict->outLink);
723 0 : SHMQueueInsertBefore(&roXact->possibleUnsafeConflicts,
724 : &conflict->inLink);
725 0 : }
726 :
727 : static void
728 2 : ReleaseRWConflict(RWConflict conflict)
729 : {
730 2 : SHMQueueDelete(&conflict->inLink);
731 2 : SHMQueueDelete(&conflict->outLink);
732 2 : SHMQueueInsertBefore(&RWConflictPool->availableList, &conflict->outLink);
733 2 : }
734 :
735 : static void
736 0 : FlagSxactUnsafe(SERIALIZABLEXACT *sxact)
737 : {
738 : RWConflict conflict,
739 : nextConflict;
740 :
741 0 : Assert(SxactIsReadOnly(sxact));
742 0 : Assert(!SxactIsROSafe(sxact));
743 :
744 0 : sxact->flags |= SXACT_FLAG_RO_UNSAFE;
745 :
746 : /*
747 : * We know this isn't a safe snapshot, so we can stop looking for other
748 : * potential conflicts.
749 : */
750 0 : conflict = (RWConflict)
751 0 : SHMQueueNext(&sxact->possibleUnsafeConflicts,
752 0 : &sxact->possibleUnsafeConflicts,
753 : offsetof(RWConflictData, inLink));
754 0 : while (conflict)
755 : {
756 0 : nextConflict = (RWConflict)
757 0 : SHMQueueNext(&sxact->possibleUnsafeConflicts,
758 0 : &conflict->inLink,
759 : offsetof(RWConflictData, inLink));
760 :
761 0 : Assert(!SxactIsReadOnly(conflict->sxactOut));
762 0 : Assert(sxact == conflict->sxactIn);
763 :
764 0 : ReleaseRWConflict(conflict);
765 :
766 0 : conflict = nextConflict;
767 : }
768 0 : }
769 :
770 : /*------------------------------------------------------------------------*/
771 :
772 : /*
773 : * We will work on the page range of 0..OLDSERXID_MAX_PAGE.
774 : * Compares using wraparound logic, as is required by slru.c.
775 : */
776 : static bool
777 0 : OldSerXidPagePrecedesLogically(int p, int q)
778 : {
779 : int diff;
780 :
781 : /*
782 : * We have to compare modulo (OLDSERXID_MAX_PAGE+1)/2. Both inputs should
783 : * be in the range 0..OLDSERXID_MAX_PAGE.
784 : */
785 0 : Assert(p >= 0 && p <= OLDSERXID_MAX_PAGE);
786 0 : Assert(q >= 0 && q <= OLDSERXID_MAX_PAGE);
787 :
788 0 : diff = p - q;
789 0 : if (diff >= ((OLDSERXID_MAX_PAGE + 1) / 2))
790 0 : diff -= OLDSERXID_MAX_PAGE + 1;
791 0 : else if (diff < -((int) (OLDSERXID_MAX_PAGE + 1) / 2))
792 0 : diff += OLDSERXID_MAX_PAGE + 1;
793 0 : return diff < 0;
794 : }
795 :
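/*
 * Minimal sketch (not part of the original file; the function name is
 * hypothetical) of the wraparound comparison above: in ordinary cases lower
 * page numbers precede higher ones, but once the counter wraps, the highest
 * page number logically precedes page zero.
 */
#ifdef NOT_USED
static void
OldSerXidPageCompareExample(void)
{
	Assert(OldSerXidPagePrecedesLogically(0, 1));
	Assert(OldSerXidPagePrecedesLogically(OLDSERXID_MAX_PAGE, 0));
}
#endif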
796 : /*
797 : * Initialize for the tracking of old serializable committed xids.
798 : */
799 : static void
800 5 : OldSerXidInit(void)
801 : {
802 : bool found;
803 :
804 : /*
805 : * Set up SLRU management of the pg_serial data.
806 : */
807 5 : OldSerXidSlruCtl->PagePrecedes = OldSerXidPagePrecedesLogically;
808 5 : SimpleLruInit(OldSerXidSlruCtl, "oldserxid",
809 5 : NUM_OLDSERXID_BUFFERS, 0, OldSerXidLock, "pg_serial",
810 : LWTRANCHE_OLDSERXID_BUFFERS);
811 : /* Override default assumption that writes should be fsync'd */
812 5 : OldSerXidSlruCtl->do_fsync = false;
813 :
814 : /*
815 : * Create or attach to the OldSerXidControl structure.
816 : */
817 5 : oldSerXidControl = (OldSerXidControl)
818 5 : ShmemInitStruct("OldSerXidControlData", sizeof(OldSerXidControlData), &found);
819 :
820 5 : Assert(found == IsUnderPostmaster);
821 5 : if (!found)
822 : {
823 : /*
824 : * Set control information to reflect empty SLRU.
825 : */
826 5 : oldSerXidControl->headPage = -1;
827 5 : oldSerXidControl->headXid = InvalidTransactionId;
828 5 : oldSerXidControl->tailXid = InvalidTransactionId;
829 5 : oldSerXidControl->warningIssued = false;
830 : }
831 5 : }
832 :
833 : /*
834 : * Record a committed read write serializable xid and the minimum
835 : * commitSeqNo of any transactions to which this xid had a rw-conflict out.
836 : * An invalid seqNo means that there were no conflicts out from xid.
837 : */
838 : static void
839 0 : OldSerXidAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo)
840 : {
841 : TransactionId tailXid;
842 : int targetPage;
843 : int slotno;
844 : int firstZeroPage;
845 : bool isNewPage;
846 :
847 0 : Assert(TransactionIdIsValid(xid));
848 :
849 0 : targetPage = OldSerXidPage(xid);
850 :
851 0 : LWLockAcquire(OldSerXidLock, LW_EXCLUSIVE);
852 :
853 : /*
854 : * If no serializable transactions are active, there shouldn't be anything
855 : * to push out to the SLRU. Hitting this assert would mean there's
856 : * something wrong with the earlier cleanup logic.
857 : */
858 0 : tailXid = oldSerXidControl->tailXid;
859 0 : Assert(TransactionIdIsValid(tailXid));
860 :
861 : /*
862 : * If the SLRU is currently unused, zero out the whole active region from
863 : * tailXid to headXid before taking it into use. Otherwise zero out only
864 : * any new pages that enter the tailXid-headXid range as we advance
865 : * headXid.
866 : */
867 0 : if (oldSerXidControl->headPage < 0)
868 : {
869 0 : firstZeroPage = OldSerXidPage(tailXid);
870 0 : isNewPage = true;
871 : }
872 : else
873 : {
874 0 : firstZeroPage = OldSerXidNextPage(oldSerXidControl->headPage);
875 0 : isNewPage = OldSerXidPagePrecedesLogically(oldSerXidControl->headPage,
876 : targetPage);
877 : }
878 :
879 0 : if (!TransactionIdIsValid(oldSerXidControl->headXid)
880 0 : || TransactionIdFollows(xid, oldSerXidControl->headXid))
881 0 : oldSerXidControl->headXid = xid;
882 0 : if (isNewPage)
883 0 : oldSerXidControl->headPage = targetPage;
884 :
885 : /*
886 : * Give a warning if we're about to run out of SLRU pages.
887 : *
888 : * slru.c has a maximum of 64k segments, with 32 (SLRU_PAGES_PER_SEGMENT)
889 : * pages each. We need to store a 64-bit integer for each Xid, and with
890 : * default 8k block size, 65536*32 pages is only enough to cover 2^30
891 : * XIDs. If we're about to hit that limit and wrap around, warn the user.
892 : *
893 : * To avoid spamming the user, we only give one warning when we've used 1
894 : * billion XIDs, and stay silent until the situation is fixed and the
895 : * number of XIDs used falls below 800 million again.
896 : *
897 : * XXX: We have no safeguard to actually *prevent* the wrap-around,
898 : * though. All you get is a warning.
899 : */
900 0 : if (oldSerXidControl->warningIssued)
901 : {
902 : TransactionId lowWatermark;
903 :
904 0 : lowWatermark = tailXid + 800000000;
905 0 : if (lowWatermark < FirstNormalTransactionId)
906 0 : lowWatermark = FirstNormalTransactionId;
907 0 : if (TransactionIdPrecedes(xid, lowWatermark))
908 0 : oldSerXidControl->warningIssued = false;
909 : }
910 : else
911 : {
912 : TransactionId highWatermark;
913 :
914 0 : highWatermark = tailXid + 1000000000;
915 0 : if (highWatermark < FirstNormalTransactionId)
916 0 : highWatermark = FirstNormalTransactionId;
917 0 : if (TransactionIdFollows(xid, highWatermark))
918 : {
919 0 : oldSerXidControl->warningIssued = true;
920 0 : ereport(WARNING,
921 : (errmsg("memory for serializable conflict tracking is nearly exhausted"),
922 : errhint("There might be an idle transaction or a forgotten prepared transaction causing this.")));
923 : }
924 : }
925 :
926 0 : if (isNewPage)
927 : {
928 : /* Initialize intervening pages. */
929 0 : while (firstZeroPage != targetPage)
930 : {
931 0 : (void) SimpleLruZeroPage(OldSerXidSlruCtl, firstZeroPage);
932 0 : firstZeroPage = OldSerXidNextPage(firstZeroPage);
933 : }
934 0 : slotno = SimpleLruZeroPage(OldSerXidSlruCtl, targetPage);
935 : }
936 : else
937 0 : slotno = SimpleLruReadPage(OldSerXidSlruCtl, targetPage, true, xid);
938 :
939 0 : OldSerXidValue(slotno, xid) = minConflictCommitSeqNo;
940 0 : OldSerXidSlruCtl->shared->page_dirty[slotno] = true;
941 :
942 0 : LWLockRelease(OldSerXidLock);
943 0 : }
944 :
945 : /*
946 : * Get the minimum commitSeqNo for any conflict out for the given xid. For
947 : * a transaction which exists but has no conflict out, InvalidSerCommitSeqNo
948 : * will be returned.
949 : */
950 : static SerCommitSeqNo
951 0 : OldSerXidGetMinConflictCommitSeqNo(TransactionId xid)
952 : {
953 : TransactionId headXid;
954 : TransactionId tailXid;
955 : SerCommitSeqNo val;
956 : int slotno;
957 :
958 0 : Assert(TransactionIdIsValid(xid));
959 :
960 0 : LWLockAcquire(OldSerXidLock, LW_SHARED);
961 0 : headXid = oldSerXidControl->headXid;
962 0 : tailXid = oldSerXidControl->tailXid;
963 0 : LWLockRelease(OldSerXidLock);
964 :
965 0 : if (!TransactionIdIsValid(headXid))
966 0 : return 0;
967 :
968 0 : Assert(TransactionIdIsValid(tailXid));
969 :
970 0 : if (TransactionIdPrecedes(xid, tailXid)
971 0 : || TransactionIdFollows(xid, headXid))
972 0 : return 0;
973 :
974 : /*
975 : * The following function must be called without holding OldSerXidLock,
976 : * but will return with that lock held, which must then be released.
977 : */
978 0 : slotno = SimpleLruReadPage_ReadOnly(OldSerXidSlruCtl,
979 0 : OldSerXidPage(xid), xid);
980 0 : val = OldSerXidValue(slotno, xid);
981 0 : LWLockRelease(OldSerXidLock);
982 0 : return val;
983 : }
984 :
985 : /*
986 : * Call this whenever there is a new xmin for active serializable
987 : * transactions. We don't need to keep information on transactions which
988 : * precede that. InvalidTransactionId means none active, so everything in
989 : * the SLRU can be discarded.
990 : */
991 : static void
992 16 : OldSerXidSetActiveSerXmin(TransactionId xid)
993 : {
994 16 : LWLockAcquire(OldSerXidLock, LW_EXCLUSIVE);
995 :
996 : /*
997 : * When no sxacts are active, nothing overlaps; set the xid values to
998 : * invalid to show that there are no valid entries. Don't clear headPage,
999 : * though. A new xmin might still land on that page, and we don't want to
1000 : * repeatedly zero out the same page.
1001 : */
1002 16 : if (!TransactionIdIsValid(xid))
1003 : {
1004 8 : oldSerXidControl->tailXid = InvalidTransactionId;
1005 8 : oldSerXidControl->headXid = InvalidTransactionId;
1006 8 : LWLockRelease(OldSerXidLock);
1007 8 : return;
1008 : }
1009 :
1010 : /*
1011 : * When we're recovering prepared transactions, the global xmin might move
1012 : * backwards depending on the order in which they're recovered. Normally that's not
1013 : * OK, but during recovery no serializable transactions will commit, so
1014 : * the SLRU is empty and we can get away with it.
1015 : */
1016 8 : if (RecoveryInProgress())
1017 : {
1018 0 : Assert(oldSerXidControl->headPage < 0);
1019 0 : if (!TransactionIdIsValid(oldSerXidControl->tailXid)
1020 0 : || TransactionIdPrecedes(xid, oldSerXidControl->tailXid))
1021 : {
1022 0 : oldSerXidControl->tailXid = xid;
1023 : }
1024 0 : LWLockRelease(OldSerXidLock);
1025 0 : return;
1026 : }
1027 :
1028 8 : Assert(!TransactionIdIsValid(oldSerXidControl->tailXid)
1029 : || TransactionIdFollows(xid, oldSerXidControl->tailXid));
1030 :
1031 8 : oldSerXidControl->tailXid = xid;
1032 :
1033 8 : LWLockRelease(OldSerXidLock);
1034 : }
1035 :
1036 : /*
1037 : * Perform a checkpoint --- either during shutdown, or on-the-fly
1038 : *
1039 : * We don't have any data that needs to survive a restart, but this is a
1040 : * convenient place to truncate the SLRU.
1041 : */
1042 : void
1043 11 : CheckPointPredicate(void)
1044 : {
1045 : int tailPage;
1046 :
1047 11 : LWLockAcquire(OldSerXidLock, LW_EXCLUSIVE);
1048 :
1049 : /* Exit quickly if the SLRU is currently not in use. */
1050 11 : if (oldSerXidControl->headPage < 0)
1051 : {
1052 11 : LWLockRelease(OldSerXidLock);
1053 22 : return;
1054 : }
1055 :
1056 0 : if (TransactionIdIsValid(oldSerXidControl->tailXid))
1057 : {
1058 : /* We can truncate the SLRU up to the page containing tailXid */
1059 0 : tailPage = OldSerXidPage(oldSerXidControl->tailXid);
1060 : }
1061 : else
1062 : {
1063 : /*
1064 : * The SLRU is no longer needed. Truncate to head before we set head
1065 : * invalid.
1066 : *
1067 : * XXX: It's possible that the SLRU is not needed again until XID
1068 : * wrap-around has happened, so that the segment containing headPage
1069 : * that we leave behind will appear to be new again. In that case it
1070 : * won't be removed until XID horizon advances enough to make it
1071 : * current again.
1072 : */
1073 0 : tailPage = oldSerXidControl->headPage;
1074 0 : oldSerXidControl->headPage = -1;
1075 : }
1076 :
1077 0 : LWLockRelease(OldSerXidLock);
1078 :
1079 : /* Truncate away pages that are no longer required */
1080 0 : SimpleLruTruncate(OldSerXidSlruCtl, tailPage);
1081 :
1082 : /*
1083 : * Flush dirty SLRU pages to disk
1084 : *
1085 : * This is not actually necessary from a correctness point of view. We do
1086 : * it merely as a debugging aid.
1087 : *
1088 : * We're doing this after the truncation to avoid writing pages right
1089 : * before deleting the file in which they sit, which would be completely
1090 : * pointless.
1091 : */
1092 0 : SimpleLruFlush(OldSerXidSlruCtl, true);
1093 : }
1094 :
1095 : /*------------------------------------------------------------------------*/
1096 :
1097 : /*
1098 : * InitPredicateLocks -- Initialize the predicate locking data structures.
1099 : *
1100 : * This is called from CreateSharedMemoryAndSemaphores(), which see for
1101 : * more comments. In the normal postmaster case, the shared hash tables
1102 : * are created here. Backends inherit the pointers
1103 : * to the shared tables via fork(). In the EXEC_BACKEND case, each
1104 : * backend re-executes this code to obtain pointers to the already existing
1105 : * shared hash tables.
1106 : */
1107 : void
1108 5 : InitPredicateLocks(void)
1109 : {
1110 : HASHCTL info;
1111 : long max_table_size;
1112 : Size requestSize;
1113 : bool found;
1114 :
1115 : #ifndef EXEC_BACKEND
1116 5 : Assert(!IsUnderPostmaster);
1117 : #endif
1118 :
1119 : /*
1120 : * Compute size of predicate lock target hashtable. Note these
1121 : * calculations must agree with PredicateLockShmemSize!
1122 : */
1123 5 : max_table_size = NPREDICATELOCKTARGETENTS();
1124 :
1125 : /*
1126 : * Allocate hash table for PREDICATELOCKTARGET structs. This stores
1127 : * per-predicate-lock-target information.
1128 : */
1129 5 : MemSet(&info, 0, sizeof(info));
1130 5 : info.keysize = sizeof(PREDICATELOCKTARGETTAG);
1131 5 : info.entrysize = sizeof(PREDICATELOCKTARGET);
1132 5 : info.num_partitions = NUM_PREDICATELOCK_PARTITIONS;
1133 :
1134 5 : PredicateLockTargetHash = ShmemInitHash("PREDICATELOCKTARGET hash",
1135 : max_table_size,
1136 : max_table_size,
1137 : &info,
1138 : HASH_ELEM | HASH_BLOBS |
1139 : HASH_PARTITION | HASH_FIXED_SIZE);
1140 :
1141 : /*
1142 : * Reserve a dummy entry in the hash table; we use it to make sure there's
1143 : * always one entry available when we need to split or combine a page,
1144 : * because running out of space there could mean aborting a
1145 : * non-serializable transaction.
1146 : */
1147 5 : if (!IsUnderPostmaster)
1148 : {
1149 5 : (void) hash_search(PredicateLockTargetHash, &ScratchTargetTag,
1150 : HASH_ENTER, &found);
1151 5 : Assert(!found);
1152 : }
1153 :
1154 : /* Pre-calculate the hash and partition lock of the scratch entry */
1155 5 : ScratchTargetTagHash = PredicateLockTargetTagHashCode(&ScratchTargetTag);
1156 5 : ScratchPartitionLock = PredicateLockHashPartitionLock(ScratchTargetTagHash);
1157 :
1158 : /*
1159 : * Allocate hash table for PREDICATELOCK structs. This stores per
1160 : * xact-lock-of-a-target information.
1161 : */
1162 5 : MemSet(&info, 0, sizeof(info));
1163 5 : info.keysize = sizeof(PREDICATELOCKTAG);
1164 5 : info.entrysize = sizeof(PREDICATELOCK);
1165 5 : info.hash = predicatelock_hash;
1166 5 : info.num_partitions = NUM_PREDICATELOCK_PARTITIONS;
1167 :
1168 : /* Assume an average of 2 xacts per target */
1169 5 : max_table_size *= 2;
1170 :
1171 5 : PredicateLockHash = ShmemInitHash("PREDICATELOCK hash",
1172 : max_table_size,
1173 : max_table_size,
1174 : &info,
1175 : HASH_ELEM | HASH_FUNCTION |
1176 : HASH_PARTITION | HASH_FIXED_SIZE);
1177 :
1178 : /*
1179 : * Compute size for serializable transaction hashtable. Note these
1180 : * calculations must agree with PredicateLockShmemSize!
1181 : */
1182 5 : max_table_size = (MaxBackends + max_prepared_xacts);
1183 :
1184 : /*
1185 : * Allocate a list to hold information on transactions participating in
1186 : * predicate locking.
1187 : *
1188 : * Assume an average of 10 predicate locking transactions per backend.
1189 : * This allows aggressive cleanup while detail is present before data must
1190 : * be summarized for storage in SLRU and the "dummy" transaction.
1191 : */
1192 5 : max_table_size *= 10;
1193 :
1194 5 : PredXact = ShmemInitStruct("PredXactList",
1195 : PredXactListDataSize,
1196 : &found);
1197 5 : Assert(found == IsUnderPostmaster);
1198 5 : if (!found)
1199 : {
1200 : int i;
1201 :
1202 5 : SHMQueueInit(&PredXact->availableList);
1203 5 : SHMQueueInit(&PredXact->activeList);
1204 5 : PredXact->SxactGlobalXmin = InvalidTransactionId;
1205 5 : PredXact->SxactGlobalXminCount = 0;
1206 5 : PredXact->WritableSxactCount = 0;
1207 5 : PredXact->LastSxactCommitSeqNo = FirstNormalSerCommitSeqNo - 1;
1208 5 : PredXact->CanPartialClearThrough = 0;
1209 5 : PredXact->HavePartialClearedThrough = 0;
1210 5 : requestSize = mul_size((Size) max_table_size,
1211 : PredXactListElementDataSize);
1212 5 : PredXact->element = ShmemAlloc(requestSize);
1213 : /* Add all elements to available list, clean. */
1214 5 : memset(PredXact->element, 0, requestSize);
1215 5625 : for (i = 0; i < max_table_size; i++)
1216 : {
1217 5620 : SHMQueueInsertBefore(&(PredXact->availableList),
1218 5620 : &(PredXact->element[i].link));
1219 : }
1220 5 : PredXact->OldCommittedSxact = CreatePredXact();
1221 5 : SetInvalidVirtualTransactionId(PredXact->OldCommittedSxact->vxid);
1222 5 : PredXact->OldCommittedSxact->prepareSeqNo = 0;
1223 5 : PredXact->OldCommittedSxact->commitSeqNo = 0;
1224 5 : PredXact->OldCommittedSxact->SeqNo.lastCommitBeforeSnapshot = 0;
1225 5 : SHMQueueInit(&PredXact->OldCommittedSxact->outConflicts);
1226 5 : SHMQueueInit(&PredXact->OldCommittedSxact->inConflicts);
1227 5 : SHMQueueInit(&PredXact->OldCommittedSxact->predicateLocks);
1228 5 : SHMQueueInit(&PredXact->OldCommittedSxact->finishedLink);
1229 5 : SHMQueueInit(&PredXact->OldCommittedSxact->possibleUnsafeConflicts);
1230 5 : PredXact->OldCommittedSxact->topXid = InvalidTransactionId;
1231 5 : PredXact->OldCommittedSxact->finishedBefore = InvalidTransactionId;
1232 5 : PredXact->OldCommittedSxact->xmin = InvalidTransactionId;
1233 5 : PredXact->OldCommittedSxact->flags = SXACT_FLAG_COMMITTED;
1234 5 : PredXact->OldCommittedSxact->pid = 0;
1235 : }
1236 : /* This never changes, so let's keep a local copy. */
1237 5 : OldCommittedSxact = PredXact->OldCommittedSxact;
1238 :
1239 : /*
1240 : * Allocate hash table for SERIALIZABLEXID structs. This stores per-xid
1241 : * information for serializable transactions which have accessed data.
1242 : */
1243 5 : MemSet(&info, 0, sizeof(info));
1244 5 : info.keysize = sizeof(SERIALIZABLEXIDTAG);
1245 5 : info.entrysize = sizeof(SERIALIZABLEXID);
1246 :
1247 5 : SerializableXidHash = ShmemInitHash("SERIALIZABLEXID hash",
1248 : max_table_size,
1249 : max_table_size,
1250 : &info,
1251 : HASH_ELEM | HASH_BLOBS |
1252 : HASH_FIXED_SIZE);
1253 :
1254 : /*
1255 : * Allocate space for tracking rw-conflicts in lists attached to the
1256 : * transactions.
1257 : *
1258 : * Assume an average of 5 conflicts per transaction. Calculations suggest
1259 : * that this will prevent resource exhaustion in even the most pessimal
1260 : * loads up to max_connections = 200 with all 200 connections pounding the
1261 : * database with serializable transactions. Beyond that, there may be
1262 : * occasional transactions canceled when trying to flag conflicts. That's
1263 : * probably OK.
1264 : */
1265 5 : max_table_size *= 5;
1266 :
1267 5 : RWConflictPool = ShmemInitStruct("RWConflictPool",
1268 : RWConflictPoolHeaderDataSize,
1269 : &found);
1270 5 : Assert(found == IsUnderPostmaster);
1271 5 : if (!found)
1272 : {
1273 : int i;
1274 :
1275 5 : SHMQueueInit(&RWConflictPool->availableList);
1276 5 : requestSize = mul_size((Size) max_table_size,
1277 : RWConflictDataSize);
1278 5 : RWConflictPool->element = ShmemAlloc(requestSize);
1279 : /* Add all elements to available list, clean. */
1280 5 : memset(RWConflictPool->element, 0, requestSize);
1281 28105 : for (i = 0; i < max_table_size; i++)
1282 : {
1283 28100 : SHMQueueInsertBefore(&(RWConflictPool->availableList),
1284 28100 : &(RWConflictPool->element[i].outLink));
1285 : }
1286 : }
1287 :
1288 : /*
1289 : * Create or attach to the header for the list of finished serializable
1290 : * transactions.
1291 : */
1292 5 : FinishedSerializableTransactions = (SHM_QUEUE *)
1293 5 : ShmemInitStruct("FinishedSerializableTransactions",
1294 : sizeof(SHM_QUEUE),
1295 : &found);
1296 5 : Assert(found == IsUnderPostmaster);
1297 5 : if (!found)
1298 5 : SHMQueueInit(FinishedSerializableTransactions);
1299 :
1300 : /*
1301 : * Initialize the SLRU storage for old committed serializable
1302 : * transactions.
1303 : */
1304 5 : OldSerXidInit();
1305 5 : }
1306 :
1307 : /*
1308 : * Estimate shared-memory space used for predicate lock table
1309 : */
1310 : Size
1311 5 : PredicateLockShmemSize(void)
1312 : {
1313 5 : Size size = 0;
1314 : long max_table_size;
1315 :
1316 : /* predicate lock target hash table */
1317 5 : max_table_size = NPREDICATELOCKTARGETENTS();
1318 5 : size = add_size(size, hash_estimate_size(max_table_size,
1319 : sizeof(PREDICATELOCKTARGET)));
1320 :
1321 : /* predicate lock hash table */
1322 5 : max_table_size *= 2;
1323 5 : size = add_size(size, hash_estimate_size(max_table_size,
1324 : sizeof(PREDICATELOCK)));
1325 :
1326 : /*
1327 : * Since NPREDICATELOCKTARGETENTS is only an estimate, add 10% safety
1328 : * margin.
1329 : */
1330 5 : size = add_size(size, size / 10);
1331 :
1332 : /* transaction list */
1333 5 : max_table_size = MaxBackends + max_prepared_xacts;
1334 5 : max_table_size *= 10;
1335 5 : size = add_size(size, PredXactListDataSize);
1336 5 : size = add_size(size, mul_size((Size) max_table_size,
1337 : PredXactListElementDataSize));
1338 :
1339 : /* transaction xid table */
1340 5 : size = add_size(size, hash_estimate_size(max_table_size,
1341 : sizeof(SERIALIZABLEXID)));
1342 :
1343 : /* rw-conflict pool */
1344 5 : max_table_size *= 5;
1345 5 : size = add_size(size, RWConflictPoolHeaderDataSize);
1346 5 : size = add_size(size, mul_size((Size) max_table_size,
1347 : RWConflictDataSize));
1348 :
1349 : /* Head for list of finished serializable transactions. */
1350 5 : size = add_size(size, sizeof(SHM_QUEUE));
1351 :
1352 : /* Shared memory structures for SLRU tracking of old committed xids. */
1353 5 : size = add_size(size, sizeof(OldSerXidControlData));
1354 5 : size = add_size(size, SimpleLruShmemSize(NUM_OLDSERXID_BUFFERS, 0));
1355 :
1356 5 : return size;
1357 : }
1358 :
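/*
 * Worked sizing example (hypothetical settings, not defaults taken from this
 * file): with max_predicate_locks_per_xact = 64 and MaxBackends +
 * max_prepared_xacts = 100, the estimates above come to
 *   64 * 100 = 6400   PREDICATELOCKTARGET entries,
 *   2 * 6400 = 12800  PREDICATELOCK entries (assuming 2 xacts per target),
 *   10 * 100 = 1000   SERIALIZABLEXACT slots and SERIALIZABLEXID entries,
 *   5 * 1000 = 5000   RWConflict pool elements,
 * plus the 10% safety margin on the two lock hash tables and the fixed
 * list-header and SLRU overhead.
 */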
1359 :
1360 : /*
1361 : * Compute the hash code associated with a PREDICATELOCKTAG.
1362 : *
1363 : * Because we want to use just one set of partition locks for both the
1364 : * PREDICATELOCKTARGET and PREDICATELOCK hash tables, we have to make sure
1365 : * that PREDICATELOCKs fall into the same partition number as their
1366 : * associated PREDICATELOCKTARGETs. dynahash.c expects the partition number
1367 : * to be the low-order bits of the hash code, and therefore a
1368 : * PREDICATELOCKTAG's hash code must have the same low-order bits as the
1369 : * associated PREDICATELOCKTARGETTAG's hash code. We achieve this with this
1370 : * specialized hash function.
1371 : */
1372 : static uint32
1373 0 : predicatelock_hash(const void *key, Size keysize)
1374 : {
1375 0 : const PREDICATELOCKTAG *predicatelocktag = (const PREDICATELOCKTAG *) key;
1376 : uint32 targethash;
1377 :
1378 0 : Assert(keysize == sizeof(PREDICATELOCKTAG));
1379 :
1380 : /* Look into the associated target object, and compute its hash code */
1381 0 : targethash = PredicateLockTargetTagHashCode(&predicatelocktag->myTarget->tag);
1382 :
1383 0 : return PredicateLockHashCodeFromTargetHashCode(predicatelocktag, targethash);
1384 : }
1385 :
1386 :
1387 : /*
1388 : * GetPredicateLockStatusData
1389 : * Return a table containing the internal state of the predicate
1390 : * lock manager for use in pg_lock_status.
1391 : *
1392 : * Like GetLockStatusData, this function tries to hold the partition LWLocks
1393 : * for as short a time as possible by returning two arrays that simply
1394 : * contain the PREDICATELOCKTARGETTAG and SERIALIZABLEXACT for each lock
1395 : * table entry. Multiple copies of the same PREDICATELOCKTARGETTAG and
1396 : * SERIALIZABLEXACT will likely appear.
1397 : */
1398 : PredicateLockData *
1399 52 : GetPredicateLockStatusData(void)
1400 : {
1401 : PredicateLockData *data;
1402 : int i;
1403 : int els,
1404 : el;
1405 : HASH_SEQ_STATUS seqstat;
1406 : PREDICATELOCK *predlock;
1407 :
1408 52 : data = (PredicateLockData *) palloc(sizeof(PredicateLockData));
1409 :
1410 : /*
1411 : * To ensure consistency, take simultaneous locks on all partition locks
1412 : * in ascending order, then SerializableXactHashLock.
1413 : */
1414 884 : for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
1415 832 : LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_SHARED);
1416 52 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
1417 :
1418 : /* Get number of locks and allocate appropriately-sized arrays. */
1419 52 : els = hash_get_num_entries(PredicateLockHash);
1420 52 : data->nelements = els;
1421 52 : data->locktags = (PREDICATELOCKTARGETTAG *)
1422 52 : palloc(sizeof(PREDICATELOCKTARGETTAG) * els);
1423 52 : data->xacts = (SERIALIZABLEXACT *)
1424 52 : palloc(sizeof(SERIALIZABLEXACT) * els);
1425 :
1426 :
1427 : /* Scan through PredicateLockHash and copy contents */
1428 52 : hash_seq_init(&seqstat, PredicateLockHash);
1429 :
1430 52 : el = 0;
1431 :
1432 104 : while ((predlock = (PREDICATELOCK *) hash_seq_search(&seqstat)))
1433 : {
1434 0 : data->locktags[el] = predlock->tag.myTarget->tag;
1435 0 : data->xacts[el] = *predlock->tag.myXact;
1436 0 : el++;
1437 : }
1438 :
1439 52 : Assert(el == els);
1440 :
1441 : /* Release locks in reverse order */
1442 52 : LWLockRelease(SerializableXactHashLock);
1443 884 : for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
1444 832 : LWLockRelease(PredicateLockHashPartitionLockByIndex(i));
1445 :
1446 52 : return data;
1447 : }
1448 :
1449 : /*
1450 : * Free up shared memory structures by pushing the oldest sxact (the one at
1451 : * the front of the FinishedSerializableTransactions list) into summary form.
1452 : * Each call will free exactly one SERIALIZABLEXACT structure and may also
1453 : * free one or more of these structures: SERIALIZABLEXID, PREDICATELOCK,
1454 : * PREDICATELOCKTARGET, RWConflictData.
1455 : */
1456 : static void
1457 0 : SummarizeOldestCommittedSxact(void)
1458 : {
1459 : SERIALIZABLEXACT *sxact;
1460 :
1461 0 : LWLockAcquire(SerializableFinishedListLock, LW_EXCLUSIVE);
1462 :
1463 : /*
1464 : * This function is only called if there are no sxact slots available.
1465 : * Some of them must belong to old, already-finished transactions, so
1466 : * there should be something in FinishedSerializableTransactions list that
1467 : * we can summarize. However, there's a race condition: while we were not
1468 : * holding any locks, a transaction might have ended and cleaned up all
1469 : * the finished sxact entries already, freeing up their sxact slots. In
1470 : * that case, we have nothing to do here. The caller will find one of the
1471 : * slots released by the other backend when it retries.
1472 : */
1473 0 : if (SHMQueueEmpty(FinishedSerializableTransactions))
1474 : {
1475 0 : LWLockRelease(SerializableFinishedListLock);
1476 0 : return;
1477 : }
1478 :
1479 : /*
1480 : * Grab the first sxact off the finished list -- this will be the earliest
1481 : * commit. Remove it from the list.
1482 : */
1483 0 : sxact = (SERIALIZABLEXACT *)
1484 0 : SHMQueueNext(FinishedSerializableTransactions,
1485 : FinishedSerializableTransactions,
1486 : offsetof(SERIALIZABLEXACT, finishedLink));
1487 0 : SHMQueueDelete(&(sxact->finishedLink));
1488 :
1489 : /* Add to SLRU summary information. */
1490 0 : if (TransactionIdIsValid(sxact->topXid) && !SxactIsReadOnly(sxact))
1491 0 : OldSerXidAdd(sxact->topXid, SxactHasConflictOut(sxact)
1492 : ? sxact->SeqNo.earliestOutConflictCommit : InvalidSerCommitSeqNo);
1493 :
1494 : /* Summarize and release the detail. */
1495 0 : ReleaseOneSerializableXact(sxact, false, true);
1496 :
1497 0 : LWLockRelease(SerializableFinishedListLock);
1498 : }
1499 :
1500 : /*
1501 : * GetSafeSnapshot
1502 : * Obtain and register a snapshot for a READ ONLY DEFERRABLE
1503 : * transaction. Ensures that the snapshot is "safe", i.e. a
1504 : * read-only transaction running on it can execute serializably
1505 : * without further checks. This requires waiting for concurrent
1506 : * transactions to complete, and retrying with a new snapshot if
1507 : * one of them could possibly create a conflict.
1508 : *
1509 : * As with GetSerializableTransactionSnapshot (which this is a subroutine
1510 : * for), the passed-in Snapshot pointer should reference a static data
1511 : * area that can safely be passed to GetSnapshotData.
1512 : */
1513 : static Snapshot
1514 1 : GetSafeSnapshot(Snapshot origSnapshot)
1515 : {
1516 : Snapshot snapshot;
1517 :
1518 1 : Assert(XactReadOnly && XactDeferrable);
1519 :
1520 : while (true)
1521 : {
1522 : /*
1523 : * GetSerializableTransactionSnapshotInt is going to call
1524 : * GetSnapshotData, so we need to provide it the static snapshot area
1525 : * our caller passed to us. The pointer returned is actually the same
1526 : * one passed to it, but we avoid assuming that here.
1527 : */
1528 1 : snapshot = GetSerializableTransactionSnapshotInt(origSnapshot,
1529 : NULL, InvalidPid);
1530 :
1531 1 : if (MySerializableXact == InvalidSerializableXact)
1532 1 : return snapshot; /* no concurrent r/w xacts; it's safe */
1533 :
1534 0 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
1535 :
1536 : /*
1537 : * Wait for concurrent transactions to finish. Stop early if one of
1538 : * them marked us as conflicted.
1539 : */
1540 0 : MySerializableXact->flags |= SXACT_FLAG_DEFERRABLE_WAITING;
1541 0 : while (!(SHMQueueEmpty(&MySerializableXact->possibleUnsafeConflicts) ||
1542 0 : SxactIsROUnsafe(MySerializableXact)))
1543 : {
1544 0 : LWLockRelease(SerializableXactHashLock);
1545 0 : ProcWaitForSignal(WAIT_EVENT_SAFE_SNAPSHOT);
1546 0 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
1547 : }
1548 0 : MySerializableXact->flags &= ~SXACT_FLAG_DEFERRABLE_WAITING;
1549 :
1550 0 : if (!SxactIsROUnsafe(MySerializableXact))
1551 : {
1552 0 : LWLockRelease(SerializableXactHashLock);
1553 0 : break; /* success */
1554 : }
1555 :
1556 0 : LWLockRelease(SerializableXactHashLock);
1557 :
1558 : /* else, need to retry... */
1559 0 : ereport(DEBUG2,
1560 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1561 : errmsg("deferrable snapshot was unsafe; trying a new one")));
1562 0 : ReleasePredicateLocks(false);
1563 0 : }
1564 :
1565 : /*
1566 : * Now we have a safe snapshot, so we don't need to do any further checks.
1567 : */
1568 0 : Assert(SxactIsROSafe(MySerializableXact));
1569 0 : ReleasePredicateLocks(false);
1570 :
1571 0 : return snapshot;
1572 : }
1573 :
1574 : /*
1575 : * GetSafeSnapshotBlockingPids
1576 : * If the specified process is currently blocked in GetSafeSnapshot,
1577 : * write the process IDs of all processes that it is blocked by
1578 : * into the caller-supplied buffer output[]. The list is truncated at
1579 : * output_size, and the number of PIDs written into the buffer is
1580 : * returned. Returns zero if the given PID is not currently blocked
1581 : * in GetSafeSnapshot.
1582 : */
1583 : int
1584 0 : GetSafeSnapshotBlockingPids(int blocked_pid, int *output, int output_size)
1585 : {
1586 0 : int num_written = 0;
1587 : SERIALIZABLEXACT *sxact;
1588 :
1589 0 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
1590 :
1591 : /* Find blocked_pid's SERIALIZABLEXACT by linear search. */
1592 0 : for (sxact = FirstPredXact(); sxact != NULL; sxact = NextPredXact(sxact))
1593 : {
1594 0 : if (sxact->pid == blocked_pid)
1595 0 : break;
1596 : }
1597 :
1598 : /* Did we find it, and is it currently waiting in GetSafeSnapshot? */
1599 0 : if (sxact != NULL && SxactIsDeferrableWaiting(sxact))
1600 : {
1601 : RWConflict possibleUnsafeConflict;
1602 :
1603 : /* Traverse the list of possible unsafe conflicts collecting PIDs. */
1604 0 : possibleUnsafeConflict = (RWConflict)
1605 0 : SHMQueueNext(&sxact->possibleUnsafeConflicts,
1606 0 : &sxact->possibleUnsafeConflicts,
1607 : offsetof(RWConflictData, inLink));
1608 :
1609 0 : while (possibleUnsafeConflict != NULL && num_written < output_size)
1610 : {
1611 0 : output[num_written++] = possibleUnsafeConflict->sxactOut->pid;
1612 0 : possibleUnsafeConflict = (RWConflict)
1613 0 : SHMQueueNext(&sxact->possibleUnsafeConflicts,
1614 0 : &possibleUnsafeConflict->inLink,
1615 : offsetof(RWConflictData, inLink));
1616 : }
1617 : }
1618 :
1619 0 : LWLockRelease(SerializableXactHashLock);
1620 :
1621 0 : return num_written;
1622 : }
1623 :
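/*
 * [Editor's sketch -- not part of predicate.c.]  A hypothetical caller of
 * GetSafeSnapshotBlockingPids, showing the intended calling convention:
 * supply a fixed-size buffer and treat the return value as the number of
 * valid entries (zero meaning the process is not blocked in
 * GetSafeSnapshot).  The function name and buffer size are made up for
 * illustration; elog() and lengthof() come from the file's usual includes.
 */
static void
sketch_log_safe_snapshot_blockers(int blocked_pid)
{
	int			pids[32];
	int			n;
	int			i;

	n = GetSafeSnapshotBlockingPids(blocked_pid, pids, lengthof(pids));
	for (i = 0; i < n; i++)
		elog(DEBUG2, "pid %d waits for a safe snapshot behind pid %d",
			 blocked_pid, pids[i]);
}
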
1624 : /*
1625 : * Acquire a snapshot that can be used for the current transaction.
1626 : *
1627 : * Make sure we have a SERIALIZABLEXACT reference in MySerializableXact.
1628 : * It should be current for this process and be contained in PredXact.
1629 : *
1630 : * The passed-in Snapshot pointer should reference a static data area that
1631 : * can safely be passed to GetSnapshotData. The return value is actually
1632 : * always this same pointer; no new snapshot data structure is allocated
1633 : * within this function.
1634 : */
1635 : Snapshot
1636 13 : GetSerializableTransactionSnapshot(Snapshot snapshot)
1637 : {
1638 13 : Assert(IsolationIsSerializable());
1639 :
1640 : /*
1641 : * Can't use serializable mode while recovery is still active, as it is,
1642 : * for example, on a hot standby. We could get here despite the check in
1643 : * check_XactIsoLevel() if default_transaction_isolation is set to
1644 : * serializable, so phrase the hint accordingly.
1645 : */
1646 13 : if (RecoveryInProgress())
1647 0 : ereport(ERROR,
1648 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1649 : errmsg("cannot use serializable mode in a hot standby"),
1650 : errdetail("\"default_transaction_isolation\" is set to \"serializable\"."),
1651 : errhint("You can use \"SET default_transaction_isolation = 'repeatable read'\" to change the default.")));
1652 :
1653 : /*
1654 : * A special optimization is available for SERIALIZABLE READ ONLY
1655 : * DEFERRABLE transactions -- we can wait for a suitable snapshot and
1656 : * thereby avoid all SSI overhead once it's running.
1657 : */
1658 13 : if (XactReadOnly && XactDeferrable)
1659 1 : return GetSafeSnapshot(snapshot);
1660 :
1661 12 : return GetSerializableTransactionSnapshotInt(snapshot,
1662 : NULL, InvalidPid);
1663 : }
1664 :
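/*
 * [Editor's note.]  The READ ONLY DEFERRABLE branch above is typically
 * reached by a client running something like
 *     BEGIN TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY DEFERRABLE;
 * which trades a possible wait at transaction start (inside GetSafeSnapshot)
 * for running the rest of the transaction free of predicate-lock tracking
 * and serialization-failure risk.
 */
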
1665 : /*
1666 : * Import a snapshot to be used for the current transaction.
1667 : *
1668 : * This is nearly the same as GetSerializableTransactionSnapshot, except that
1669 : * we don't take a new snapshot, but rather use the data we're handed.
1670 : *
1671 : * The caller must have verified that the snapshot came from a serializable
1672 : * transaction; and if we're read-write, the source transaction must not be
1673 : * read-only.
1674 : */
1675 : void
1676 0 : SetSerializableTransactionSnapshot(Snapshot snapshot,
1677 : VirtualTransactionId *sourcevxid,
1678 : int sourcepid)
1679 : {
1680 0 : Assert(IsolationIsSerializable());
1681 :
1682 : /*
1683 : * We do not allow SERIALIZABLE READ ONLY DEFERRABLE transactions to
1684 : * import snapshots, since there's no way to wait for a safe snapshot when
1685 : * we're using the snap we're told to. (XXX instead of throwing an error,
1686 : * we could just ignore the XactDeferrable flag?)
1687 : */
1688 0 : if (XactReadOnly && XactDeferrable)
1689 0 : ereport(ERROR,
1690 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1691 : errmsg("a snapshot-importing transaction must not be READ ONLY DEFERRABLE")));
1692 :
1693 0 : (void) GetSerializableTransactionSnapshotInt(snapshot, sourcevxid,
1694 : sourcepid);
1695 0 : }
1696 :
1697 : /*
1698 : * Guts of GetSerializableTransactionSnapshot
1699 : *
1700 : * If sourcevxid is valid, this is actually an import operation and we should
1701 : * skip calling GetSnapshotData, because the snapshot contents are already
1702 : * loaded up. HOWEVER: to avoid race conditions, we must check that the
1703 : * source xact is still running after we acquire SerializableXactHashLock.
1704 : * We do that by calling ProcArrayInstallImportedXmin.
1705 : */
1706 : static Snapshot
1707 13 : GetSerializableTransactionSnapshotInt(Snapshot snapshot,
1708 : VirtualTransactionId *sourcevxid,
1709 : int sourcepid)
1710 : {
1711 : PGPROC *proc;
1712 : VirtualTransactionId vxid;
1713 : SERIALIZABLEXACT *sxact,
1714 : *othersxact;
1715 : HASHCTL hash_ctl;
1716 :
1717 : /* We only do this for serializable transactions. Once. */
1718 13 : Assert(MySerializableXact == InvalidSerializableXact);
1719 :
1720 13 : Assert(!RecoveryInProgress());
1721 :
1722 : /*
1723 : * Since all parts of a serializable transaction must use the same
1724 : * snapshot, it is too late to establish one after a parallel operation
1725 : * has begun.
1726 : */
1727 13 : if (IsInParallelMode())
1728 0 : elog(ERROR, "cannot establish serializable snapshot during a parallel operation");
1729 :
1730 13 : proc = MyProc;
1731 13 : Assert(proc != NULL);
1732 13 : GET_VXID_FROM_PGPROC(vxid, *proc);
1733 :
1734 : /*
1735 : * First we get the sxact structure, which may involve looping and access
1736 : * to the "finished" list to free a structure for use.
1737 : *
1738 : * We must hold SerializableXactHashLock when taking/checking the snapshot
1739 : * to avoid race conditions, for much the same reasons that
1740 : * GetSnapshotData takes the ProcArrayLock. Since we might have to
1741 : * release SerializableXactHashLock to call SummarizeOldestCommittedSxact,
1742 : * this means we have to create the sxact first, which is a bit annoying
1743 : * (in particular, an elog(ERROR) in procarray.c would cause us to leak
1744 : * the sxact). Consider refactoring to avoid this.
1745 : */
1746 : #ifdef TEST_OLDSERXID
1747 : SummarizeOldestCommittedSxact();
1748 : #endif
1749 13 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
1750 : do
1751 : {
1752 13 : sxact = CreatePredXact();
1753 : /* If null, push out committed sxact to SLRU summary & retry. */
1754 13 : if (!sxact)
1755 : {
1756 0 : LWLockRelease(SerializableXactHashLock);
1757 0 : SummarizeOldestCommittedSxact();
1758 0 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
1759 : }
1760 13 : } while (!sxact);
1761 :
1762 : /* Get the snapshot, or check that it's safe to use */
1763 13 : if (!sourcevxid)
1764 13 : snapshot = GetSnapshotData(snapshot);
1765 0 : else if (!ProcArrayInstallImportedXmin(snapshot->xmin, sourcevxid))
1766 : {
1767 0 : ReleasePredXact(sxact);
1768 0 : LWLockRelease(SerializableXactHashLock);
1769 0 : ereport(ERROR,
1770 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1771 : errmsg("could not import the requested snapshot"),
1772 : errdetail("The source process with pid %d is not running anymore.",
1773 : sourcepid)));
1774 : }
1775 :
1776 : /*
1777 : * If there are no serializable transactions which are not read-only, we
1778 : * can "opt out" of predicate locking and conflict checking for a
1779 : * read-only transaction.
1780 : *
1781 : * The reason this is safe is that a read-only transaction can only become
1782 : * part of a dangerous structure if it overlaps a writable transaction
1783 : * which in turn overlaps a writable transaction which committed before
1784 : * the read-only transaction started. A new writable transaction can
1785 : * overlap this one, but it can't meet the other condition of overlapping
1786 : * a transaction which committed before this one started.
1787 : */
1788 13 : if (XactReadOnly && PredXact->WritableSxactCount == 0)
1789 : {
1790 1 : ReleasePredXact(sxact);
1791 1 : LWLockRelease(SerializableXactHashLock);
1792 1 : return snapshot;
1793 : }
1794 :
1795 : /* Maintain serializable global xmin info. */
1796 12 : if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
1797 : {
1798 8 : Assert(PredXact->SxactGlobalXminCount == 0);
1799 8 : PredXact->SxactGlobalXmin = snapshot->xmin;
1800 8 : PredXact->SxactGlobalXminCount = 1;
1801 8 : OldSerXidSetActiveSerXmin(snapshot->xmin);
1802 : }
1803 4 : else if (TransactionIdEquals(snapshot->xmin, PredXact->SxactGlobalXmin))
1804 : {
1805 2 : Assert(PredXact->SxactGlobalXminCount > 0);
1806 2 : PredXact->SxactGlobalXminCount++;
1807 : }
1808 : else
1809 : {
1810 2 : Assert(TransactionIdFollows(snapshot->xmin, PredXact->SxactGlobalXmin));
1811 : }
1812 :
1813 : /* Initialize the structure. */
1814 12 : sxact->vxid = vxid;
1815 12 : sxact->SeqNo.lastCommitBeforeSnapshot = PredXact->LastSxactCommitSeqNo;
1816 12 : sxact->prepareSeqNo = InvalidSerCommitSeqNo;
1817 12 : sxact->commitSeqNo = InvalidSerCommitSeqNo;
1818 12 : SHMQueueInit(&(sxact->outConflicts));
1819 12 : SHMQueueInit(&(sxact->inConflicts));
1820 12 : SHMQueueInit(&(sxact->possibleUnsafeConflicts));
1821 12 : sxact->topXid = GetTopTransactionIdIfAny();
1822 12 : sxact->finishedBefore = InvalidTransactionId;
1823 12 : sxact->xmin = snapshot->xmin;
1824 12 : sxact->pid = MyProcPid;
1825 12 : SHMQueueInit(&(sxact->predicateLocks));
1826 12 : SHMQueueElemInit(&(sxact->finishedLink));
1827 12 : sxact->flags = 0;
1828 12 : if (XactReadOnly)
1829 : {
1830 0 : sxact->flags |= SXACT_FLAG_READ_ONLY;
1831 :
1832 : /*
1833 : * Register all concurrent r/w transactions as possible conflicts; if
1834 : * all of them commit without any outgoing conflicts to earlier
1835 : * transactions then this snapshot can be deemed safe (and we can run
1836 : * without tracking predicate locks).
1837 : */
1838 0 : for (othersxact = FirstPredXact();
1839 : othersxact != NULL;
1840 0 : othersxact = NextPredXact(othersxact))
1841 : {
1842 0 : if (!SxactIsCommitted(othersxact)
1843 0 : && !SxactIsDoomed(othersxact)
1844 0 : && !SxactIsReadOnly(othersxact))
1845 : {
1846 0 : SetPossibleUnsafeConflict(sxact, othersxact);
1847 : }
1848 : }
1849 : }
1850 : else
1851 : {
1852 12 : ++(PredXact->WritableSxactCount);
1853 12 : Assert(PredXact->WritableSxactCount <=
1854 : (MaxBackends + max_prepared_xacts));
1855 : }
1856 :
1857 12 : MySerializableXact = sxact;
1858 12 : MyXactDidWrite = false; /* haven't written anything yet */
1859 :
1860 12 : LWLockRelease(SerializableXactHashLock);
1861 :
1862 : /* Initialize the backend-local hash table of parent locks */
1863 12 : Assert(LocalPredicateLockHash == NULL);
1864 12 : MemSet(&hash_ctl, 0, sizeof(hash_ctl));
1865 12 : hash_ctl.keysize = sizeof(PREDICATELOCKTARGETTAG);
1866 12 : hash_ctl.entrysize = sizeof(LOCALPREDICATELOCK);
1867 12 : LocalPredicateLockHash = hash_create("Local predicate lock",
1868 : max_predicate_locks_per_xact,
1869 : &hash_ctl,
1870 : HASH_ELEM | HASH_BLOBS);
1871 :
1872 12 : return snapshot;
1873 : }
1874 :
1875 : /*
1876 : * Register the top level XID in SerializableXidHash.
1877 : * Also store it for easy reference in MySerializableXact.
1878 : */
1879 : void
1880 10594 : RegisterPredicateLockingXid(TransactionId xid)
1881 : {
1882 : SERIALIZABLEXIDTAG sxidtag;
1883 : SERIALIZABLEXID *sxid;
1884 : bool found;
1885 :
1886 : /*
1887 : * If we're not tracking predicate lock data for this transaction, we
1888 : * should ignore the request and return quickly.
1889 : */
1890 10594 : if (MySerializableXact == InvalidSerializableXact)
1891 21176 : return;
1892 :
1893 : /* We should have a valid XID and be at the top level. */
1894 12 : Assert(TransactionIdIsValid(xid));
1895 :
1896 12 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
1897 :
1898 : /* This should only be done once per transaction. */
1899 12 : Assert(MySerializableXact->topXid == InvalidTransactionId);
1900 :
1901 12 : MySerializableXact->topXid = xid;
1902 :
1903 12 : sxidtag.xid = xid;
1904 12 : sxid = (SERIALIZABLEXID *) hash_search(SerializableXidHash,
1905 : &sxidtag,
1906 : HASH_ENTER, &found);
1907 12 : Assert(!found);
1908 :
1909 : /* Initialize the structure. */
1910 12 : sxid->myXact = MySerializableXact;
1911 12 : LWLockRelease(SerializableXactHashLock);
1912 : }
1913 :
1914 :
1915 : /*
1916 : * Check whether there are any predicate locks held by any transaction
1917 : * for the page at the given block number.
1918 : *
1919 : * Note that the transaction may be completed but not yet subject to
1920 : * cleanup due to overlapping serializable transactions. This must
1921 : * return valid information regardless of transaction isolation level.
1922 : *
1923 : * Also note that this doesn't check for a conflicting relation lock,
1924 : * just a lock specifically on the given page.
1925 : *
1926 : * One use is to support proper behavior during GiST index vacuum.
1927 : */
1928 : bool
1929 0 : PageIsPredicateLocked(Relation relation, BlockNumber blkno)
1930 : {
1931 : PREDICATELOCKTARGETTAG targettag;
1932 : uint32 targettaghash;
1933 : LWLock *partitionLock;
1934 : PREDICATELOCKTARGET *target;
1935 :
1936 0 : SET_PREDICATELOCKTARGETTAG_PAGE(targettag,
1937 : relation->rd_node.dbNode,
1938 : relation->rd_id,
1939 : blkno);
1940 :
1941 0 : targettaghash = PredicateLockTargetTagHashCode(&targettag);
1942 0 : partitionLock = PredicateLockHashPartitionLock(targettaghash);
1943 0 : LWLockAcquire(partitionLock, LW_SHARED);
1944 0 : target = (PREDICATELOCKTARGET *)
1945 0 : hash_search_with_hash_value(PredicateLockTargetHash,
1946 : &targettag, targettaghash,
1947 : HASH_FIND, NULL);
1948 0 : LWLockRelease(partitionLock);
1949 :
1950 0 : return (target != NULL);
1951 : }
1952 :
1953 :
1954 : /*
1955 : * Check whether a particular lock is held by this transaction.
1956 : *
1957 : * Important note: this function may return false even if the lock is
1958 : * being held, because it uses the local lock table which is not
1959 : * updated if another transaction modifies our lock list (e.g. to
1960 : * split an index page). It can also return true when a coarser
1961 : * granularity lock that covers this target is being held. Be careful
1962 : * to only use this function in circumstances where such errors are
1963 : * acceptable!
1964 : */
1965 : static bool
1966 12 : PredicateLockExists(const PREDICATELOCKTARGETTAG *targettag)
1967 : {
1968 : LOCALPREDICATELOCK *lock;
1969 :
1970 : /* check local hash table */
1971 12 : lock = (LOCALPREDICATELOCK *) hash_search(LocalPredicateLockHash,
1972 : targettag,
1973 : HASH_FIND, NULL);
1974 :
1975 12 : if (!lock)
1976 8 : return false;
1977 :
1978 : /*
1979 : * Found entry in the table, but still need to check whether it's actually
1980 : * held -- it could just be a parent of some held lock.
1981 : */
1982 4 : return lock->held;
1983 : }
1984 :
1985 : /*
1986 : * Return the parent lock tag in the lock hierarchy: the next coarser
1987 : * lock that covers the provided tag.
1988 : *
1989 : * Returns true and sets *parent to the parent tag if one exists,
1990 : * returns false if none exists.
1991 : */
1992 : static bool
1993 16 : GetParentPredicateLockTag(const PREDICATELOCKTARGETTAG *tag,
1994 : PREDICATELOCKTARGETTAG *parent)
1995 : {
1996 16 : switch (GET_PREDICATELOCKTARGETTAG_TYPE(*tag))
1997 : {
1998 : case PREDLOCKTAG_RELATION:
1999 : /* relation locks have no parent lock */
2000 16 : return false;
2001 :
2002 : case PREDLOCKTAG_PAGE:
2003 : /* parent lock is relation lock */
2004 0 : SET_PREDICATELOCKTARGETTAG_RELATION(*parent,
2005 : GET_PREDICATELOCKTARGETTAG_DB(*tag),
2006 : GET_PREDICATELOCKTARGETTAG_RELATION(*tag));
2007 :
2008 0 : return true;
2009 :
2010 : case PREDLOCKTAG_TUPLE:
2011 : /* parent lock is page lock */
2012 0 : SET_PREDICATELOCKTARGETTAG_PAGE(*parent,
2013 : GET_PREDICATELOCKTARGETTAG_DB(*tag),
2014 : GET_PREDICATELOCKTARGETTAG_RELATION(*tag),
2015 : GET_PREDICATELOCKTARGETTAG_PAGE(*tag));
2016 0 : return true;
2017 : }
2018 :
2019 : /* not reachable */
2020 0 : Assert(false);
2021 : return false;
2022 : }
2023 :
2024 : /*
2025 : * Check whether the lock we are considering is already covered by a
2026 : * coarser lock for our transaction.
2027 : *
2028 : * Like PredicateLockExists, this function might return a false
2029 : * negative, but it will never return a false positive.
2030 : */
2031 : static bool
2032 8 : CoarserLockCovers(const PREDICATELOCKTARGETTAG *newtargettag)
2033 : {
2034 : PREDICATELOCKTARGETTAG targettag,
2035 : parenttag;
2036 :
2037 8 : targettag = *newtargettag;
2038 :
2039 : /* check parents iteratively until no more */
2040 16 : while (GetParentPredicateLockTag(&targettag, &parenttag))
2041 : {
2042 0 : targettag = parenttag;
2043 0 : if (PredicateLockExists(&targettag))
2044 0 : return true;
2045 : }
2046 :
2047 : /* no more parents to check; lock is not covered */
2048 8 : return false;
2049 : }
2050 :
2051 : /*
2052 : * Remove the dummy entry from the predicate lock target hash, to free up some
2053 : * scratch space. The caller must be holding SerializablePredicateLockListLock,
2054 : * and must restore the entry with RestoreScratchTarget() before releasing the
2055 : * lock.
2056 : *
2057 : * If lockheld is true, the caller is already holding the partition lock
2058 : * of the partition containing the scratch entry.
2059 : */
2060 : static void
2061 9 : RemoveScratchTarget(bool lockheld)
2062 : {
2063 : bool found;
2064 :
2065 9 : Assert(LWLockHeldByMe(SerializablePredicateLockListLock));
2066 :
2067 9 : if (!lockheld)
2068 0 : LWLockAcquire(ScratchPartitionLock, LW_EXCLUSIVE);
2069 9 : hash_search_with_hash_value(PredicateLockTargetHash,
2070 : &ScratchTargetTag,
2071 : ScratchTargetTagHash,
2072 : HASH_REMOVE, &found);
2073 9 : Assert(found);
2074 9 : if (!lockheld)
2075 0 : LWLockRelease(ScratchPartitionLock);
2076 9 : }
2077 :
2078 : /*
2079 : * Re-insert the dummy entry in predicate lock target hash.
2080 : */
2081 : static void
2082 9 : RestoreScratchTarget(bool lockheld)
2083 : {
2084 : bool found;
2085 :
2086 9 : Assert(LWLockHeldByMe(SerializablePredicateLockListLock));
2087 :
2088 9 : if (!lockheld)
2089 0 : LWLockAcquire(ScratchPartitionLock, LW_EXCLUSIVE);
2090 9 : hash_search_with_hash_value(PredicateLockTargetHash,
2091 : &ScratchTargetTag,
2092 : ScratchTargetTagHash,
2093 : HASH_ENTER, &found);
2094 9 : Assert(!found);
2095 9 : if (!lockheld)
2096 0 : LWLockRelease(ScratchPartitionLock);
2097 9 : }
2098 :
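/*
 * [Editor's note.]  The scratch entry manipulated above is a reserve-ahead
 * technique: by keeping a dummy entry in PredicateLockTargetHash at all
 * times, a later HASH_ENTER that must not fail (such as creating the new
 * target while transferring locks with removeOld = true) is guaranteed a
 * free slot -- remove the dummy, insert the real entry, then put the dummy
 * back before releasing SerializablePredicateLockListLock.
 */
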
2099 : /*
2100 : * Check whether the list of related predicate locks is empty for a
2101 : * predicate lock target, and remove the target if it is.
2102 : */
2103 : static void
2104 8 : RemoveTargetIfNoLongerUsed(PREDICATELOCKTARGET *target, uint32 targettaghash)
2105 : {
2106 : PREDICATELOCKTARGET *rmtarget PG_USED_FOR_ASSERTS_ONLY;
2107 :
2108 8 : Assert(LWLockHeldByMe(SerializablePredicateLockListLock));
2109 :
2110 : /* Can't remove it until no locks at this target. */
2111 8 : if (!SHMQueueEmpty(&target->predicateLocks))
2112 9 : return;
2113 :
2114 : /* Actually remove the target. */
2115 7 : rmtarget = hash_search_with_hash_value(PredicateLockTargetHash,
2116 7 : &target->tag,
2117 : targettaghash,
2118 : HASH_REMOVE, NULL);
2119 7 : Assert(rmtarget == target);
2120 : }
2121 :
2122 : /*
2123 : * Delete child target locks owned by this process.
2124 : * This implementation assumes that the usage of each target tag field
2125 : * is uniform. No need to make this hard if we don't have to.
2126 : *
2127 : * We aren't acquiring lightweight locks for the predicate lock or lock
2128 : * target structures associated with this transaction unless we're going
2129 : * to modify them, because no other process is permitted to modify our
2130 : * locks.
2131 : */
2132 : static void
2133 8 : DeleteChildTargetLocks(const PREDICATELOCKTARGETTAG *newtargettag)
2134 : {
2135 : SERIALIZABLEXACT *sxact;
2136 : PREDICATELOCK *predlock;
2137 :
2138 8 : LWLockAcquire(SerializablePredicateLockListLock, LW_SHARED);
2139 8 : sxact = MySerializableXact;
2140 8 : predlock = (PREDICATELOCK *)
2141 8 : SHMQueueNext(&(sxact->predicateLocks),
2142 8 : &(sxact->predicateLocks),
2143 : offsetof(PREDICATELOCK, xactLink));
2144 24 : while (predlock)
2145 : {
2146 : SHM_QUEUE *predlocksxactlink;
2147 : PREDICATELOCK *nextpredlock;
2148 : PREDICATELOCKTAG oldlocktag;
2149 : PREDICATELOCKTARGET *oldtarget;
2150 : PREDICATELOCKTARGETTAG oldtargettag;
2151 :
2152 8 : predlocksxactlink = &(predlock->xactLink);
2153 8 : nextpredlock = (PREDICATELOCK *)
2154 8 : SHMQueueNext(&(sxact->predicateLocks),
2155 : predlocksxactlink,
2156 : offsetof(PREDICATELOCK, xactLink));
2157 :
2158 8 : oldlocktag = predlock->tag;
2159 8 : Assert(oldlocktag.myXact == sxact);
2160 8 : oldtarget = oldlocktag.myTarget;
2161 8 : oldtargettag = oldtarget->tag;
2162 :
2163 8 : if (TargetTagIsCoveredBy(oldtargettag, *newtargettag))
2164 : {
2165 : uint32 oldtargettaghash;
2166 : LWLock *partitionLock;
2167 : PREDICATELOCK *rmpredlock PG_USED_FOR_ASSERTS_ONLY;
2168 :
2169 0 : oldtargettaghash = PredicateLockTargetTagHashCode(&oldtargettag);
2170 0 : partitionLock = PredicateLockHashPartitionLock(oldtargettaghash);
2171 :
2172 0 : LWLockAcquire(partitionLock, LW_EXCLUSIVE);
2173 :
2174 0 : SHMQueueDelete(predlocksxactlink);
2175 0 : SHMQueueDelete(&(predlock->targetLink));
2176 0 : rmpredlock = hash_search_with_hash_value
2177 : (PredicateLockHash,
2178 : &oldlocktag,
2179 0 : PredicateLockHashCodeFromTargetHashCode(&oldlocktag,
2180 : oldtargettaghash),
2181 : HASH_REMOVE, NULL);
2182 0 : Assert(rmpredlock == predlock);
2183 :
2184 0 : RemoveTargetIfNoLongerUsed(oldtarget, oldtargettaghash);
2185 :
2186 0 : LWLockRelease(partitionLock);
2187 :
2188 0 : DecrementParentLocks(&oldtargettag);
2189 : }
2190 :
2191 8 : predlock = nextpredlock;
2192 : }
2193 8 : LWLockRelease(SerializablePredicateLockListLock);
2194 8 : }
2195 :
2196 : /*
2197 : * Returns the promotion limit for a given predicate lock target. This is the
2198 : * max number of descendant locks allowed before promoting to the specified
2199 : * tag. Note that the limit includes non-direct descendants (e.g., both tuples
2200 : * and pages for a relation lock).
2201 : *
2202 : * Currently the default limit is 2 for a page lock, and half of the value of
2203 : * max_pred_locks_per_transaction, minus 1, for a relation lock, to match the
2204 : * behavior of earlier releases when upgrading.
2205 : *
2206 : * TODO SSI: We should probably add additional GUCs to allow a maximum ratio
2207 : * of page and tuple locks based on the pages in a relation, and the maximum
2208 : * ratio of tuple locks to tuples in a page. This would provide more
2209 : * generally "balanced" allocation of locks to where they are most useful,
2210 : * while still allowing the absolute numbers to prevent one relation from
2211 : * tying up all predicate lock resources.
2212 : */
2213 : static int
2214 0 : MaxPredicateChildLocks(const PREDICATELOCKTARGETTAG *tag)
2215 : {
2216 0 : switch (GET_PREDICATELOCKTARGETTAG_TYPE(*tag))
2217 : {
2218 : case PREDLOCKTAG_RELATION:
2219 0 : return max_predicate_locks_per_relation < 0
2220 : ? (max_predicate_locks_per_xact
2221 0 : / (-max_predicate_locks_per_relation)) - 1
2222 0 : : max_predicate_locks_per_relation;
2223 :
2224 : case PREDLOCKTAG_PAGE:
2225 0 : return max_predicate_locks_per_page;
2226 :
2227 : case PREDLOCKTAG_TUPLE:
2228 :
2229 : /*
2230 : * not reachable: nothing is finer-granularity than a tuple, so we
2231 : * should never try to promote to it.
2232 : */
2233 0 : Assert(false);
2234 : return 0;
2235 : }
2236 :
2237 : /* not reachable */
2238 0 : Assert(false);
2239 : return 0;
2240 : }
2241 :
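/*
 * [Editor's sketch -- not part of predicate.c.]  A standalone rendering of
 * the relation-level threshold arithmetic in MaxPredicateChildLocks above,
 * using hypothetical values: with locks_per_xact = 64 and
 * locks_per_relation = -2 (the "divide by two" style of setting), the
 * threshold works out to 64 / 2 - 1 = 31 child locks before promotion to a
 * relation lock.
 */
static int
sketch_relation_promotion_threshold(int locks_per_xact, int locks_per_relation)
{
	/* A negative setting means "a fraction of locks_per_xact". */
	return locks_per_relation < 0
		? (locks_per_xact / (-locks_per_relation)) - 1
		: locks_per_relation;
}
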
2242 : /*
2243 : * For all ancestors of a newly-acquired predicate lock, increment
2244 : * their child count in the parent hash table. If any of them have
2245 : * more descendants than their promotion threshold, acquire the
2246 : * coarsest such lock.
2247 : *
2248 : * Returns true if a parent lock was acquired and false otherwise.
2249 : */
2250 : static bool
2251 8 : CheckAndPromotePredicateLockRequest(const PREDICATELOCKTARGETTAG *reqtag)
2252 : {
2253 : PREDICATELOCKTARGETTAG targettag,
2254 : nexttag,
2255 : promotiontag;
2256 : LOCALPREDICATELOCK *parentlock;
2257 : bool found,
2258 : promote;
2259 :
2260 8 : promote = false;
2261 :
2262 8 : targettag = *reqtag;
2263 :
2264 : /* check parents iteratively */
2265 16 : while (GetParentPredicateLockTag(&targettag, &nexttag))
2266 : {
2267 0 : targettag = nexttag;
2268 0 : parentlock = (LOCALPREDICATELOCK *) hash_search(LocalPredicateLockHash,
2269 : &targettag,
2270 : HASH_ENTER,
2271 : &found);
2272 0 : if (!found)
2273 : {
2274 0 : parentlock->held = false;
2275 0 : parentlock->childLocks = 1;
2276 : }
2277 : else
2278 0 : parentlock->childLocks++;
2279 :
2280 0 : if (parentlock->childLocks >
2281 0 : MaxPredicateChildLocks(&targettag))
2282 : {
2283 : /*
2284 : * We should promote to this parent lock. Continue to check its
2285 : * ancestors, however, both to get their child counts right and to
2286 : * check whether we should just go ahead and promote to one of
2287 : * them.
2288 : */
2289 0 : promotiontag = targettag;
2290 0 : promote = true;
2291 : }
2292 : }
2293 :
2294 8 : if (promote)
2295 : {
2296 : /* acquire coarsest ancestor eligible for promotion */
2297 0 : PredicateLockAcquire(&promotiontag);
2298 0 : return true;
2299 : }
2300 : else
2301 8 : return false;
2302 : }
2303 :
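/*
 * [Editor's note.]  A concrete instance of the promotion accounting in
 * CheckAndPromotePredicateLockRequest above: with a page promotion threshold
 * of 2 (see MaxPredicateChildLocks), acquiring a third tuple lock on the
 * same page pushes the page's childLocks count past the threshold, so the
 * request is promoted to a page lock, and PredicateLockAcquire then discards
 * the now-redundant tuple locks via DeleteChildTargetLocks.
 */
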
2304 : /*
2305 : * When releasing a lock, decrement the child count on all ancestor
2306 : * locks.
2307 : *
2308 : * This is called only when releasing a lock via
2309 : * DeleteChildTargetLocks (i.e. when a lock becomes redundant because
2310 : * we've acquired its parent, possibly due to promotion) or when a new
2311 : * MVCC write lock makes the predicate lock unnecessary. There's no
2312 : * point in calling it when locks are released at transaction end, as
2313 : * this information is no longer needed.
2314 : */
2315 : static void
2316 0 : DecrementParentLocks(const PREDICATELOCKTARGETTAG *targettag)
2317 : {
2318 : PREDICATELOCKTARGETTAG parenttag,
2319 : nexttag;
2320 :
2321 0 : parenttag = *targettag;
2322 :
2323 0 : while (GetParentPredicateLockTag(&parenttag, &nexttag))
2324 : {
2325 : uint32 targettaghash;
2326 : LOCALPREDICATELOCK *parentlock,
2327 : *rmlock PG_USED_FOR_ASSERTS_ONLY;
2328 :
2329 0 : parenttag = nexttag;
2330 0 : targettaghash = PredicateLockTargetTagHashCode(&parenttag);
2331 0 : parentlock = (LOCALPREDICATELOCK *)
2332 0 : hash_search_with_hash_value(LocalPredicateLockHash,
2333 : &parenttag, targettaghash,
2334 : HASH_FIND, NULL);
2335 :
2336 : /*
2337 : * There's a small chance the parent lock doesn't exist in the lock
2338 : * table. This can happen if we prematurely removed it because an
2339 : * index split caused the child refcount to be off.
2340 : */
2341 0 : if (parentlock == NULL)
2342 0 : continue;
2343 :
2344 0 : parentlock->childLocks--;
2345 :
2346 : /*
2347 : * Under similar circumstances the decrement may leave the parent lock's
2348 : * child count negative.  This only happens if we're holding that lock
2349 : * (otherwise we would have removed the entry).
2350 : */
2351 0 : if (parentlock->childLocks < 0)
2352 : {
2353 0 : Assert(parentlock->held);
2354 0 : parentlock->childLocks = 0;
2355 : }
2356 :
2357 0 : if ((parentlock->childLocks == 0) && (!parentlock->held))
2358 : {
2359 0 : rmlock = (LOCALPREDICATELOCK *)
2360 0 : hash_search_with_hash_value(LocalPredicateLockHash,
2361 : &parenttag, targettaghash,
2362 : HASH_REMOVE, NULL);
2363 0 : Assert(rmlock == parentlock);
2364 : }
2365 : }
2366 0 : }
2367 :
2368 : /*
2369 : * Indicate that a predicate lock on the given target is held by the
2370 : * specified transaction. Has no effect if the lock is already held.
2371 : *
2372 : * This updates the lock table and the sxact's lock list, and creates
2373 : * the lock target if necessary, but does *not* do anything related to
2374 : * granularity promotion or the local lock table. See
2375 : * PredicateLockAcquire for that.
2376 : */
2377 : static void
2378 8 : CreatePredicateLock(const PREDICATELOCKTARGETTAG *targettag,
2379 : uint32 targettaghash,
2380 : SERIALIZABLEXACT *sxact)
2381 : {
2382 : PREDICATELOCKTARGET *target;
2383 : PREDICATELOCKTAG locktag;
2384 : PREDICATELOCK *lock;
2385 : LWLock *partitionLock;
2386 : bool found;
2387 :
2388 8 : partitionLock = PredicateLockHashPartitionLock(targettaghash);
2389 :
2390 8 : LWLockAcquire(SerializablePredicateLockListLock, LW_SHARED);
2391 8 : LWLockAcquire(partitionLock, LW_EXCLUSIVE);
2392 :
2393 : /* Make sure that the target is represented. */
2394 8 : target = (PREDICATELOCKTARGET *)
2395 8 : hash_search_with_hash_value(PredicateLockTargetHash,
2396 : targettag, targettaghash,
2397 : HASH_ENTER_NULL, &found);
2398 8 : if (!target)
2399 0 : ereport(ERROR,
2400 : (errcode(ERRCODE_OUT_OF_MEMORY),
2401 : errmsg("out of shared memory"),
2402 : errhint("You might need to increase max_pred_locks_per_transaction.")));
2403 8 : if (!found)
2404 7 : SHMQueueInit(&(target->predicateLocks));
2405 :
2406 : /* We've got the sxact and target, make sure they're joined. */
2407 8 : locktag.myTarget = target;
2408 8 : locktag.myXact = sxact;
2409 8 : lock = (PREDICATELOCK *)
2410 8 : hash_search_with_hash_value(PredicateLockHash, &locktag,
2411 8 : PredicateLockHashCodeFromTargetHashCode(&locktag, targettaghash),
2412 : HASH_ENTER_NULL, &found);
2413 8 : if (!lock)
2414 0 : ereport(ERROR,
2415 : (errcode(ERRCODE_OUT_OF_MEMORY),
2416 : errmsg("out of shared memory"),
2417 : errhint("You might need to increase max_pred_locks_per_transaction.")));
2418 :
2419 8 : if (!found)
2420 : {
2421 8 : SHMQueueInsertBefore(&(target->predicateLocks), &(lock->targetLink));
2422 8 : SHMQueueInsertBefore(&(sxact->predicateLocks),
2423 : &(lock->xactLink));
2424 8 : lock->commitSeqNo = InvalidSerCommitSeqNo;
2425 : }
2426 :
2427 8 : LWLockRelease(partitionLock);
2428 8 : LWLockRelease(SerializablePredicateLockListLock);
2429 8 : }
2430 :
2431 : /*
2432 : * Acquire a predicate lock on the specified target for the current
2433 : * connection if not already held. This updates the local lock table
2434 : * and uses it to implement granularity promotion. It will consolidate
2435 : * multiple locks into a coarser lock if warranted, and will release
2436 : * any finer-grained locks covered by the new one.
2437 : */
2438 : static void
2439 12 : PredicateLockAcquire(const PREDICATELOCKTARGETTAG *targettag)
2440 : {
2441 : uint32 targettaghash;
2442 : bool found;
2443 : LOCALPREDICATELOCK *locallock;
2444 :
2445 : /* Do we have the lock already, or a covering lock? */
2446 12 : if (PredicateLockExists(targettag))
2447 8 : return;
2448 :
2449 8 : if (CoarserLockCovers(targettag))
2450 0 : return;
2451 :
2452 : /* the same hash and LW lock apply to the lock target and the local lock. */
2453 8 : targettaghash = PredicateLockTargetTagHashCode(targettag);
2454 :
2455 : /* Acquire lock in local table */
2456 8 : locallock = (LOCALPREDICATELOCK *)
2457 8 : hash_search_with_hash_value(LocalPredicateLockHash,
2458 : targettag, targettaghash,
2459 : HASH_ENTER, &found);
2460 8 : locallock->held = true;
2461 8 : if (!found)
2462 8 : locallock->childLocks = 0;
2463 :
2464 : /* Actually create the lock */
2465 8 : CreatePredicateLock(targettag, targettaghash, MySerializableXact);
2466 :
2467 : /*
2468 : * Lock has been acquired. Check whether it should be promoted to a
2469 : * coarser granularity, or whether there are finer-granularity locks to
2470 : * clean up.
2471 : */
2472 8 : if (CheckAndPromotePredicateLockRequest(targettag))
2473 : {
2474 : /*
2475 : * Lock request was promoted to a coarser-granularity lock, and that
2476 : * lock was acquired. It will delete this lock and any of its
2477 : * children, so we're done.
2478 : */
2479 : }
2480 : else
2481 : {
2482 : /* Clean up any finer-granularity locks */
2483 8 : if (GET_PREDICATELOCKTARGETTAG_TYPE(*targettag) != PREDLOCKTAG_TUPLE)
2484 8 : DeleteChildTargetLocks(targettag);
2485 : }
2486 : }
2487 :
2488 :
2489 : /*
2490 : * PredicateLockRelation
2491 : *
2492 : * Gets a predicate lock at the relation level.
2493 : * Skip if not in full serializable transaction isolation level.
2494 : * Skip if this is a temporary table.
2495 : * Clear any finer-grained predicate locks this session has on the relation.
2496 : */
2497 : void
2498 28148 : PredicateLockRelation(Relation relation, Snapshot snapshot)
2499 : {
2500 : PREDICATELOCKTARGETTAG tag;
2501 :
2502 28148 : if (!SerializationNeededForRead(relation, snapshot))
2503 56284 : return;
2504 :
2505 12 : SET_PREDICATELOCKTARGETTAG_RELATION(tag,
2506 : relation->rd_node.dbNode,
2507 : relation->rd_id);
2508 12 : PredicateLockAcquire(&tag);
2509 : }
2510 :
2511 : /*
2512 : * PredicateLockPage
2513 : *
2514 : * Gets a predicate lock at the page level.
2515 : * Skip if not in full serializable transaction isolation level.
2516 : * Skip if this is a temporary table.
2517 : * Skip if a coarser predicate lock already covers this page.
2518 : * Clear any finer-grained predicate locks this session has on the relation.
2519 : */
2520 : void
2521 821389 : PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot)
2522 : {
2523 : PREDICATELOCKTARGETTAG tag;
2524 :
2525 821389 : if (!SerializationNeededForRead(relation, snapshot))
2526 1642778 : return;
2527 :
2528 0 : SET_PREDICATELOCKTARGETTAG_PAGE(tag,
2529 : relation->rd_node.dbNode,
2530 : relation->rd_id,
2531 : blkno);
2532 0 : PredicateLockAcquire(&tag);
2533 : }
2534 :
2535 : /*
2536 : * PredicateLockTuple
2537 : *
2538 : * Gets a predicate lock at the tuple level.
2539 : * Skip if not in full serializable transaction isolation level.
2540 : * Skip if this is a temporary table.
2541 : */
2542 : void
2543 1140058 : PredicateLockTuple(Relation relation, HeapTuple tuple, Snapshot snapshot)
2544 : {
2545 : PREDICATELOCKTARGETTAG tag;
2546 : ItemPointer tid;
2547 : TransactionId targetxmin;
2548 :
2549 1140058 : if (!SerializationNeededForRead(relation, snapshot))
2550 2280116 : return;
2551 :
2552 : /*
2553 : * If it's a heap tuple, return if this xact wrote it.
2554 : */
2555 0 : if (relation->rd_index == NULL)
2556 : {
2557 : TransactionId myxid;
2558 :
2559 0 : targetxmin = HeapTupleHeaderGetXmin(tuple->t_data);
2560 :
2561 0 : myxid = GetTopTransactionIdIfAny();
2562 0 : if (TransactionIdIsValid(myxid))
2563 : {
2564 0 : if (TransactionIdFollowsOrEquals(targetxmin, TransactionXmin))
2565 : {
2566 0 : TransactionId xid = SubTransGetTopmostTransaction(targetxmin);
2567 :
2568 0 : if (TransactionIdEquals(xid, myxid))
2569 : {
2570 : /* We wrote it; we already have a write lock. */
2571 0 : return;
2572 : }
2573 : }
2574 : }
2575 : }
2576 :
2577 : /*
2578 : * Do quick-but-not-definitive test for a relation lock first. This will
2579 : * never cause a return when the relation is *not* locked, but will
2580 : * occasionally let the check continue when there really *is* a relation
2581 : * level lock.
2582 : */
2583 0 : SET_PREDICATELOCKTARGETTAG_RELATION(tag,
2584 : relation->rd_node.dbNode,
2585 : relation->rd_id);
2586 0 : if (PredicateLockExists(&tag))
2587 0 : return;
2588 :
2589 0 : tid = &(tuple->t_self);
2590 0 : SET_PREDICATELOCKTARGETTAG_TUPLE(tag,
2591 : relation->rd_node.dbNode,
2592 : relation->rd_id,
2593 : ItemPointerGetBlockNumber(tid),
2594 : ItemPointerGetOffsetNumber(tid));
2595 0 : PredicateLockAcquire(&tag);
2596 : }
2597 :
2598 :
2599 : /*
2600 : * DeleteLockTarget
2601 : *
2602 : * Remove a predicate lock target along with any locks held for it.
2603 : *
2604 : * Caller must hold SerializablePredicateLockListLock and the
2605 : * appropriate hash partition lock for the target.
2606 : */
2607 : static void
2608 0 : DeleteLockTarget(PREDICATELOCKTARGET *target, uint32 targettaghash)
2609 : {
2610 : PREDICATELOCK *predlock;
2611 : SHM_QUEUE *predlocktargetlink;
2612 : PREDICATELOCK *nextpredlock;
2613 : bool found;
2614 :
2615 0 : Assert(LWLockHeldByMe(SerializablePredicateLockListLock));
2616 0 : Assert(LWLockHeldByMe(PredicateLockHashPartitionLock(targettaghash)));
2617 :
2618 0 : predlock = (PREDICATELOCK *)
2619 0 : SHMQueueNext(&(target->predicateLocks),
2620 0 : &(target->predicateLocks),
2621 : offsetof(PREDICATELOCK, targetLink));
2622 0 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
2623 0 : while (predlock)
2624 : {
2625 0 : predlocktargetlink = &(predlock->targetLink);
2626 0 : nextpredlock = (PREDICATELOCK *)
2627 0 : SHMQueueNext(&(target->predicateLocks),
2628 : predlocktargetlink,
2629 : offsetof(PREDICATELOCK, targetLink));
2630 :
2631 0 : SHMQueueDelete(&(predlock->xactLink));
2632 0 : SHMQueueDelete(&(predlock->targetLink));
2633 :
2634 0 : hash_search_with_hash_value
2635 : (PredicateLockHash,
2636 0 : &predlock->tag,
2637 0 : PredicateLockHashCodeFromTargetHashCode(&predlock->tag,
2638 : targettaghash),
2639 : HASH_REMOVE, &found);
2640 0 : Assert(found);
2641 :
2642 0 : predlock = nextpredlock;
2643 : }
2644 0 : LWLockRelease(SerializableXactHashLock);
2645 :
2646 : /* Remove the target itself, if possible. */
2647 0 : RemoveTargetIfNoLongerUsed(target, targettaghash);
2648 0 : }
2649 :
2650 :
2651 : /*
2652 : * TransferPredicateLocksToNewTarget
2653 : *
2654 : * Move or copy all the predicate locks for a lock target, for use by
2655 : * index page splits/combines and other things that create or replace
2656 : * lock targets. If 'removeOld' is true, the old locks and the target
2657 : * will be removed.
2658 : *
2659 : * Returns true on success, or false if we ran out of shared memory to
2660 : * allocate the new target or locks. Guaranteed to always succeed if
2661 : * removeOld is set (by using the scratch entry in PredicateLockTargetHash
2662 : * for scratch space).
2663 : *
2664 : * Warning: the "removeOld" option should be used only with care,
2665 : * because this function does not (indeed, can not) update other
2666 : * because this function does not (indeed, cannot) update other
2667 : * entries, this is not a problem: the local lock table is used only
2668 : * as a hint, so missing entries for locks that are held are
2669 : * OK. Having entries for locks that are no longer held, as can happen
2670 : * when using "removeOld", is not in general OK. We can only use it
2671 : * safely when replacing a lock with a coarser-granularity lock that
2672 : * covers it, or if we are absolutely certain that no one will need to
2673 : * refer to that lock in the future.
2674 : *
2675 : * Caller must hold SerializablePredicateLockListLock.
2676 : */
2677 : static bool
2678 138 : TransferPredicateLocksToNewTarget(PREDICATELOCKTARGETTAG oldtargettag,
2679 : PREDICATELOCKTARGETTAG newtargettag,
2680 : bool removeOld)
2681 : {
2682 : uint32 oldtargettaghash;
2683 : LWLock *oldpartitionLock;
2684 : PREDICATELOCKTARGET *oldtarget;
2685 : uint32 newtargettaghash;
2686 : LWLock *newpartitionLock;
2687 : bool found;
2688 138 : bool outOfShmem = false;
2689 :
2690 138 : Assert(LWLockHeldByMe(SerializablePredicateLockListLock));
2691 :
2692 138 : oldtargettaghash = PredicateLockTargetTagHashCode(&oldtargettag);
2693 138 : newtargettaghash = PredicateLockTargetTagHashCode(&newtargettag);
2694 138 : oldpartitionLock = PredicateLockHashPartitionLock(oldtargettaghash);
2695 138 : newpartitionLock = PredicateLockHashPartitionLock(newtargettaghash);
2696 :
2697 138 : if (removeOld)
2698 : {
2699 : /*
2700 : * Remove the dummy entry to give us scratch space, so we know we'll
2701 : * be able to create the new lock target.
2702 : */
2703 0 : RemoveScratchTarget(false);
2704 : }
2705 :
2706 : /*
2707 : * We must get the partition locks in ascending sequence to avoid
2708 : * deadlocks. If old and new partitions are the same, we must request the
2709 : * lock only once.
2710 : */
2711 138 : if (oldpartitionLock < newpartitionLock)
2712 : {
2713 70 : LWLockAcquire(oldpartitionLock,
2714 : (removeOld ? LW_EXCLUSIVE : LW_SHARED));
2715 70 : LWLockAcquire(newpartitionLock, LW_EXCLUSIVE);
2716 : }
2717 68 : else if (oldpartitionLock > newpartitionLock)
2718 : {
2719 60 : LWLockAcquire(newpartitionLock, LW_EXCLUSIVE);
2720 60 : LWLockAcquire(oldpartitionLock,
2721 : (removeOld ? LW_EXCLUSIVE : LW_SHARED));
2722 : }
2723 : else
2724 8 : LWLockAcquire(newpartitionLock, LW_EXCLUSIVE);
2725 :
2726 : /*
2727 : * Look for the old target. If not found, that's OK; no predicate locks
2728 : * are affected, so we can just clean up and return. If it does exist,
2729 : * walk its list of predicate locks and move or copy them to the new
2730 : * target.
2731 : */
2732 138 : oldtarget = hash_search_with_hash_value(PredicateLockTargetHash,
2733 : &oldtargettag,
2734 : oldtargettaghash,
2735 : HASH_FIND, NULL);
2736 :
2737 138 : if (oldtarget)
2738 : {
2739 : PREDICATELOCKTARGET *newtarget;
2740 : PREDICATELOCK *oldpredlock;
2741 : PREDICATELOCKTAG newpredlocktag;
2742 :
2743 0 : newtarget = hash_search_with_hash_value(PredicateLockTargetHash,
2744 : &newtargettag,
2745 : newtargettaghash,
2746 : HASH_ENTER_NULL, &found);
2747 :
2748 0 : if (!newtarget)
2749 : {
2750 : /* Failed to allocate due to insufficient shmem */
2751 0 : outOfShmem = true;
2752 0 : goto exit;
2753 : }
2754 :
2755 : /* If we created a new entry, initialize it */
2756 0 : if (!found)
2757 0 : SHMQueueInit(&(newtarget->predicateLocks));
2758 :
2759 0 : newpredlocktag.myTarget = newtarget;
2760 :
2761 : /*
2762 : * Loop through all the locks on the old target, replacing them with
2763 : * locks on the new target.
2764 : */
2765 0 : oldpredlock = (PREDICATELOCK *)
2766 0 : SHMQueueNext(&(oldtarget->predicateLocks),
2767 0 : &(oldtarget->predicateLocks),
2768 : offsetof(PREDICATELOCK, targetLink));
2769 0 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
2770 0 : while (oldpredlock)
2771 : {
2772 : SHM_QUEUE *predlocktargetlink;
2773 : PREDICATELOCK *nextpredlock;
2774 : PREDICATELOCK *newpredlock;
2775 0 : SerCommitSeqNo oldCommitSeqNo = oldpredlock->commitSeqNo;
2776 :
2777 0 : predlocktargetlink = &(oldpredlock->targetLink);
2778 0 : nextpredlock = (PREDICATELOCK *)
2779 0 : SHMQueueNext(&(oldtarget->predicateLocks),
2780 : predlocktargetlink,
2781 : offsetof(PREDICATELOCK, targetLink));
2782 0 : newpredlocktag.myXact = oldpredlock->tag.myXact;
2783 :
2784 0 : if (removeOld)
2785 : {
2786 0 : SHMQueueDelete(&(oldpredlock->xactLink));
2787 0 : SHMQueueDelete(&(oldpredlock->targetLink));
2788 :
2789 0 : hash_search_with_hash_value
2790 : (PredicateLockHash,
2791 0 : &oldpredlock->tag,
2792 0 : PredicateLockHashCodeFromTargetHashCode(&oldpredlock->tag,
2793 : oldtargettaghash),
2794 : HASH_REMOVE, &found);
2795 0 : Assert(found);
2796 : }
2797 :
2798 0 : newpredlock = (PREDICATELOCK *)
2799 0 : hash_search_with_hash_value(PredicateLockHash,
2800 : &newpredlocktag,
2801 0 : PredicateLockHashCodeFromTargetHashCode(&newpredlocktag,
2802 : newtargettaghash),
2803 : HASH_ENTER_NULL,
2804 : &found);
2805 0 : if (!newpredlock)
2806 : {
2807 : /* Out of shared memory. Undo what we've done so far. */
2808 0 : LWLockRelease(SerializableXactHashLock);
2809 0 : DeleteLockTarget(newtarget, newtargettaghash);
2810 0 : outOfShmem = true;
2811 0 : goto exit;
2812 : }
2813 0 : if (!found)
2814 : {
2815 0 : SHMQueueInsertBefore(&(newtarget->predicateLocks),
2816 : &(newpredlock->targetLink));
2817 0 : SHMQueueInsertBefore(&(newpredlocktag.myXact->predicateLocks),
2818 : &(newpredlock->xactLink));
2819 0 : newpredlock->commitSeqNo = oldCommitSeqNo;
2820 : }
2821 : else
2822 : {
2823 0 : if (newpredlock->commitSeqNo < oldCommitSeqNo)
2824 0 : newpredlock->commitSeqNo = oldCommitSeqNo;
2825 : }
2826 :
2827 0 : Assert(newpredlock->commitSeqNo != 0);
2828 0 : Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo)
2829 : || (newpredlock->tag.myXact == OldCommittedSxact));
2830 :
2831 0 : oldpredlock = nextpredlock;
2832 : }
2833 0 : LWLockRelease(SerializableXactHashLock);
2834 :
2835 0 : if (removeOld)
2836 : {
2837 0 : Assert(SHMQueueEmpty(&oldtarget->predicateLocks));
2838 0 : RemoveTargetIfNoLongerUsed(oldtarget, oldtargettaghash);
2839 : }
2840 : }
2841 :
2842 :
2843 : exit:
2844 : /* Release partition locks in reverse order of acquisition. */
2845 138 : if (oldpartitionLock < newpartitionLock)
2846 : {
2847 70 : LWLockRelease(newpartitionLock);
2848 70 : LWLockRelease(oldpartitionLock);
2849 : }
2850 68 : else if (oldpartitionLock > newpartitionLock)
2851 : {
2852 60 : LWLockRelease(oldpartitionLock);
2853 60 : LWLockRelease(newpartitionLock);
2854 : }
2855 : else
2856 8 : LWLockRelease(newpartitionLock);
2857 :
2858 138 : if (removeOld)
2859 : {
2860 : /* We shouldn't run out of memory if we're moving locks */
2861 0 : Assert(!outOfShmem);
2862 :
2863 : /* Put the scratch entry back */
2864 0 : RestoreScratchTarget(false);
2865 : }
2866 :
2867 138 : return !outOfShmem;
2868 : }
2869 :
2870 : /*
2871 : * Drop all predicate locks of any granularity from the specified relation,
2872 : * which can be a heap relation or an index relation. If 'transfer' is true,
2873 : * acquire a relation lock on the heap for any transactions with any lock(s)
2874 : * on the specified relation.
2875 : *
2876 : * This requires grabbing a lot of LW locks and scanning the entire lock
2877 : * target table for matches. That makes this more expensive than most
2878 : * predicate lock management functions, but it will only be called for DDL
2879 : * type commands that are expensive anyway, and there are fast returns when
2880 : * no serializable transactions are active or the relation is temporary.
2881 : *
2882 : * We don't use the TransferPredicateLocksToNewTarget function because it
2883 : * acquires its own locks on the partitions of the two targets involved,
2884 : * and we'll already be holding all partition locks.
2885 : *
2886 : * We can't throw an error from here, because the call could be from a
2887 : * transaction which is not serializable.
2888 : *
2889 : * NOTE: This is currently only called with transfer set to true, but that may
2890 : * change. If we decide to clean up the locks from a table on commit of a
2891 : * transaction which executed DROP TABLE, the false condition will be useful.
2892 : */
2893 : static void
2894 1173 : DropAllPredicateLocksFromTable(Relation relation, bool transfer)
2895 : {
2896 : HASH_SEQ_STATUS seqstat;
2897 : PREDICATELOCKTARGET *oldtarget;
2898 : PREDICATELOCKTARGET *heaptarget;
2899 : Oid dbId;
2900 : Oid relId;
2901 : Oid heapId;
2902 : int i;
2903 : bool isIndex;
2904 : bool found;
2905 : uint32 heaptargettaghash;
2906 :
2907 : /*
2908 : * Bail out quickly if there are no serializable transactions running.
2909 : * It's safe to check this without taking locks because the caller is
2910 : * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which
2911 : * would matter here can be acquired while that is held.
2912 : */
2913 1173 : if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
2914 2319 : return;
2915 :
2916 18 : if (!PredicateLockingNeededForRelation(relation))
2917 9 : return;
2918 :
2919 9 : dbId = relation->rd_node.dbNode;
2920 9 : relId = relation->rd_id;
2921 9 : if (relation->rd_index == NULL)
2922 : {
2923 0 : isIndex = false;
2924 0 : heapId = relId;
2925 : }
2926 : else
2927 : {
2928 9 : isIndex = true;
2929 9 : heapId = relation->rd_index->indrelid;
2930 : }
2931 9 : Assert(heapId != InvalidOid);
2932 9 : Assert(transfer || !isIndex); /* index OID only makes sense with
2933 : * transfer */
2934 :
2935 : /* Retrieve first time needed, then keep. */
2936 9 : heaptargettaghash = 0;
2937 9 : heaptarget = NULL;
2938 :
2939 : /* Acquire locks on all lock partitions */
2940 9 : LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
2941 153 : for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
2942 144 : LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_EXCLUSIVE);
2943 9 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
2944 :
2945 : /*
2946 : * Remove the dummy entry to give us scratch space, so we know we'll be
2947 : * able to create the new lock target.
2948 : */
2949 9 : if (transfer)
2950 9 : RemoveScratchTarget(true);
2951 :
2952 : /* Scan through target map */
2953 9 : hash_seq_init(&seqstat, PredicateLockTargetHash);
2954 :
2955 9 : while ((oldtarget = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
2956 : {
2957 : PREDICATELOCK *oldpredlock;
2958 :
2959 : /*
2960 : * Check whether this is a target which needs attention.
2961 : */
2962 10 : if (GET_PREDICATELOCKTARGETTAG_RELATION(oldtarget->tag) != relId)
2963 10 : continue; /* wrong relation id */
2964 0 : if (GET_PREDICATELOCKTARGETTAG_DB(oldtarget->tag) != dbId)
2965 0 : continue; /* wrong database id */
2966 0 : if (transfer && !isIndex
2967 0 : && GET_PREDICATELOCKTARGETTAG_TYPE(oldtarget->tag) == PREDLOCKTAG_RELATION)
2968 0 : continue; /* already the right lock */
2969 :
2970 : /*
2971 : * If we made it here, we have work to do. We make sure the heap
2972 : * relation lock exists, then we walk the list of predicate locks for
2973 : * the old target we found, moving all locks to the heap relation lock
2974 : * -- unless they already hold that.
2975 : */
2976 :
2977 : /*
2978 : * First make sure we have the heap relation target. We only need to
2979 : * do this once.
2980 : */
2981 0 : if (transfer && heaptarget == NULL)
2982 : {
2983 : PREDICATELOCKTARGETTAG heaptargettag;
2984 :
2985 0 : SET_PREDICATELOCKTARGETTAG_RELATION(heaptargettag, dbId, heapId);
2986 0 : heaptargettaghash = PredicateLockTargetTagHashCode(&heaptargettag);
2987 0 : heaptarget = hash_search_with_hash_value(PredicateLockTargetHash,
2988 : &heaptargettag,
2989 : heaptargettaghash,
2990 : HASH_ENTER, &found);
2991 0 : if (!found)
2992 0 : SHMQueueInit(&heaptarget->predicateLocks);
2993 : }
2994 :
2995 : /*
2996 : * Loop through all the locks on the old target, replacing them with
2997 : * locks on the new target.
2998 : */
2999 0 : oldpredlock = (PREDICATELOCK *)
3000 0 : SHMQueueNext(&(oldtarget->predicateLocks),
3001 0 : &(oldtarget->predicateLocks),
3002 : offsetof(PREDICATELOCK, targetLink));
3003 0 : while (oldpredlock)
3004 : {
3005 : PREDICATELOCK *nextpredlock;
3006 : PREDICATELOCK *newpredlock;
3007 : SerCommitSeqNo oldCommitSeqNo;
3008 : SERIALIZABLEXACT *oldXact;
3009 :
3010 0 : nextpredlock = (PREDICATELOCK *)
3011 0 : SHMQueueNext(&(oldtarget->predicateLocks),
3012 0 : &(oldpredlock->targetLink),
3013 : offsetof(PREDICATELOCK, targetLink));
3014 :
3015 : /*
3016 : * Remove the old lock first. This avoids the chance of running
3017 : * out of lock structure entries for the hash table.
3018 : */
3019 0 : oldCommitSeqNo = oldpredlock->commitSeqNo;
3020 0 : oldXact = oldpredlock->tag.myXact;
3021 :
3022 0 : SHMQueueDelete(&(oldpredlock->xactLink));
3023 :
3024 : /*
3025             :              * No need for a retail delete from the oldtarget list; we're
3026             :              * removing the whole target anyway.
3027 : */
3028 0 : hash_search(PredicateLockHash,
3029 0 : &oldpredlock->tag,
3030 : HASH_REMOVE, &found);
3031 0 : Assert(found);
3032 :
3033 0 : if (transfer)
3034 : {
3035 : PREDICATELOCKTAG newpredlocktag;
3036 :
3037 0 : newpredlocktag.myTarget = heaptarget;
3038 0 : newpredlocktag.myXact = oldXact;
3039 0 : newpredlock = (PREDICATELOCK *)
3040 0 : hash_search_with_hash_value(PredicateLockHash,
3041 : &newpredlocktag,
3042 0 : PredicateLockHashCodeFromTargetHashCode(&newpredlocktag,
3043 : heaptargettaghash),
3044 : HASH_ENTER,
3045 : &found);
3046 0 : if (!found)
3047 : {
3048 0 : SHMQueueInsertBefore(&(heaptarget->predicateLocks),
3049 : &(newpredlock->targetLink));
3050 0 : SHMQueueInsertBefore(&(newpredlocktag.myXact->predicateLocks),
3051 : &(newpredlock->xactLink));
3052 0 : newpredlock->commitSeqNo = oldCommitSeqNo;
3053 : }
3054 : else
3055 : {
3056 0 : if (newpredlock->commitSeqNo < oldCommitSeqNo)
3057 0 : newpredlock->commitSeqNo = oldCommitSeqNo;
3058 : }
3059 :
3060 0 : Assert(newpredlock->commitSeqNo != 0);
3061 0 : Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo)
3062 : || (newpredlock->tag.myXact == OldCommittedSxact));
3063 : }
3064 :
3065 0 : oldpredlock = nextpredlock;
3066 : }
3067 :
3068 0 : hash_search(PredicateLockTargetHash, &oldtarget->tag, HASH_REMOVE,
3069 : &found);
3070 0 : Assert(found);
3071 : }
3072 :
3073 : /* Put the scratch entry back */
3074 9 : if (transfer)
3075 9 : RestoreScratchTarget(true);
3076 :
3077 : /* Release locks in reverse order */
3078 9 : LWLockRelease(SerializableXactHashLock);
3079 153 : for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
3080 144 : LWLockRelease(PredicateLockHashPartitionLockByIndex(i));
3081 9 : LWLockRelease(SerializablePredicateLockListLock);
3082 : }
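
/*
 * For illustration only: a minimal standalone sketch of the locking
 * discipline used above, with POSIX mutexes standing in for LWLocks.
 * The function above takes SerializablePredicateLockListLock, then every
 * predicate-lock partition lock in ascending index order, then
 * SerializableXactHashLock, and releases them in reverse; the fixed
 * acquisition order is what keeps concurrent whole-table scans from
 * deadlocking against each other.  Every name below (example_*) is invented
 * for the sketch and is not part of PostgreSQL.
 */
#include <pthread.h>

#define EXAMPLE_NUM_PARTITIONS 16

static pthread_mutex_t example_list_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t example_partition_lock[EXAMPLE_NUM_PARTITIONS];
static pthread_mutex_t example_xact_lock = PTHREAD_MUTEX_INITIALIZER;

static void
example_init_partition_locks(void)
{
	int			i;

	for (i = 0; i < EXAMPLE_NUM_PARTITIONS; i++)
		pthread_mutex_init(&example_partition_lock[i], NULL);
}

static void
example_scan_under_all_locks(void (*scan_body) (void))
{
	int			i;

	/* Acquire the outermost lock first, then partitions in ascending order. */
	pthread_mutex_lock(&example_list_lock);
	for (i = 0; i < EXAMPLE_NUM_PARTITIONS; i++)
		pthread_mutex_lock(&example_partition_lock[i]);
	pthread_mutex_lock(&example_xact_lock);

	scan_body();				/* work that needs a stable view of all locks */

	/* Release in exactly the reverse order. */
	pthread_mutex_unlock(&example_xact_lock);
	for (i = EXAMPLE_NUM_PARTITIONS - 1; i >= 0; i--)
		pthread_mutex_unlock(&example_partition_lock[i]);
	pthread_mutex_unlock(&example_list_lock);
}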
3083 :
3084 : /*
3085 : * TransferPredicateLocksToHeapRelation
3086 : * For all transactions, transfer all predicate locks for the given
3087 : * relation to a single relation lock on the heap.
3088 : */
3089 : void
3090 1173 : TransferPredicateLocksToHeapRelation(Relation relation)
3091 : {
3092 1173 : DropAllPredicateLocksFromTable(relation, true);
3093 1173 : }
3094 :
3095 :
3096 : /*
3097 : * PredicateLockPageSplit
3098 : *
3099 : * Copies any predicate locks for the old page to the new page.
3100             :  *      Skipped if this is a temporary table or toast table.
3101 : *
3102 : * NOTE: A page split (or overflow) affects all serializable transactions,
3103 : * even if it occurs in the context of another transaction isolation level.
3104 : *
3105             :  * NOTE: This currently leaves each backend's local copy of the locks
3106             :  * without information on the new lock, which exists only in shared memory.
3107             :  * This could cause problems if enough page splits occur on locked pages
3108             :  * without the processes that hold the locks getting in and noticing.
3109 : */
3110 : void
3111 1004 : PredicateLockPageSplit(Relation relation, BlockNumber oldblkno,
3112 : BlockNumber newblkno)
3113 : {
3114 : PREDICATELOCKTARGETTAG oldtargettag;
3115 : PREDICATELOCKTARGETTAG newtargettag;
3116 : bool success;
3117 :
3118 : /*
3119 : * Bail out quickly if there are no serializable transactions running.
3120 : *
3121 : * It's safe to do this check without taking any additional locks. Even if
3122 : * a serializable transaction starts concurrently, we know it can't take
3123 : * any SIREAD locks on the page being split because the caller is holding
3124 : * the associated buffer page lock. Memory reordering isn't an issue; the
3125 : * memory barrier in the LWLock acquisition guarantees that this read
3126 : * occurs while the buffer page lock is held.
3127 : */
3128 1004 : if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
3129 1725 : return;
3130 :
3131 145 : if (!PredicateLockingNeededForRelation(relation))
3132 7 : return;
3133 :
3134 138 : Assert(oldblkno != newblkno);
3135 138 : Assert(BlockNumberIsValid(oldblkno));
3136 138 : Assert(BlockNumberIsValid(newblkno));
3137 :
3138 138 : SET_PREDICATELOCKTARGETTAG_PAGE(oldtargettag,
3139 : relation->rd_node.dbNode,
3140 : relation->rd_id,
3141 : oldblkno);
3142 138 : SET_PREDICATELOCKTARGETTAG_PAGE(newtargettag,
3143 : relation->rd_node.dbNode,
3144 : relation->rd_id,
3145 : newblkno);
3146 :
3147 138 : LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
3148 :
3149 : /*
3150 : * Try copying the locks over to the new page's tag, creating it if
3151 : * necessary.
3152 : */
3153 138 : success = TransferPredicateLocksToNewTarget(oldtargettag,
3154 : newtargettag,
3155 : false);
3156 :
3157 138 : if (!success)
3158 : {
3159 : /*
3160 : * No more predicate lock entries are available. Failure isn't an
3161 : * option here, so promote the page lock to a relation lock.
3162 : */
3163 :
3164 : /* Get the parent relation lock's lock tag */
3165 0 : success = GetParentPredicateLockTag(&oldtargettag,
3166 : &newtargettag);
3167 0 : Assert(success);
3168 :
3169 : /*
3170 : * Move the locks to the parent. This shouldn't fail.
3171 : *
3172 : * Note that here we are removing locks held by other backends,
3173 : * leading to a possible inconsistency in their local lock hash table.
3174             :          * This is OK because each removed lock is replaced by a new lock
3175             :          * that covers the old target.
3176 : */
3177 0 : success = TransferPredicateLocksToNewTarget(oldtargettag,
3178 : newtargettag,
3179 : true);
3180 0 : Assert(success);
3181 : }
3182 :
3183 138 : LWLockRelease(SerializablePredicateLockListLock);
3184 : }
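
/*
 * For illustration only: the fallback shape used above, as a standalone
 * sketch.  Copying page-granularity locks can fail only for lack of shared
 * memory; when it does, the locks are promoted to the coarser relation
 * target, and that second transfer (which removes the old entries as it
 * goes) is expected to succeed.  The callback type and all names below are
 * invented for the sketch and are not part of PostgreSQL.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Stand-in for TransferPredicateLocksToNewTarget(); false means "no space". */
typedef bool (*example_transfer_fn) (uint32_t from_target, uint32_t to_target,
									 bool remove_old);

static void
example_page_split(uint32_t oldpage, uint32_t newpage, uint32_t relation,
				   example_transfer_fn transfer)
{
	bool		ok;

	if (transfer(oldpage, newpage, false))
		return;					/* page-granularity copy succeeded */

	/*
	 * Out of lock entries: promote to a relation-level lock.  Removing the
	 * old page entries as we go frees space, so this transfer cannot fail.
	 */
	ok = transfer(oldpage, relation, true);
	assert(ok);
	(void) ok;					/* keep -DNDEBUG builds warning-free */
}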
3185 :
3186 : /*
3187 : * PredicateLockPageCombine
3188 : *
3189 : * Combines predicate locks for two existing pages.
3190             :  *      Skipped if this is a temporary table or toast table.
3191 : *
3192 : * NOTE: A page combine affects all serializable transactions, even if it
3193 : * occurs in the context of another transaction isolation level.
3194 : */
3195 : void
3196 135 : PredicateLockPageCombine(Relation relation, BlockNumber oldblkno,
3197 : BlockNumber newblkno)
3198 : {
3199 : /*
3200 : * Page combines differ from page splits in that we ought to be able to
3201 : * remove the locks on the old page after transferring them to the new
3202 : * page, instead of duplicating them. However, because we can't edit other
3203 : * backends' local lock tables, removing the old lock would leave them
3204 : * with an entry in their LocalPredicateLockHash for a lock they're not
3205 : * holding, which isn't acceptable. So we wind up having to do the same
3206 : * work as a page split, acquiring a lock on the new page and keeping the
3207 : * old page locked too. That can lead to some false positives, but should
3208 : * be rare in practice.
3209 : */
3210 135 : PredicateLockPageSplit(relation, oldblkno, newblkno);
3211 135 : }
3212 :
3213 : /*
3214 : * Walk the list of in-progress serializable transactions and find the new
3215 : * xmin.
3216 : */
3217 : static void
3218 8 : SetNewSxactGlobalXmin(void)
3219 : {
3220 : SERIALIZABLEXACT *sxact;
3221 :
3222 8 : Assert(LWLockHeldByMe(SerializableXactHashLock));
3223 :
3224 8 : PredXact->SxactGlobalXmin = InvalidTransactionId;
3225 8 : PredXact->SxactGlobalXminCount = 0;
3226 :
3227 26 : for (sxact = FirstPredXact(); sxact != NULL; sxact = NextPredXact(sxact))
3228 : {
3229 18 : if (!SxactIsRolledBack(sxact)
3230 14 : && !SxactIsCommitted(sxact)
3231 0 : && sxact != OldCommittedSxact)
3232 : {
3233 0 : Assert(sxact->xmin != InvalidTransactionId);
3234 0 : if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)
3235 0 : || TransactionIdPrecedes(sxact->xmin,
3236 0 : PredXact->SxactGlobalXmin))
3237 : {
3238 0 : PredXact->SxactGlobalXmin = sxact->xmin;
3239 0 : PredXact->SxactGlobalXminCount = 1;
3240 : }
3241 0 : else if (TransactionIdEquals(sxact->xmin,
3242 : PredXact->SxactGlobalXmin))
3243 0 : PredXact->SxactGlobalXminCount++;
3244 : }
3245 : }
3246 :
3247 8 : OldSerXidSetActiveSerXmin(PredXact->SxactGlobalXmin);
3248 8 : }
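
/*
 * For illustration only: the bookkeeping above as a standalone sketch.
 * SxactGlobalXmin holds the oldest xmin among active serializable
 * transactions and SxactGlobalXminCount how many of them share it; both are
 * recomputed in a single pass.  Simplifications: xids are plain uint32_t
 * values with no wraparound handling, and the committed / rolled-back /
 * OldCommittedSxact tests collapse into one "active" flag.  All names below
 * are invented for the sketch.
 */
#include <stdbool.h>
#include <stdint.h>

typedef struct
{
	uint32_t	xmin;
	bool		active;			/* neither committed nor rolled back */
} ExampleSxact;

static void
example_set_global_xmin(const ExampleSxact *sxacts, int nsxacts,
						uint32_t *global_xmin, int *global_xmin_count)
{
	int			i;

	*global_xmin = 0;			/* 0 plays the role of InvalidTransactionId */
	*global_xmin_count = 0;

	for (i = 0; i < nsxacts; i++)
	{
		if (!sxacts[i].active)
			continue;
		if (*global_xmin_count == 0 || sxacts[i].xmin < *global_xmin)
		{
			*global_xmin = sxacts[i].xmin;
			*global_xmin_count = 1;
		}
		else if (sxacts[i].xmin == *global_xmin)
			(*global_xmin_count)++;
	}
}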
3249 :
3250 : /*
3251 : * ReleasePredicateLocks
3252 : *
3253 : * Releases predicate locks based on completion of the current transaction,
3254 : * whether committed or rolled back. It can also be called for a read only
3255 : * transaction when it becomes impossible for the transaction to become
3256 : * part of a dangerous structure.
3257 : *
3258 : * We do nothing unless this is a serializable transaction.
3259 : *
3260 : * This method must ensure that shared memory hash tables are cleaned
3261 : * up in some relatively timely fashion.
3262 : *
3263 : * If this transaction is committing and is holding any predicate locks,
3264 : * it must be added to a list of completed serializable transactions still
3265 : * holding locks.
3266 : */
3267 : void
3268 26224 : ReleasePredicateLocks(bool isCommit)
3269 : {
3270 : bool needToClear;
3271 : RWConflict conflict,
3272 : nextConflict,
3273 : possibleUnsafeConflict;
3274 : SERIALIZABLEXACT *roXact;
3275 :
3276 : /*
3277 : * We can't trust XactReadOnly here, because a transaction which started
3278 : * as READ WRITE can show as READ ONLY later, e.g., within
3279 : * subtransactions. We want to flag a transaction as READ ONLY if it
3280 : * commits without writing so that de facto READ ONLY transactions get the
3281 : * benefit of some RO optimizations, so we will use this local variable to
3282 : * get some cleanup logic right which is based on whether the transaction
3283 : * was declared READ ONLY at the top level.
3284 : */
3285 : bool topLevelIsDeclaredReadOnly;
3286 :
3287 26224 : if (MySerializableXact == InvalidSerializableXact)
3288 : {
3289 26212 : Assert(LocalPredicateLockHash == NULL);
3290 52436 : return;
3291 : }
3292 :
3293 12 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
3294 :
3295 12 : Assert(!isCommit || SxactIsPrepared(MySerializableXact));
3296 12 : Assert(!isCommit || !SxactIsDoomed(MySerializableXact));
3297 12 : Assert(!SxactIsCommitted(MySerializableXact));
3298 12 : Assert(!SxactIsRolledBack(MySerializableXact));
3299 :
3300 : /* may not be serializable during COMMIT/ROLLBACK PREPARED */
3301 12 : Assert(MySerializableXact->pid == 0 || IsolationIsSerializable());
3302 :
3303 : /* We'd better not already be on the cleanup list. */
3304 12 : Assert(!SxactIsOnFinishedList(MySerializableXact));
3305 :
3306 12 : topLevelIsDeclaredReadOnly = SxactIsReadOnly(MySerializableXact);
3307 :
3308 : /*
3309 : * We don't hold XidGenLock lock here, assuming that TransactionId is
3310             :      * We don't hold XidGenLock here, assuming that a TransactionId can be
3311             :      * read atomically!
3312 : * If this value is changing, we don't care that much whether we get the
3313 : * old or new value -- it is just used to determine how far
3314 : * GlobalSerializableXmin must advance before this transaction can be
3315 : * fully cleaned up. The worst that could happen is we wait for one more
3316 : * transaction to complete before freeing some RAM; correctness of visible
3317 : * behavior is not affected.
3318 : */
3319 12 : MySerializableXact->finishedBefore = ShmemVariableCache->nextXid;
3320 :
3321 : /*
3322 : * If it's not a commit it's a rollback, and we can clear our locks
3323 : * immediately.
3324 : */
3325 12 : if (isCommit)
3326 : {
3327 6 : MySerializableXact->flags |= SXACT_FLAG_COMMITTED;
3328 6 : MySerializableXact->commitSeqNo = ++(PredXact->LastSxactCommitSeqNo);
3329 : /* Recognize implicit read-only transaction (commit without write). */
3330 6 : if (!MyXactDidWrite)
3331 0 : MySerializableXact->flags |= SXACT_FLAG_READ_ONLY;
3332 : }
3333 : else
3334 : {
3335 : /*
3336 : * The DOOMED flag indicates that we intend to roll back this
3337 : * transaction and so it should not cause serialization failures for
3338 : * other transactions that conflict with it. Note that this flag might
3339 : * already be set, if another backend marked this transaction for
3340 : * abort.
3341 : *
3342 : * The ROLLED_BACK flag further indicates that ReleasePredicateLocks
3343 : * has been called, and so the SerializableXact is eligible for
3344 : * cleanup. This means it should not be considered when calculating
3345 : * SxactGlobalXmin.
3346 : */
3347 6 : MySerializableXact->flags |= SXACT_FLAG_DOOMED;
3348 6 : MySerializableXact->flags |= SXACT_FLAG_ROLLED_BACK;
3349 :
3350 : /*
3351 : * If the transaction was previously prepared, but is now failing due
3352 : * to a ROLLBACK PREPARED or (hopefully very rare) error after the
3353 : * prepare, clear the prepared flag. This simplifies conflict
3354 : * checking.
3355 : */
3356 6 : MySerializableXact->flags &= ~SXACT_FLAG_PREPARED;
3357 : }
3358 :
3359 12 : if (!topLevelIsDeclaredReadOnly)
3360 : {
3361 12 : Assert(PredXact->WritableSxactCount > 0);
3362 12 : if (--(PredXact->WritableSxactCount) == 0)
3363 : {
3364 : /*
3365 : * Release predicate locks and rw-conflicts in for all committed
3366 : * transactions. There are no longer any transactions which might
3367 : * conflict with the locks and no chance for new transactions to
3368 : * overlap. Similarly, existing conflicts in can't cause pivots,
3369 : * and any conflicts in which could have completed a dangerous
3370 : * structure would already have caused a rollback, so any
3371 : * remaining ones must be benign.
3372 : */
3373 8 : PredXact->CanPartialClearThrough = PredXact->LastSxactCommitSeqNo;
3374 : }
3375 : }
3376 : else
3377 : {
3378 : /*
3379 : * Read-only transactions: clear the list of transactions that might
3380 : * make us unsafe. Note that we use 'inLink' for the iteration as
3381 : * opposed to 'outLink' for the r/w xacts.
3382 : */
3383 0 : possibleUnsafeConflict = (RWConflict)
3384 0 : SHMQueueNext(&MySerializableXact->possibleUnsafeConflicts,
3385 0 : &MySerializableXact->possibleUnsafeConflicts,
3386 : offsetof(RWConflictData, inLink));
3387 0 : while (possibleUnsafeConflict)
3388 : {
3389 0 : nextConflict = (RWConflict)
3390 0 : SHMQueueNext(&MySerializableXact->possibleUnsafeConflicts,
3391 0 : &possibleUnsafeConflict->inLink,
3392 : offsetof(RWConflictData, inLink));
3393 :
3394 0 : Assert(!SxactIsReadOnly(possibleUnsafeConflict->sxactOut));
3395 0 : Assert(MySerializableXact == possibleUnsafeConflict->sxactIn);
3396 :
3397 0 : ReleaseRWConflict(possibleUnsafeConflict);
3398 :
3399 0 : possibleUnsafeConflict = nextConflict;
3400 : }
3401 : }
3402 :
3403 : /* Check for conflict out to old committed transactions. */
3404 12 : if (isCommit
3405 6 : && !SxactIsReadOnly(MySerializableXact)
3406 6 : && SxactHasSummaryConflictOut(MySerializableXact))
3407 : {
3408 : /*
3409 : * we don't know which old committed transaction we conflicted with,
3410 : * so be conservative and use FirstNormalSerCommitSeqNo here
3411 : */
3412 0 : MySerializableXact->SeqNo.earliestOutConflictCommit =
3413 : FirstNormalSerCommitSeqNo;
3414 0 : MySerializableXact->flags |= SXACT_FLAG_CONFLICT_OUT;
3415 : }
3416 :
3417 : /*
3418 : * Release all outConflicts to committed transactions. If we're rolling
3419             :      * back, clear them all.  Set SXACT_FLAG_CONFLICT_OUT if any point to
3420 : * previously committed transactions.
3421 : */
3422 12 : conflict = (RWConflict)
3423 12 : SHMQueueNext(&MySerializableXact->outConflicts,
3424 12 : &MySerializableXact->outConflicts,
3425 : offsetof(RWConflictData, outLink));
3426 25 : while (conflict)
3427 : {
3428 1 : nextConflict = (RWConflict)
3429 1 : SHMQueueNext(&MySerializableXact->outConflicts,
3430 1 : &conflict->outLink,
3431 : offsetof(RWConflictData, outLink));
3432 :
3433 1 : if (isCommit
3434 0 : && !SxactIsReadOnly(MySerializableXact)
3435 0 : && SxactIsCommitted(conflict->sxactIn))
3436 : {
3437 0 : if ((MySerializableXact->flags & SXACT_FLAG_CONFLICT_OUT) == 0
3438 0 : || conflict->sxactIn->prepareSeqNo < MySerializableXact->SeqNo.earliestOutConflictCommit)
3439 0 : MySerializableXact->SeqNo.earliestOutConflictCommit = conflict->sxactIn->prepareSeqNo;
3440 0 : MySerializableXact->flags |= SXACT_FLAG_CONFLICT_OUT;
3441 : }
3442 :
3443 1 : if (!isCommit
3444 0 : || SxactIsCommitted(conflict->sxactIn)
3445 0 : || (conflict->sxactIn->SeqNo.lastCommitBeforeSnapshot >= PredXact->LastSxactCommitSeqNo))
3446 1 : ReleaseRWConflict(conflict);
3447 :
3448 1 : conflict = nextConflict;
3449 : }
3450 :
3451 : /*
3452 : * Release all inConflicts from committed and read-only transactions. If
3453 : * we're rolling back, clear them all.
3454 : */
3455 12 : conflict = (RWConflict)
3456 12 : SHMQueueNext(&MySerializableXact->inConflicts,
3457 12 : &MySerializableXact->inConflicts,
3458 : offsetof(RWConflictData, inLink));
3459 25 : while (conflict)
3460 : {
3461 1 : nextConflict = (RWConflict)
3462 1 : SHMQueueNext(&MySerializableXact->inConflicts,
3463 1 : &conflict->inLink,
3464 : offsetof(RWConflictData, inLink));
3465 :
3466 1 : if (!isCommit
3467 0 : || SxactIsCommitted(conflict->sxactOut)
3468 0 : || SxactIsReadOnly(conflict->sxactOut))
3469 1 : ReleaseRWConflict(conflict);
3470 :
3471 1 : conflict = nextConflict;
3472 : }
3473 :
3474 12 : if (!topLevelIsDeclaredReadOnly)
3475 : {
3476 : /*
3477 : * Remove ourselves from the list of possible conflicts for concurrent
3478 : * READ ONLY transactions, flagging them as unsafe if we have a
3479 : * conflict out. If any are waiting DEFERRABLE transactions, wake them
3480 : * up if they are known safe or known unsafe.
3481 : */
3482 12 : possibleUnsafeConflict = (RWConflict)
3483 12 : SHMQueueNext(&MySerializableXact->possibleUnsafeConflicts,
3484 12 : &MySerializableXact->possibleUnsafeConflicts,
3485 : offsetof(RWConflictData, outLink));
3486 24 : while (possibleUnsafeConflict)
3487 : {
3488 0 : nextConflict = (RWConflict)
3489 0 : SHMQueueNext(&MySerializableXact->possibleUnsafeConflicts,
3490 0 : &possibleUnsafeConflict->outLink,
3491 : offsetof(RWConflictData, outLink));
3492 :
3493 0 : roXact = possibleUnsafeConflict->sxactIn;
3494 0 : Assert(MySerializableXact == possibleUnsafeConflict->sxactOut);
3495 0 : Assert(SxactIsReadOnly(roXact));
3496 :
3497 : /* Mark conflicted if necessary. */
3498 0 : if (isCommit
3499 0 : && MyXactDidWrite
3500 0 : && SxactHasConflictOut(MySerializableXact)
3501 0 : && (MySerializableXact->SeqNo.earliestOutConflictCommit
3502 0 : <= roXact->SeqNo.lastCommitBeforeSnapshot))
3503 : {
3504 : /*
3505 : * This releases possibleUnsafeConflict (as well as all other
3506 : * possible conflicts for roXact)
3507 : */
3508 0 : FlagSxactUnsafe(roXact);
3509 : }
3510 : else
3511 : {
3512 0 : ReleaseRWConflict(possibleUnsafeConflict);
3513 :
3514 : /*
3515 : * If we were the last possible conflict, flag it safe. The
3516 : * transaction can now safely release its predicate locks (but
3517 : * that transaction's backend has to do that itself).
3518 : */
3519 0 : if (SHMQueueEmpty(&roXact->possibleUnsafeConflicts))
3520 0 : roXact->flags |= SXACT_FLAG_RO_SAFE;
3521 : }
3522 :
3523 : /*
3524 : * Wake up the process for a waiting DEFERRABLE transaction if we
3525 : * now know it's either safe or conflicted.
3526 : */
3527 0 : if (SxactIsDeferrableWaiting(roXact) &&
3528 0 : (SxactIsROUnsafe(roXact) || SxactIsROSafe(roXact)))
3529 0 : ProcSendSignal(roXact->pid);
3530 :
3531 0 : possibleUnsafeConflict = nextConflict;
3532 : }
3533 : }
3534 :
3535 : /*
3536 : * Check whether it's time to clean up old transactions. This can only be
3537 : * done when the last serializable transaction with the oldest xmin among
3538 : * serializable transactions completes. We then find the "new oldest"
3539 : * xmin and purge any transactions which finished before this transaction
3540 : * was launched.
3541 : */
3542 12 : needToClear = false;
3543 12 : if (TransactionIdEquals(MySerializableXact->xmin, PredXact->SxactGlobalXmin))
3544 : {
3545 10 : Assert(PredXact->SxactGlobalXminCount > 0);
3546 10 : if (--(PredXact->SxactGlobalXminCount) == 0)
3547 : {
3548 8 : SetNewSxactGlobalXmin();
3549 8 : needToClear = true;
3550 : }
3551 : }
3552 :
3553 12 : LWLockRelease(SerializableXactHashLock);
3554 :
3555 12 : LWLockAcquire(SerializableFinishedListLock, LW_EXCLUSIVE);
3556 :
3557 : /* Add this to the list of transactions to check for later cleanup. */
3558 12 : if (isCommit)
3559 6 : SHMQueueInsertBefore(FinishedSerializableTransactions,
3560 6 : &MySerializableXact->finishedLink);
3561 :
3562 12 : if (!isCommit)
3563 6 : ReleaseOneSerializableXact(MySerializableXact, false, false);
3564 :
3565 12 : LWLockRelease(SerializableFinishedListLock);
3566 :
3567 12 : if (needToClear)
3568 8 : ClearOldPredicateLocks();
3569 :
3570 12 : MySerializableXact = InvalidSerializableXact;
3571 12 : MyXactDidWrite = false;
3572 :
3573 : /* Delete per-transaction lock table */
3574 12 : if (LocalPredicateLockHash != NULL)
3575 : {
3576 6 : hash_destroy(LocalPredicateLockHash);
3577 6 : LocalPredicateLockHash = NULL;
3578 : }
3579 : }
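
/*
 * For illustration only: the flag transitions performed above at transaction
 * end, as a standalone sketch.  The bit names and values below are invented
 * stand-ins for the SXACT_FLAG_* bits; the real flags live in
 * SERIALIZABLEXACT and are manipulated under SerializableXactHashLock.
 */
#include <stdbool.h>
#include <stdint.h>

#define EX_FLAG_COMMITTED	0x01
#define EX_FLAG_READ_ONLY	0x02
#define EX_FLAG_DOOMED		0x04
#define EX_FLAG_ROLLED_BACK	0x08
#define EX_FLAG_PREPARED	0x10

static uint32_t
example_finish_sxact_flags(uint32_t flags, bool is_commit, bool did_write)
{
	if (is_commit)
	{
		flags |= EX_FLAG_COMMITTED;
		if (!did_write)
			flags |= EX_FLAG_READ_ONLY; /* de facto read-only transaction */
	}
	else
	{
		/* Rollback: doom it, mark it reclaimable, forget any PREPARE. */
		flags |= EX_FLAG_DOOMED | EX_FLAG_ROLLED_BACK;
		flags &= ~EX_FLAG_PREPARED;
	}
	return flags;
}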
3580 :
3581 : /*
3582 : * Clear old predicate locks, belonging to committed transactions that are no
3583 : * longer interesting to any in-progress transaction.
3584 : */
3585 : static void
3586 8 : ClearOldPredicateLocks(void)
3587 : {
3588 : SERIALIZABLEXACT *finishedSxact;
3589 : PREDICATELOCK *predlock;
3590 :
3591 : /*
3592 : * Loop through finished transactions. They are in commit order, so we can
3593 : * stop as soon as we find one that's still interesting.
3594 : */
3595 8 : LWLockAcquire(SerializableFinishedListLock, LW_EXCLUSIVE);
3596 8 : finishedSxact = (SERIALIZABLEXACT *)
3597 8 : SHMQueueNext(FinishedSerializableTransactions,
3598 : FinishedSerializableTransactions,
3599 : offsetof(SERIALIZABLEXACT, finishedLink));
3600 8 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
3601 22 : while (finishedSxact)
3602 : {
3603 : SERIALIZABLEXACT *nextSxact;
3604 :
3605 6 : nextSxact = (SERIALIZABLEXACT *)
3606 6 : SHMQueueNext(FinishedSerializableTransactions,
3607 6 : &(finishedSxact->finishedLink),
3608 : offsetof(SERIALIZABLEXACT, finishedLink));
3609 6 : if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)
3610 0 : || TransactionIdPrecedesOrEquals(finishedSxact->finishedBefore,
3611 0 : PredXact->SxactGlobalXmin))
3612 : {
3613 : /*
3614 : * This transaction committed before any in-progress transaction
3615 : * took its snapshot. It's no longer interesting.
3616 : */
3617 6 : LWLockRelease(SerializableXactHashLock);
3618 6 : SHMQueueDelete(&(finishedSxact->finishedLink));
3619 6 : ReleaseOneSerializableXact(finishedSxact, false, false);
3620 6 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
3621 : }
3622 0 : else if (finishedSxact->commitSeqNo > PredXact->HavePartialClearedThrough
3623 0 : && finishedSxact->commitSeqNo <= PredXact->CanPartialClearThrough)
3624 : {
3625 : /*
3626 : * Any active transactions that took their snapshot before this
3627 : * transaction committed are read-only, so we can clear part of
3628 : * its state.
3629 : */
3630 0 : LWLockRelease(SerializableXactHashLock);
3631 :
3632 0 : if (SxactIsReadOnly(finishedSxact))
3633 : {
3634 : /* A read-only transaction can be removed entirely */
3635 0 : SHMQueueDelete(&(finishedSxact->finishedLink));
3636 0 : ReleaseOneSerializableXact(finishedSxact, false, false);
3637 : }
3638 : else
3639 : {
3640 : /*
3641 : * A read-write transaction can only be partially cleared. We
3642 : * need to keep the SERIALIZABLEXACT but can release the
3643 : * SIREAD locks and conflicts in.
3644 : */
3645 0 : ReleaseOneSerializableXact(finishedSxact, true, false);
3646 : }
3647 :
3648 0 : PredXact->HavePartialClearedThrough = finishedSxact->commitSeqNo;
3649 0 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
3650 : }
3651 : else
3652 : {
3653 : /* Still interesting. */
3654 : break;
3655 : }
3656 6 : finishedSxact = nextSxact;
3657 : }
3658 8 : LWLockRelease(SerializableXactHashLock);
3659 :
3660 : /*
3661             :      * Loop through the predicate locks held by the dummy transaction used
3661             :      * for summarized data.
3662 : */
3663 8 : LWLockAcquire(SerializablePredicateLockListLock, LW_SHARED);
3664 8 : predlock = (PREDICATELOCK *)
3665 8 : SHMQueueNext(&OldCommittedSxact->predicateLocks,
3666 8 : &OldCommittedSxact->predicateLocks,
3667 : offsetof(PREDICATELOCK, xactLink));
3668 16 : while (predlock)
3669 : {
3670 : PREDICATELOCK *nextpredlock;
3671 : bool canDoPartialCleanup;
3672 :
3673 0 : nextpredlock = (PREDICATELOCK *)
3674 0 : SHMQueueNext(&OldCommittedSxact->predicateLocks,
3675 0 : &predlock->xactLink,
3676 : offsetof(PREDICATELOCK, xactLink));
3677 :
3678 0 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
3679 0 : Assert(predlock->commitSeqNo != 0);
3680 0 : Assert(predlock->commitSeqNo != InvalidSerCommitSeqNo);
3681 0 : canDoPartialCleanup = (predlock->commitSeqNo <= PredXact->CanPartialClearThrough);
3682 0 : LWLockRelease(SerializableXactHashLock);
3683 :
3684 : /*
3685 : * If this lock originally belonged to an old enough transaction, we
3686 : * can release it.
3687 : */
3688 0 : if (canDoPartialCleanup)
3689 : {
3690 : PREDICATELOCKTAG tag;
3691 : PREDICATELOCKTARGET *target;
3692 : PREDICATELOCKTARGETTAG targettag;
3693 : uint32 targettaghash;
3694 : LWLock *partitionLock;
3695 :
3696 0 : tag = predlock->tag;
3697 0 : target = tag.myTarget;
3698 0 : targettag = target->tag;
3699 0 : targettaghash = PredicateLockTargetTagHashCode(&targettag);
3700 0 : partitionLock = PredicateLockHashPartitionLock(targettaghash);
3701 :
3702 0 : LWLockAcquire(partitionLock, LW_EXCLUSIVE);
3703 :
3704 0 : SHMQueueDelete(&(predlock->targetLink));
3705 0 : SHMQueueDelete(&(predlock->xactLink));
3706 :
3707 0 : hash_search_with_hash_value(PredicateLockHash, &tag,
3708 0 : PredicateLockHashCodeFromTargetHashCode(&tag,
3709 : targettaghash),
3710 : HASH_REMOVE, NULL);
3711 0 : RemoveTargetIfNoLongerUsed(target, targettaghash);
3712 :
3713 0 : LWLockRelease(partitionLock);
3714 : }
3715 :
3716 0 : predlock = nextpredlock;
3717 : }
3718 :
3719 8 : LWLockRelease(SerializablePredicateLockListLock);
3720 8 : LWLockRelease(SerializableFinishedListLock);
3721 8 : }
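
/*
 * For illustration only: the stopping rule used above, as a standalone
 * sketch.  Finished transactions are queued in commit order, so cleanup can
 * walk from the front and stop at the first entry that is still interesting.
 * Simplifications: finishedBefore values are plain uint64_t with no
 * wraparound, a global_xmin of 0 means "no serializable transaction active,
 * release everything", and the partial-cleanup branch for read-write
 * transactions is omitted.  Names are invented for the sketch.
 */
#include <stdint.h>

static int
example_cleanup_cutoff(const uint64_t *finished_before, int nfinished,
					   uint64_t global_xmin)
{
	int			n = 0;

	while (n < nfinished &&
		   (global_xmin == 0 || finished_before[n] <= global_xmin))
		n++;					/* entry n is no longer interesting */

	return n;					/* caller releases the first n entries */
}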
3722 :
3723 : /*
3724 : * This is the normal way to delete anything from any of the predicate
3725 : * locking hash tables. Given a transaction which we know can be deleted:
3726 : * delete all predicate locks held by that transaction and any predicate
3727 : * lock targets which are now unreferenced by a lock; delete all conflicts
3728 : * for the transaction; delete all xid values for the transaction; then
3729 : * delete the transaction.
3730 : *
3731 : * When the partial flag is set, we can release all predicate locks and
3732 : * in-conflict information -- we've established that there are no longer
3733 : * any overlapping read write transactions for which this transaction could
3734 : * matter -- but keep the transaction entry itself and any outConflicts.
3735 : *
3736 : * When the summarize flag is set, we've run short of room for sxact data
3737 : * and must summarize to the SLRU. Predicate locks are transferred to a
3738 : * dummy "old" transaction, with duplicate locks on a single target
3739 : * collapsing to a single lock with the "latest" commitSeqNo from among
3740             :  * the conflicting locks.
3741 : */
3742 : static void
3743 12 : ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial,
3744 : bool summarize)
3745 : {
3746 : PREDICATELOCK *predlock;
3747 : SERIALIZABLEXIDTAG sxidtag;
3748 : RWConflict conflict,
3749 : nextConflict;
3750 :
3751 12 : Assert(sxact != NULL);
3752 12 : Assert(SxactIsRolledBack(sxact) || SxactIsCommitted(sxact));
3753 12 : Assert(partial || !SxactIsOnFinishedList(sxact));
3754 12 : Assert(LWLockHeldByMe(SerializableFinishedListLock));
3755 :
3756 : /*
3757 : * First release all the predicate locks held by this xact (or transfer
3758 : * them to OldCommittedSxact if summarize is true)
3759 : */
3760 12 : LWLockAcquire(SerializablePredicateLockListLock, LW_SHARED);
3761 12 : predlock = (PREDICATELOCK *)
3762 12 : SHMQueueNext(&(sxact->predicateLocks),
3763 12 : &(sxact->predicateLocks),
3764 : offsetof(PREDICATELOCK, xactLink));
3765 32 : while (predlock)
3766 : {
3767 : PREDICATELOCK *nextpredlock;
3768 : PREDICATELOCKTAG tag;
3769 : SHM_QUEUE *targetLink;
3770 : PREDICATELOCKTARGET *target;
3771 : PREDICATELOCKTARGETTAG targettag;
3772 : uint32 targettaghash;
3773 : LWLock *partitionLock;
3774 :
3775 8 : nextpredlock = (PREDICATELOCK *)
3776 8 : SHMQueueNext(&(sxact->predicateLocks),
3777 8 : &(predlock->xactLink),
3778 : offsetof(PREDICATELOCK, xactLink));
3779 :
3780 8 : tag = predlock->tag;
3781 8 : targetLink = &(predlock->targetLink);
3782 8 : target = tag.myTarget;
3783 8 : targettag = target->tag;
3784 8 : targettaghash = PredicateLockTargetTagHashCode(&targettag);
3785 8 : partitionLock = PredicateLockHashPartitionLock(targettaghash);
3786 :
3787 8 : LWLockAcquire(partitionLock, LW_EXCLUSIVE);
3788 :
3789 8 : SHMQueueDelete(targetLink);
3790 :
3791 8 : hash_search_with_hash_value(PredicateLockHash, &tag,
3792 8 : PredicateLockHashCodeFromTargetHashCode(&tag,
3793 : targettaghash),
3794 : HASH_REMOVE, NULL);
3795 8 : if (summarize)
3796 : {
3797 : bool found;
3798 :
3799 : /* Fold into dummy transaction list. */
3800 0 : tag.myXact = OldCommittedSxact;
3801 0 : predlock = hash_search_with_hash_value(PredicateLockHash, &tag,
3802 0 : PredicateLockHashCodeFromTargetHashCode(&tag,
3803 : targettaghash),
3804 : HASH_ENTER_NULL, &found);
3805 0 : if (!predlock)
3806 0 : ereport(ERROR,
3807 : (errcode(ERRCODE_OUT_OF_MEMORY),
3808 : errmsg("out of shared memory"),
3809 : errhint("You might need to increase max_pred_locks_per_transaction.")));
3810 0 : if (found)
3811 : {
3812 0 : Assert(predlock->commitSeqNo != 0);
3813 0 : Assert(predlock->commitSeqNo != InvalidSerCommitSeqNo);
3814 0 : if (predlock->commitSeqNo < sxact->commitSeqNo)
3815 0 : predlock->commitSeqNo = sxact->commitSeqNo;
3816 : }
3817 : else
3818 : {
3819 0 : SHMQueueInsertBefore(&(target->predicateLocks),
3820 : &(predlock->targetLink));
3821 0 : SHMQueueInsertBefore(&(OldCommittedSxact->predicateLocks),
3822 : &(predlock->xactLink));
3823 0 : predlock->commitSeqNo = sxact->commitSeqNo;
3824 : }
3825 : }
3826 : else
3827 8 : RemoveTargetIfNoLongerUsed(target, targettaghash);
3828 :
3829 8 : LWLockRelease(partitionLock);
3830 :
3831 8 : predlock = nextpredlock;
3832 : }
3833 :
3834 : /*
3835 : * Rather than retail removal, just re-init the head after we've run
3836 : * through the list.
3837 : */
3838 12 : SHMQueueInit(&sxact->predicateLocks);
3839 :
3840 12 : LWLockRelease(SerializablePredicateLockListLock);
3841 :
3842 12 : sxidtag.xid = sxact->topXid;
3843 12 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
3844 :
3845 : /* Release all outConflicts (unless 'partial' is true) */
3846 12 : if (!partial)
3847 : {
3848 12 : conflict = (RWConflict)
3849 12 : SHMQueueNext(&sxact->outConflicts,
3850 12 : &sxact->outConflicts,
3851 : offsetof(RWConflictData, outLink));
3852 24 : while (conflict)
3853 : {
3854 0 : nextConflict = (RWConflict)
3855 0 : SHMQueueNext(&sxact->outConflicts,
3856 0 : &conflict->outLink,
3857 : offsetof(RWConflictData, outLink));
3858 0 : if (summarize)
3859 0 : conflict->sxactIn->flags |= SXACT_FLAG_SUMMARY_CONFLICT_IN;
3860 0 : ReleaseRWConflict(conflict);
3861 0 : conflict = nextConflict;
3862 : }
3863 : }
3864 :
3865 : /* Release all inConflicts. */
3866 12 : conflict = (RWConflict)
3867 12 : SHMQueueNext(&sxact->inConflicts,
3868 12 : &sxact->inConflicts,
3869 : offsetof(RWConflictData, inLink));
3870 24 : while (conflict)
3871 : {
3872 0 : nextConflict = (RWConflict)
3873 0 : SHMQueueNext(&sxact->inConflicts,
3874 0 : &conflict->inLink,
3875 : offsetof(RWConflictData, inLink));
3876 0 : if (summarize)
3877 0 : conflict->sxactOut->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT;
3878 0 : ReleaseRWConflict(conflict);
3879 0 : conflict = nextConflict;
3880 : }
3881 :
3882 : /* Finally, get rid of the xid and the record of the transaction itself. */
3883 12 : if (!partial)
3884 : {
3885 12 : if (sxidtag.xid != InvalidTransactionId)
3886 12 : hash_search(SerializableXidHash, &sxidtag, HASH_REMOVE, NULL);
3887 12 : ReleasePredXact(sxact);
3888 : }
3889 :
3890 12 : LWLockRelease(SerializableXactHashLock);
3891 12 : }
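
/*
 * For illustration only: the collapse rule applied when 'summarize' is set,
 * reduced to a standalone sketch.  Each surviving lock is re-tagged to the
 * dummy old-committed transaction, and duplicate locks on one target keep
 * only the latest commitSeqNo.  The types and names below are invented; the
 * real code works against the shared hash tables, raises an "out of shared
 * memory" error if no entry can be allocated, and is not shown handling
 * capacity here.
 */
#include <stdint.h>

typedef struct
{
	uint32_t	target;			/* stand-in for a PREDICATELOCKTARGET */
	uint64_t	commit_seqno;
} ExampleLock;

/* Fold 'src' into 'dst' (the dummy transaction's list); returns new length. */
static int
example_summarize_locks(ExampleLock *dst, int ndst,
						const ExampleLock *src, int nsrc)
{
	int			i,
				j;

	for (i = 0; i < nsrc; i++)
	{
		for (j = 0; j < ndst; j++)
		{
			if (dst[j].target == src[i].target)
			{
				if (src[i].commit_seqno > dst[j].commit_seqno)
					dst[j].commit_seqno = src[i].commit_seqno;
				break;
			}
		}
		if (j == ndst)
			dst[ndst++] = src[i];	/* first lock seen for this target */
	}
	return ndst;
}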
3892 :
3893 : /*
3894 : * Tests whether the given top level transaction is concurrent with
3895 : * (overlaps) our current transaction.
3896 : *
3897 : * We need to identify the top level transaction for SSI, anyway, so pass
3898 : * that to this function to save the overhead of checking the snapshot's
3899 : * subxip array.
3900 : */
3901 : static bool
3902 2 : XidIsConcurrent(TransactionId xid)
3903 : {
3904 : Snapshot snap;
3905 : uint32 i;
3906 :
3907 2 : Assert(TransactionIdIsValid(xid));
3908 2 : Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny()));
3909 :
3910 2 : snap = GetTransactionSnapshot();
3911 :
3912 2 : if (TransactionIdPrecedes(xid, snap->xmin))
3913 0 : return false;
3914 :
3915 2 : if (TransactionIdFollowsOrEquals(xid, snap->xmax))
3916 2 : return true;
3917 :
3918 0 : for (i = 0; i < snap->xcnt; i++)
3919 : {
3920 0 : if (xid == snap->xip[i])
3921 0 : return true;
3922 : }
3923 :
3924 0 : return false;
3925 : }
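
/*
 * For illustration only: the same overlap test as a standalone sketch
 * against a simplified snapshot (plain uint32_t xids, no wraparound, no
 * subtransactions).  Names are invented for the sketch.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
example_xid_is_concurrent(uint32_t xid,
						  uint32_t snap_xmin, uint32_t snap_xmax,
						  const uint32_t *snap_xip, int xcnt)
{
	int			i;

	if (xid < snap_xmin)
		return false;			/* completed before our snapshot was taken */
	if (xid >= snap_xmax)
		return true;			/* started after our snapshot was taken */

	for (i = 0; i < xcnt; i++)
	{
		if (xid == snap_xip[i])
			return true;		/* was in progress when we took our snapshot */
	}

	return false;				/* visible to our snapshot, so not concurrent */
}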
3926 :
3927 : /*
3928 : * CheckForSerializableConflictOut
3929 : * We are reading a tuple which has been modified. If it is visible to
3930 : * us but has been deleted, that indicates a rw-conflict out. If it's
3931 : * not visible and was created by a concurrent (overlapping)
3932 : * serializable transaction, that is also a rw-conflict out,
3933             :  *      serializable transaction, that is also a rw-conflict out.
3934 : * We will determine the top level xid of the writing transaction with which
3935 : * we may be in conflict, and check for overlap with our own transaction.
3936 : * If the transactions overlap (i.e., they cannot see each other's writes),
3937 : * then we have a conflict out.
3938 : *
3939 : * This function should be called just about anywhere in heapam.c where a
3940 : * tuple has been read. The caller must hold at least a shared lock on the
3941 : * buffer, because this function might set hint bits on the tuple. There is
3942 : * currently no known reason to call this function from an index AM.
3943 : */
3944 : void
3945 5049830 : CheckForSerializableConflictOut(bool visible, Relation relation,
3946 : HeapTuple tuple, Buffer buffer,
3947 : Snapshot snapshot)
3948 : {
3949 : TransactionId xid;
3950 : SERIALIZABLEXIDTAG sxidtag;
3951 : SERIALIZABLEXID *sxid;
3952 : SERIALIZABLEXACT *sxact;
3953 : HTSV_Result htsvResult;
3954 :
3955 5049830 : if (!SerializationNeededForRead(relation, snapshot))
3956 10099573 : return;
3957 :
3958 : /* Check if someone else has already decided that we need to die */
3959 86 : if (SxactIsDoomed(MySerializableXact))
3960 : {
3961 0 : ereport(ERROR,
3962 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
3963 : errmsg("could not serialize access due to read/write dependencies among transactions"),
3964 : errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict out checking."),
3965 : errhint("The transaction might succeed if retried.")));
3966 : }
3967 :
3968 : /*
3969 : * Check to see whether the tuple has been written to by a concurrent
3970 : * transaction, either to create it not visible to us, or to delete it
3971 : * while it is visible to us. The "visible" bool indicates whether the
3972 : * tuple is visible to us, while HeapTupleSatisfiesVacuum checks what else
3973 : * is going on with it.
3974 : */
3975 86 : htsvResult = HeapTupleSatisfiesVacuum(tuple, TransactionXmin, buffer);
3976 86 : switch (htsvResult)
3977 : {
3978 : case HEAPTUPLE_LIVE:
3979 59 : if (visible)
3980 59 : return;
3981 0 : xid = HeapTupleHeaderGetXmin(tuple->t_data);
3982 0 : break;
3983 : case HEAPTUPLE_RECENTLY_DEAD:
3984 0 : if (!visible)
3985 0 : return;
3986 0 : xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
3987 0 : break;
3988 : case HEAPTUPLE_DELETE_IN_PROGRESS:
3989 5 : xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
3990 5 : break;
3991 : case HEAPTUPLE_INSERT_IN_PROGRESS:
3992 10 : xid = HeapTupleHeaderGetXmin(tuple->t_data);
3993 10 : break;
3994 : case HEAPTUPLE_DEAD:
3995 12 : return;
3996 : default:
3997 :
3998 : /*
3999 : * The only way to get to this default clause is if a new value is
4000 : * added to the enum type without adding it to this switch
4001 : * statement. That's a bug, so elog.
4002 : */
4003 0 : elog(ERROR, "unrecognized return value from HeapTupleSatisfiesVacuum: %u", htsvResult);
4004 :
4005 : /*
4006 : * In spite of having all enum values covered and calling elog on
4007 : * this default, some compilers think this is a code path which
4008 : * allows xid to be used below without initialization. Silence
4009 : * that warning.
4010 : */
4011 : xid = InvalidTransactionId;
4012 : }
4013 15 : Assert(TransactionIdIsValid(xid));
4014 15 : Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
4015 :
4016 : /*
4017 : * Find top level xid. Bail out if xid is too early to be a conflict, or
4018 : * if it's our own xid.
4019 : */
4020 15 : if (TransactionIdEquals(xid, GetTopTransactionIdIfAny()))
4021 13 : return;
4022 2 : xid = SubTransGetTopmostTransaction(xid);
4023 2 : if (TransactionIdPrecedes(xid, TransactionXmin))
4024 0 : return;
4025 2 : if (TransactionIdEquals(xid, GetTopTransactionIdIfAny()))
4026 0 : return;
4027 :
4028 : /*
4029 : * Find sxact or summarized info for the top level xid.
4030 : */
4031 2 : sxidtag.xid = xid;
4032 2 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
4033 2 : sxid = (SERIALIZABLEXID *)
4034 2 : hash_search(SerializableXidHash, &sxidtag, HASH_FIND, NULL);
4035 2 : if (!sxid)
4036 : {
4037 : /*
4038 : * Transaction not found in "normal" SSI structures. Check whether it
4039 : * got pushed out to SLRU storage for "old committed" transactions.
4040 : */
4041 : SerCommitSeqNo conflictCommitSeqNo;
4042 :
4043 0 : conflictCommitSeqNo = OldSerXidGetMinConflictCommitSeqNo(xid);
4044 0 : if (conflictCommitSeqNo != 0)
4045 : {
4046 0 : if (conflictCommitSeqNo != InvalidSerCommitSeqNo
4047 0 : && (!SxactIsReadOnly(MySerializableXact)
4048 0 : || conflictCommitSeqNo
4049 0 : <= MySerializableXact->SeqNo.lastCommitBeforeSnapshot))
4050 0 : ereport(ERROR,
4051 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4052 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4053 : errdetail_internal("Reason code: Canceled on conflict out to old pivot %u.", xid),
4054 : errhint("The transaction might succeed if retried.")));
4055 :
4056 0 : if (SxactHasSummaryConflictIn(MySerializableXact)
4057 0 : || !SHMQueueEmpty(&MySerializableXact->inConflicts))
4058 0 : ereport(ERROR,
4059 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4060 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4061 : errdetail_internal("Reason code: Canceled on identification as a pivot, with conflict out to old committed transaction %u.", xid),
4062 : errhint("The transaction might succeed if retried.")));
4063 :
4064 0 : MySerializableXact->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT;
4065 : }
4066 :
4067 : /* It's not serializable or otherwise not important. */
4068 0 : LWLockRelease(SerializableXactHashLock);
4069 0 : return;
4070 : }
4071 2 : sxact = sxid->myXact;
4072 2 : Assert(TransactionIdEquals(sxact->topXid, xid));
4073 2 : if (sxact == MySerializableXact || SxactIsDoomed(sxact))
4074 : {
4075 : /* Can't conflict with ourself or a transaction that will roll back. */
4076 0 : LWLockRelease(SerializableXactHashLock);
4077 0 : return;
4078 : }
4079 :
4080 : /*
4081 : * We have a conflict out to a transaction which has a conflict out to a
4082 : * summarized transaction. That summarized transaction must have
4083 : * committed first, and we can't tell when it committed in relation to our
4084 : * snapshot acquisition, so something needs to be canceled.
4085 : */
4086 2 : if (SxactHasSummaryConflictOut(sxact))
4087 : {
4088 0 : if (!SxactIsPrepared(sxact))
4089 : {
4090 0 : sxact->flags |= SXACT_FLAG_DOOMED;
4091 0 : LWLockRelease(SerializableXactHashLock);
4092 0 : return;
4093 : }
4094 : else
4095 : {
4096 0 : LWLockRelease(SerializableXactHashLock);
4097 0 : ereport(ERROR,
4098 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4099 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4100 : errdetail_internal("Reason code: Canceled on conflict out to old pivot."),
4101 : errhint("The transaction might succeed if retried.")));
4102 : }
4103 : }
4104 :
4105 : /*
4106 : * If this is a read-only transaction and the writing transaction has
4107 : * committed, and it doesn't have a rw-conflict to a transaction which
4108 : * committed before it, no conflict.
4109 : */
4110 2 : if (SxactIsReadOnly(MySerializableXact)
4111 0 : && SxactIsCommitted(sxact)
4112 0 : && !SxactHasSummaryConflictOut(sxact)
4113 0 : && (!SxactHasConflictOut(sxact)
4114 0 : || MySerializableXact->SeqNo.lastCommitBeforeSnapshot < sxact->SeqNo.earliestOutConflictCommit))
4115 : {
4116 : /* Read-only transaction will appear to run first. No conflict. */
4117 0 : LWLockRelease(SerializableXactHashLock);
4118 0 : return;
4119 : }
4120 :
4121 2 : if (!XidIsConcurrent(xid))
4122 : {
4123 : /* This write was already in our snapshot; no conflict. */
4124 0 : LWLockRelease(SerializableXactHashLock);
4125 0 : return;
4126 : }
4127 :
4128 2 : if (RWConflictExists(MySerializableXact, sxact))
4129 : {
4130 : /* We don't want duplicate conflict records in the list. */
4131 1 : LWLockRelease(SerializableXactHashLock);
4132 1 : return;
4133 : }
4134 :
4135 : /*
4136 : * Flag the conflict. But first, if this conflict creates a dangerous
4137 : * structure, ereport an error.
4138 : */
4139 1 : FlagRWConflict(MySerializableXact, sxact);
4140 1 : LWLockRelease(SerializableXactHashLock);
4141 : }
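
/*
 * For illustration only: the switch above, reduced to the question it
 * answers: given the tuple's vacuum-visibility state and whether the tuple
 * is visible to us, which xid (if any) performed the potentially conflicting
 * write?  Zero stands in for InvalidTransactionId; the enum mirrors
 * HTSV_Result but, like every other name here, is invented for the sketch.
 */
#include <stdbool.h>
#include <stdint.h>

typedef enum
{
	EX_TUPLE_LIVE,
	EX_TUPLE_RECENTLY_DEAD,
	EX_TUPLE_DELETE_IN_PROGRESS,
	EX_TUPLE_INSERT_IN_PROGRESS,
	EX_TUPLE_DEAD
} ExampleTupleState;

static uint32_t
example_conflict_out_xid(ExampleTupleState state, bool visible,
						 uint32_t xmin, uint32_t update_xid)
{
	switch (state)
	{
		case EX_TUPLE_LIVE:
			return visible ? 0 : xmin;	/* created but not visible to us */
		case EX_TUPLE_RECENTLY_DEAD:
			return visible ? update_xid : 0;	/* deleted while visible */
		case EX_TUPLE_DELETE_IN_PROGRESS:
			return update_xid;
		case EX_TUPLE_INSERT_IN_PROGRESS:
			return xmin;
		case EX_TUPLE_DEAD:
		default:
			return 0;			/* no conflict candidate */
	}
}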
4142 :
4143 : /*
4144 : * Check a particular target for rw-dependency conflict in. A subroutine of
4145 : * CheckForSerializableConflictIn().
4146 : */
4147 : static void
4148 30 : CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
4149 : {
4150 : uint32 targettaghash;
4151 : LWLock *partitionLock;
4152 : PREDICATELOCKTARGET *target;
4153 : PREDICATELOCK *predlock;
4154 30 : PREDICATELOCK *mypredlock = NULL;
4155 : PREDICATELOCKTAG mypredlocktag;
4156 :
4157 30 : Assert(MySerializableXact != InvalidSerializableXact);
4158 :
4159 : /*
4160 : * The same hash and LW lock apply to the lock target and the lock itself.
4161 : */
4162 30 : targettaghash = PredicateLockTargetTagHashCode(targettag);
4163 30 : partitionLock = PredicateLockHashPartitionLock(targettaghash);
4164 30 : LWLockAcquire(partitionLock, LW_SHARED);
4165 30 : target = (PREDICATELOCKTARGET *)
4166 30 : hash_search_with_hash_value(PredicateLockTargetHash,
4167 : targettag, targettaghash,
4168 : HASH_FIND, NULL);
4169 30 : if (!target)
4170 : {
4171 : /* Nothing has this target locked; we're done here. */
4172 23 : LWLockRelease(partitionLock);
4173 52 : return;
4174 : }
4175 :
4176 : /*
4177 : * Each lock for an overlapping transaction represents a conflict: a
4178 : * rw-dependency in to this transaction.
4179 : */
4180 7 : predlock = (PREDICATELOCK *)
4181 7 : SHMQueueNext(&(target->predicateLocks),
4182 7 : &(target->predicateLocks),
4183 : offsetof(PREDICATELOCK, targetLink));
4184 7 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
4185 20 : while (predlock)
4186 : {
4187 : SHM_QUEUE *predlocktargetlink;
4188 : PREDICATELOCK *nextpredlock;
4189 : SERIALIZABLEXACT *sxact;
4190 :
4191 7 : predlocktargetlink = &(predlock->targetLink);
4192 7 : nextpredlock = (PREDICATELOCK *)
4193 7 : SHMQueueNext(&(target->predicateLocks),
4194 : predlocktargetlink,
4195 : offsetof(PREDICATELOCK, targetLink));
4196 :
4197 7 : sxact = predlock->tag.myXact;
4198 7 : if (sxact == MySerializableXact)
4199 : {
4200 : /*
4201 : * If we're getting a write lock on a tuple, we don't need a
4202 : * predicate (SIREAD) lock on the same tuple. We can safely remove
4203 : * our SIREAD lock, but we'll defer doing so until after the loop
4204 : * because that requires upgrading to an exclusive partition lock.
4205 : *
4206 : * We can't use this optimization within a subtransaction because
4207 : * the subtransaction could roll back, and we would be left
4208 : * without any lock at the top level.
4209 : */
4210 5 : if (!IsSubTransaction()
4211 5 : && GET_PREDICATELOCKTARGETTAG_OFFSET(*targettag))
4212 : {
4213 0 : mypredlock = predlock;
4214 0 : mypredlocktag = predlock->tag;
4215 : }
4216 : }
4217 2 : else if (!SxactIsDoomed(sxact)
4218 2 : && (!SxactIsCommitted(sxact)
4219 0 : || TransactionIdPrecedes(GetTransactionSnapshot()->xmin,
4220 : sxact->finishedBefore))
4221 2 : && !RWConflictExists(sxact, MySerializableXact))
4222 : {
4223 2 : LWLockRelease(SerializableXactHashLock);
4224 2 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
4225 :
4226 : /*
4227 : * Re-check after getting exclusive lock because the other
4228 : * transaction may have flagged a conflict.
4229 : */
4230 2 : if (!SxactIsDoomed(sxact)
4231 2 : && (!SxactIsCommitted(sxact)
4232 0 : || TransactionIdPrecedes(GetTransactionSnapshot()->xmin,
4233 : sxact->finishedBefore))
4234 2 : && !RWConflictExists(sxact, MySerializableXact))
4235 : {
4236 2 : FlagRWConflict(sxact, MySerializableXact);
4237 : }
4238 :
4239 1 : LWLockRelease(SerializableXactHashLock);
4240 1 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
4241 : }
4242 :
4243 6 : predlock = nextpredlock;
4244 : }
4245 6 : LWLockRelease(SerializableXactHashLock);
4246 6 : LWLockRelease(partitionLock);
4247 :
4248 : /*
4249 : * If we found one of our own SIREAD locks to remove, remove it now.
4250 : *
4251 : * At this point our transaction already has an ExclusiveRowLock on the
4252             :      * At this point our transaction already has a RowExclusiveLock on the
4253 : * found, without fearing that another write against the tuple will occur
4254 : * before the MVCC information makes it to the buffer.
4255 : */
4256 6 : if (mypredlock != NULL)
4257 : {
4258 : uint32 predlockhashcode;
4259 : PREDICATELOCK *rmpredlock;
4260 :
4261 0 : LWLockAcquire(SerializablePredicateLockListLock, LW_SHARED);
4262 0 : LWLockAcquire(partitionLock, LW_EXCLUSIVE);
4263 0 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
4264 :
4265 : /*
4266 : * Remove the predicate lock from shared memory, if it wasn't removed
4267 : * while the locks were released. One way that could happen is from
4268 : * autovacuum cleaning up an index.
4269 : */
4270 0 : predlockhashcode = PredicateLockHashCodeFromTargetHashCode
4271 : (&mypredlocktag, targettaghash);
4272 0 : rmpredlock = (PREDICATELOCK *)
4273 0 : hash_search_with_hash_value(PredicateLockHash,
4274 : &mypredlocktag,
4275 : predlockhashcode,
4276 : HASH_FIND, NULL);
4277 0 : if (rmpredlock != NULL)
4278 : {
4279 0 : Assert(rmpredlock == mypredlock);
4280 :
4281 0 : SHMQueueDelete(&(mypredlock->targetLink));
4282 0 : SHMQueueDelete(&(mypredlock->xactLink));
4283 :
4284 0 : rmpredlock = (PREDICATELOCK *)
4285 0 : hash_search_with_hash_value(PredicateLockHash,
4286 : &mypredlocktag,
4287 : predlockhashcode,
4288 : HASH_REMOVE, NULL);
4289 0 : Assert(rmpredlock == mypredlock);
4290 :
4291 0 : RemoveTargetIfNoLongerUsed(target, targettaghash);
4292 : }
4293 :
4294 0 : LWLockRelease(SerializableXactHashLock);
4295 0 : LWLockRelease(partitionLock);
4296 0 : LWLockRelease(SerializablePredicateLockListLock);
4297 :
4298 0 : if (rmpredlock != NULL)
4299 : {
4300 : /*
4301 : * Remove entry in local lock table if it exists. It's OK if it
4302 : * doesn't exist; that means the lock was transferred to a new
4303 : * target by a different backend.
4304 : */
4305 0 : hash_search_with_hash_value(LocalPredicateLockHash,
4306 : targettag, targettaghash,
4307 : HASH_REMOVE, NULL);
4308 :
4309 0 : DecrementParentLocks(targettag);
4310 : }
4311 : }
4312 : }
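
/*
 * For illustration only: the condition under which the loop above remembers
 * our own SIREAD lock for removal, as a standalone predicate.  Parameter
 * names are invented for the sketch.
 */
#include <stdbool.h>

static bool
example_can_drop_own_siread_lock(bool lock_is_ours, bool lock_is_tuple_level,
								 bool in_subtransaction)
{
	/*
	 * A write makes our own SIREAD lock on the same tuple redundant, but
	 * only at the top level: a subtransaction could roll back and leave the
	 * top-level transaction holding no lock at all.
	 */
	return lock_is_ours && lock_is_tuple_level && !in_subtransaction;
}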
4313 :
4314 : /*
4315 : * CheckForSerializableConflictIn
4316 : * We are writing the given tuple. If that indicates a rw-conflict
4317 : * in from another serializable transaction, take appropriate action.
4318 : *
4319 : * Skip checking for any granularity for which a parameter is missing.
4320 : *
4321 : * A tuple update or delete is in conflict if we have a predicate lock
4322 : * against the relation or page in which the tuple exists, or against the
4323 : * tuple itself.
4324 : */
4325 : void
4326 1063171 : CheckForSerializableConflictIn(Relation relation, HeapTuple tuple,
4327 : Buffer buffer)
4328 : {
4329 : PREDICATELOCKTARGETTAG targettag;
4330 :
4331 1063171 : if (!SerializationNeededForWrite(relation))
4332 2126323 : return;
4333 :
4334 : /* Check if someone else has already decided that we need to die */
4335 18 : if (SxactIsDoomed(MySerializableXact))
4336 0 : ereport(ERROR,
4337 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4338 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4339 : errdetail_internal("Reason code: Canceled on identification as a pivot, during conflict in checking."),
4340 : errhint("The transaction might succeed if retried.")));
4341 :
4342 : /*
4343 : * We're doing a write which might cause rw-conflicts now or later.
4344 : * Memorize that fact.
4345 : */
4346 18 : MyXactDidWrite = true;
4347 :
4348 : /*
4349 : * It is important that we check for locks from the finest granularity to
4350 : * the coarsest granularity, so that granularity promotion doesn't cause
4351 : * us to miss a lock. The new (coarser) lock will be acquired before the
4352 : * old (finer) locks are released.
4353 : *
4354 : * It is not possible to take and hold a lock across the checks for all
4355 : * granularities because each target could be in a separate partition.
4356 : */
4357 18 : if (tuple != NULL)
4358 : {
4359 5 : SET_PREDICATELOCKTARGETTAG_TUPLE(targettag,
4360 : relation->rd_node.dbNode,
4361 : relation->rd_id,
4362 : ItemPointerGetBlockNumber(&(tuple->t_self)),
4363 : ItemPointerGetOffsetNumber(&(tuple->t_self)));
4364 5 : CheckTargetForConflictsIn(&targettag);
4365 : }
4366 :
4367 18 : if (BufferIsValid(buffer))
4368 : {
4369 7 : SET_PREDICATELOCKTARGETTAG_PAGE(targettag,
4370 : relation->rd_node.dbNode,
4371 : relation->rd_id,
4372 : BufferGetBlockNumber(buffer));
4373 7 : CheckTargetForConflictsIn(&targettag);
4374 : }
4375 :
4376 18 : SET_PREDICATELOCKTARGETTAG_RELATION(targettag,
4377 : relation->rd_node.dbNode,
4378 : relation->rd_id);
4379 18 : CheckTargetForConflictsIn(&targettag);
4380 : }
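
/*
 * For illustration only: the probe order used above, finest granularity
 * first, with the per-target conflict check abstracted as a callback.
 * Checking tuple before page before relation means that granularity
 * promotion (which acquires the coarser lock before the finer locks are
 * released) can never let a conflict slip through.  Names are invented for
 * the sketch.
 */
#include <stdbool.h>

typedef enum
{
	EX_CHECK_TUPLE,				/* finest: checked first, if we have a tuple */
	EX_CHECK_PAGE,				/* checked if we have a valid buffer */
	EX_CHECK_RELATION			/* coarsest: always checked, last */
} ExampleCheckLevel;

static void
example_check_conflicts_in(bool have_tuple, bool have_buffer,
						   void (*check) (ExampleCheckLevel level))
{
	if (have_tuple)
		check(EX_CHECK_TUPLE);
	if (have_buffer)
		check(EX_CHECK_PAGE);
	check(EX_CHECK_RELATION);
}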
4381 :
4382 : /*
4383 : * CheckTableForSerializableConflictIn
4384 : * The entire table is going through a DDL-style logical mass delete
4385 : * like TRUNCATE or DROP TABLE. If that causes a rw-conflict in from
4386 : * another serializable transaction, take appropriate action.
4387 : *
4388 : * While these operations do not operate entirely within the bounds of
4389 : * snapshot isolation, they can occur inside a serializable transaction, and
4390 : * will logically occur after any reads which saw rows which were destroyed
4391             :  * will logically occur after any reads that saw rows destroyed
4392 : * SSI.
4393 : *
4394 : * The relation passed in must be a heap relation. Any predicate lock of any
4395 : * granularity on the heap will cause a rw-conflict in to this transaction.
4396 : * Predicate locks on indexes do not matter because they only exist to guard
4397 : * against conflicting inserts into the index, and this is a mass *delete*.
4398 : * When a table is truncated or dropped, the index will also be truncated
4399 : * or dropped, and we'll deal with locks on the index when that happens.
4400 : *
4401 : * Dropping or truncating a table also needs to drop any existing predicate
4402 : * locks on heap tuples or pages, because they're about to go away. This
4403 : * should be done before altering the predicate locks because the transaction
4404 : * could be rolled back because of a conflict, in which case the lock changes
4405 : * are not needed. (At the moment, we don't actually bother to drop the
4406             :  * existing locks on a dropped or truncated table.  That might
4407 : * lead to some false positives, but it doesn't seem worth the trouble.)
4408 : */
4409 : void
4410 2230 : CheckTableForSerializableConflictIn(Relation relation)
4411 : {
4412 : HASH_SEQ_STATUS seqstat;
4413 : PREDICATELOCKTARGET *target;
4414 : Oid dbId;
4415 : Oid heapId;
4416 : int i;
4417 :
4418 : /*
4419 : * Bail out quickly if there are no serializable transactions running.
4420 : * It's safe to check this without taking locks because the caller is
4421 : * holding an ACCESS EXCLUSIVE lock on the relation. No new locks which
4422 : * would matter here can be acquired while that is held.
4423 : */
4424 2230 : if (!TransactionIdIsValid(PredXact->SxactGlobalXmin))
4425 4423 : return;
4426 :
4427 36 : if (!SerializationNeededForWrite(relation))
4428 35 : return;
4429 :
4430 : /*
4431 : * We're doing a write which might cause rw-conflicts now or later.
4432 : * Memorize that fact.
4433 : */
4434 1 : MyXactDidWrite = true;
4435 :
4436 1 : Assert(relation->rd_index == NULL); /* not an index relation */
4437 :
4438 1 : dbId = relation->rd_node.dbNode;
4439 1 : heapId = relation->rd_id;
4440 :
4441 1 : LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
4442 17 : for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
4443 16 : LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_SHARED);
4444 1 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
4445 :
4446 : /* Scan through target list */
4447 1 : hash_seq_init(&seqstat, PredicateLockTargetHash);
4448 :
4449 3 : while ((target = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
4450 : {
4451 : PREDICATELOCK *predlock;
4452 :
4453 : /*
4454 : * Check whether this is a target which needs attention.
4455 : */
4456 1 : if (GET_PREDICATELOCKTARGETTAG_RELATION(target->tag) != heapId)
4457 1 : continue; /* wrong relation id */
4458 0 : if (GET_PREDICATELOCKTARGETTAG_DB(target->tag) != dbId)
4459 0 : continue; /* wrong database id */
4460 :
4461 : /*
4462 : * Loop through locks for this target and flag conflicts.
4463 : */
4464 0 : predlock = (PREDICATELOCK *)
4465 0 : SHMQueueNext(&(target->predicateLocks),
4466 0 : &(target->predicateLocks),
4467 : offsetof(PREDICATELOCK, targetLink));
4468 0 : while (predlock)
4469 : {
4470 : PREDICATELOCK *nextpredlock;
4471 :
4472 0 : nextpredlock = (PREDICATELOCK *)
4473 0 : SHMQueueNext(&(target->predicateLocks),
4474 0 : &(predlock->targetLink),
4475 : offsetof(PREDICATELOCK, targetLink));
4476 :
4477 0 : if (predlock->tag.myXact != MySerializableXact
4478 0 : && !RWConflictExists(predlock->tag.myXact, MySerializableXact))
4479 : {
4480 0 : FlagRWConflict(predlock->tag.myXact, MySerializableXact);
4481 : }
4482 :
4483 0 : predlock = nextpredlock;
4484 : }
4485 : }
4486 :
4487 : /* Release locks in reverse order */
4488 1 : LWLockRelease(SerializableXactHashLock);
4489 17 : for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
4490 16 : LWLockRelease(PredicateLockHashPartitionLockByIndex(i));
4491 1 : LWLockRelease(SerializablePredicateLockListLock);
4492 : }
4493 :
4494 :
4495 : /*
4496 : * Flag a rw-dependency between two serializable transactions.
4497 : *
4498 : * The caller is responsible for ensuring that we have a LW lock on
4499 : * the transaction hash table.
4500 : */
4501 : static void
4502 3 : FlagRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer)
4503 : {
4504 3 : Assert(reader != writer);
4505 :
4506 : /* First, see if this conflict causes failure. */
4507 3 : OnConflict_CheckForSerializationFailure(reader, writer);
4508 :
4509 : /* Actually do the conflict flagging. */
4510 2 : if (reader == OldCommittedSxact)
4511 0 : writer->flags |= SXACT_FLAG_SUMMARY_CONFLICT_IN;
4512 2 : else if (writer == OldCommittedSxact)
4513 0 : reader->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT;
4514 : else
4515 2 : SetRWConflict(reader, writer);
4516 2 : }
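
/*
 * For illustration only: the first pivot test in the function below, reduced
 * to a standalone predicate.  We are about to record an rw-edge from reader
 * to writer; the edge is dangerous if the writer already has an rw-conflict
 * out to a prepared or committed transaction Tout that effectively commits
 * first relative to everyone involved.  Plain uint64_t sequence numbers
 * stand in for the prepare/commit SeqNos, with 0 meaning "not prepared or
 * committed"; all names are invented for the sketch.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
example_pivot_is_dangerous(uint64_t tout_prepare_seqno,
						   uint64_t reader_commit_seqno,
						   uint64_t writer_commit_seqno,
						   bool reader_is_read_only,
						   uint64_t reader_last_commit_before_snapshot)
{
	if (tout_prepare_seqno == 0)
		return false;			/* Tout has not prepared or committed */
	if (reader_commit_seqno != 0 && reader_commit_seqno < tout_prepare_seqno)
		return false;			/* reader committed before Tout */
	if (writer_commit_seqno != 0 && writer_commit_seqno < tout_prepare_seqno)
		return false;			/* writer committed before Tout */
	if (reader_is_read_only &&
		tout_prepare_seqno > reader_last_commit_before_snapshot)
		return false;			/* read-only reader overlapped Tout */

	return true;				/* dangerous structure: someone must abort */
}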
4517 :
4518 : /*----------------------------------------------------------------------------
4519 : * We are about to add a RW-edge to the dependency graph - check that we don't
4520 : * introduce a dangerous structure by doing so, and abort one of the
4521 : * transactions if so.
4522 : *
4523 : * A serialization failure can only occur if there is a dangerous structure
4524 : * in the dependency graph:
4525 : *
4526 : * Tin ------> Tpivot ------> Tout
4527 : * rw rw
4528 : *
4529 : * Furthermore, Tout must commit first.
4530 : *
4531 : * One more optimization is that if Tin is declared READ ONLY (or commits
4532 : * without writing), we can only have a problem if Tout committed before Tin
4533 : * acquired its snapshot.
4534 : *----------------------------------------------------------------------------
4535 : */
4536 : static void
4537 3 : OnConflict_CheckForSerializationFailure(const SERIALIZABLEXACT *reader,
4538 : SERIALIZABLEXACT *writer)
4539 : {
4540 : bool failure;
4541 : RWConflict conflict;
4542 :
4543 3 : Assert(LWLockHeldByMe(SerializableXactHashLock));
4544 :
4545 3 : failure = false;
4546 :
4547 : /*------------------------------------------------------------------------
4548 : * Check for already-committed writer with rw-conflict out flagged
4549 : * (conflict-flag on W means that T2 committed before W):
4550 : *
4551 : * R ------> W ------> T2
4552 : * rw rw
4553 : *
4554 : * That is a dangerous structure, so we must abort. (Since the writer
4555 : * has already committed, we must be the reader)
4556 : *------------------------------------------------------------------------
4557 : */
4558 3 : if (SxactIsCommitted(writer)
4559 0 : && (SxactHasConflictOut(writer) || SxactHasSummaryConflictOut(writer)))
4560 0 : failure = true;
4561 :
4562 : /*------------------------------------------------------------------------
4563 : * Check whether the writer has become a pivot with an out-conflict
4564 : * committed transaction (T2), and T2 committed first:
4565 : *
4566 : * R ------> W ------> T2
4567 : * rw rw
4568 : *
4569 : * Because T2 must've committed first, there is no anomaly if:
4570 : * - the reader committed before T2
4571 : * - the writer committed before T2
4572 : * - the reader is a READ ONLY transaction and the reader was concurrent
4573 : * with T2 (= reader acquired its snapshot before T2 committed)
4574 : *
4575 : * We also handle the case that T2 is prepared but not yet committed
4576 : * here. In that case T2 has already checked for conflicts, so if it
4577 : * commits first, making the above conflict real, it's too late for it
4578 : * to abort.
4579 : *------------------------------------------------------------------------
4580 : */
4581 3 : if (!failure)
4582 : {
4583 3 : if (SxactHasSummaryConflictOut(writer))
4584 : {
4585 0 : failure = true;
4586 0 : conflict = NULL;
4587 : }
4588 : else
4589 3 : conflict = (RWConflict)
4590 3 : SHMQueueNext(&writer->outConflicts,
4591 3 : &writer->outConflicts,
4592 : offsetof(RWConflictData, outLink));
4593 6 : while (conflict)
4594 : {
4595 1 : SERIALIZABLEXACT *t2 = conflict->sxactIn;
4596 :
4597 1 : if (SxactIsPrepared(t2)
4598 1 : && (!SxactIsCommitted(reader)
4599 0 : || t2->prepareSeqNo <= reader->commitSeqNo)
4600 1 : && (!SxactIsCommitted(writer)
4601 0 : || t2->prepareSeqNo <= writer->commitSeqNo)
4602 1 : && (!SxactIsReadOnly(reader)
4603 0 : || t2->prepareSeqNo <= reader->SeqNo.lastCommitBeforeSnapshot))
4604 : {
4605 1 : failure = true;
4606 1 : break;
4607 : }
4608 0 : conflict = (RWConflict)
4609 0 : SHMQueueNext(&writer->outConflicts,
4610 0 : &conflict->outLink,
4611 : offsetof(RWConflictData, outLink));
4612 : }
4613 : }
4614 :
4615 : /*------------------------------------------------------------------------
4616 : * Check whether the reader has become a pivot with a writer
4617 : * that's committed (or prepared):
4618 : *
4619 : * T0 ------> R ------> W
4620 : * rw rw
4621 : *
4622 : * Because W must've committed first for an anomaly to occur, there is no
4623 : * anomaly if:
4624 : * - T0 committed before the writer
4625 : * - T0 is READ ONLY, and overlaps the writer
4626 : *------------------------------------------------------------------------
4627 : */
4628 3 : if (!failure && SxactIsPrepared(writer) && !SxactIsReadOnly(reader))
4629 : {
4630 1 : if (SxactHasSummaryConflictIn(reader))
4631 : {
4632 0 : failure = true;
4633 0 : conflict = NULL;
4634 : }
4635 : else
4636 1 : conflict = (RWConflict)
4637 1 : SHMQueueNext(&reader->inConflicts,
4638 : &reader->inConflicts,
4639 : offsetof(RWConflictData, inLink));
4640 2 : while (conflict)
4641 : {
4642 0 : SERIALIZABLEXACT *t0 = conflict->sxactOut;
4643 :
4644 0 : if (!SxactIsDoomed(t0)
4645 0 : && (!SxactIsCommitted(t0)
4646 0 : || t0->commitSeqNo >= writer->prepareSeqNo)
4647 0 : && (!SxactIsReadOnly(t0)
4648 0 : || t0->SeqNo.lastCommitBeforeSnapshot >= writer->prepareSeqNo))
4649 : {
4650 0 : failure = true;
4651 0 : break;
4652 : }
4653 0 : conflict = (RWConflict)
4654 0 : SHMQueueNext(&reader->inConflicts,
4655 0 : &conflict->inLink,
4656 : offsetof(RWConflictData, inLink));
4657 : }
4658 : }
4659 :
4660 3 : if (failure)
4661 : {
4662 : /*
4663 : * We have to kill a transaction to avoid a possible anomaly from
4664 : * occurring. If the writer is us, we can just ereport() to cause a
4665 : * transaction abort. Otherwise we flag the writer for termination,
4666 : * causing it to abort when it tries to commit. However, if the writer
4667 : * has already prepared, we can't abort it anymore, so we have to kill
4668 : * the reader instead.
4669 : */
4670 1 : if (MySerializableXact == writer)
4671 : {
4672 1 : LWLockRelease(SerializableXactHashLock);
4673 1 : ereport(ERROR,
4674 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4675 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4676 : errdetail_internal("Reason code: Canceled on identification as a pivot, during write."),
4677 : errhint("The transaction might succeed if retried.")));
4678 : }
4679 0 : else if (SxactIsPrepared(writer))
4680 : {
4681 0 : LWLockRelease(SerializableXactHashLock);
4682 :
4683 : /* if we're not the writer, we have to be the reader */
4684 0 : Assert(MySerializableXact == reader);
4685 0 : ereport(ERROR,
4686 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4687 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4688 : errdetail_internal("Reason code: Canceled on conflict out to pivot %u, during read.", writer->topXid),
4689 : errhint("The transaction might succeed if retried.")));
4690 : }
4691 0 : writer->flags |= SXACT_FLAG_DOOMED;
4692 : }
4693 2 : }
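
The pivot rule spelled out in the comment block above can be condensed into a small pure predicate. The following standalone sketch models it with plain structs; Xact, is_dangerous_pivot, and the single commit_seqno/snapshot_seqno fields are hypothetical simplifications of SERIALIZABLEXACT's prepareSeqNo, commitSeqNo, and lastCommitBeforeSnapshot (and a prepared-but-uncommitted Tout is folded into "committed"), so it illustrates the rule rather than the implementation above.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical, simplified stand-in for SERIALIZABLEXACT. */
    typedef struct Xact
    {
        bool      committed;
        bool      read_only;
        uint64_t  commit_seqno;   /* meaningful only if committed */
        uint64_t  snapshot_seqno; /* last commit visible to our snapshot */
    } Xact;

    /*
     * Given Tin --rw--> Tpivot --rw--> Tout, decide whether the structure
     * can produce an anomaly, following the rules stated in the comments.
     */
    static bool
    is_dangerous_pivot(const Xact *tin, const Xact *tpivot, const Xact *tout)
    {
        /* An anomaly requires Tout to have committed first. */
        if (!tout->committed)
            return false;

        /* No anomaly if Tin committed before Tout. */
        if (tin->committed && tin->commit_seqno < tout->commit_seqno)
            return false;

        /* No anomaly if the pivot committed before Tout. */
        if (tpivot->committed && tpivot->commit_seqno < tout->commit_seqno)
            return false;

        /*
         * A READ ONLY Tin is only a problem if Tout committed before Tin
         * acquired its snapshot.
         */
        if (tin->read_only)
            return tout->commit_seqno <= tin->snapshot_seqno;

        return true;
    }

    int
    main(void)
    {
        Xact tin    = {false, true, 0, 10};  /* read-only; snapshot saw commits up to 10 */
        Xact tpivot = {false, false, 0, 0};  /* still in progress */
        Xact tout   = {true, false, 5, 0};   /* committed before Tin's snapshot */

        printf("dangerous: %d\n", is_dangerous_pivot(&tin, &tpivot, &tout));
        return 0;
    }
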
4694 :
4695 : /*
4696 : * PreCommit_CheckForSerializationFailure
4697 : * Check for dangerous structures in a serializable transaction
4698 : * at commit.
4699 : *
4700 : * We're checking for a dangerous structure as each conflict is recorded.
4701 : * The only way we could have a problem at commit is if this is the "out"
4702 : * side of a pivot, and neither the "in" side nor the pivot has yet
4703 : * committed.
4704 : *
4705 : * If a dangerous structure is found, the pivot (the near conflict) is
4706 : * marked for death, because rolling back another transaction might mean
4707 : * that we flail without ever making progress. This transaction is
4708 : * committing writes, so letting it commit ensures progress. If we
4709 : * canceled the far conflict, it might immediately fail again on retry.
4710 : */
4711 : void
4712 22913 : PreCommit_CheckForSerializationFailure(void)
4713 : {
4714 : RWConflict nearConflict;
4715 :
4716 22913 : if (MySerializableXact == InvalidSerializableXact)
4717 45816 : return;
4718 :
4719 10 : Assert(IsolationIsSerializable());
4720 :
4721 10 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
4722 :
4723 : /* Check if someone else has already decided that we need to die */
4724 10 : if (SxactIsDoomed(MySerializableXact))
4725 : {
4726 0 : LWLockRelease(SerializableXactHashLock);
4727 0 : ereport(ERROR,
4728 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4729 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4730 : errdetail_internal("Reason code: Canceled on identification as a pivot, during commit attempt."),
4731 : errhint("The transaction might succeed if retried.")));
4732 : }
4733 :
4734 10 : nearConflict = (RWConflict)
4735 10 : SHMQueueNext(&MySerializableXact->inConflicts,
4736 10 : &MySerializableXact->inConflicts,
4737 : offsetof(RWConflictData, inLink));
4738 21 : while (nearConflict)
4739 : {
4740 1 : if (!SxactIsCommitted(nearConflict->sxactOut)
4741 1 : && !SxactIsDoomed(nearConflict->sxactOut))
4742 : {
4743 : RWConflict farConflict;
4744 :
4745 1 : farConflict = (RWConflict)
4746 1 : SHMQueueNext(&nearConflict->sxactOut->inConflicts,
4747 1 : &nearConflict->sxactOut->inConflicts,
4748 : offsetof(RWConflictData, inLink));
4749 2 : while (farConflict)
4750 : {
4751 0 : if (farConflict->sxactOut == MySerializableXact
4752 0 : || (!SxactIsCommitted(farConflict->sxactOut)
4753 0 : && !SxactIsReadOnly(farConflict->sxactOut)
4754 0 : && !SxactIsDoomed(farConflict->sxactOut)))
4755 : {
4756 : /*
4757 : * Normally, we kill the pivot transaction to make sure we
4758 : * make progress if the failing transaction is retried.
4759 : * However, we can't kill it if it's already prepared, so
4760 : * in that case we commit suicide instead.
4761 : */
4762 0 : if (SxactIsPrepared(nearConflict->sxactOut))
4763 : {
4764 0 : LWLockRelease(SerializableXactHashLock);
4765 0 : ereport(ERROR,
4766 : (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
4767 : errmsg("could not serialize access due to read/write dependencies among transactions"),
4768 : errdetail_internal("Reason code: Canceled on commit attempt with conflict in from prepared pivot."),
4769 : errhint("The transaction might succeed if retried.")));
4770 : }
4771 0 : nearConflict->sxactOut->flags |= SXACT_FLAG_DOOMED;
4772 0 : break;
4773 : }
4774 0 : farConflict = (RWConflict)
4775 0 : SHMQueueNext(&nearConflict->sxactOut->inConflicts,
4776 0 : &farConflict->inLink,
4777 : offsetof(RWConflictData, inLink));
4778 : }
4779 : }
4780 :
4781 1 : nearConflict = (RWConflict)
4782 1 : SHMQueueNext(&MySerializableXact->inConflicts,
4783 1 : &nearConflict->inLink,
4784 : offsetof(RWConflictData, inLink));
4785 : }
4786 :
4787 10 : MySerializableXact->prepareSeqNo = ++(PredXact->LastSxactCommitSeqNo);
4788 10 : MySerializableXact->flags |= SXACT_FLAG_PREPARED;
4789 :
4790 10 : LWLockRelease(SerializableXactHashLock);
4791 : }
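
For a rough picture of the nested walk above, here is a standalone model that replaces the SHM_QUEUE conflict lists with plain arrays. ModelXact and doom_dangerous_pivots are hypothetical names, and the prepared-pivot special case (where the committer aborts itself) is deliberately left out.

    #include <stdbool.h>
    #include <stddef.h>

    /* Hypothetical, simplified stand-in for SERIALIZABLEXACT. */
    typedef struct ModelXact
    {
        bool        committed;
        bool        doomed;
        bool        read_only;
        struct ModelXact **in_conflicts;  /* writers with rw-edges into us */
        size_t      n_in_conflicts;
    } ModelXact;

    /*
     * "me" is about to commit.  Each uncommitted near conflict is a
     * potential pivot; if it also has a far in-conflict that is "me" or
     * another uncommitted, non-read-only, non-doomed transaction, doom the
     * pivot so that the committing transaction can make progress.
     */
    static void
    doom_dangerous_pivots(ModelXact *me)
    {
        for (size_t i = 0; i < me->n_in_conflicts; i++)
        {
            ModelXact  *pivot = me->in_conflicts[i];

            if (pivot->committed || pivot->doomed)
                continue;

            for (size_t j = 0; j < pivot->n_in_conflicts; j++)
            {
                ModelXact  *far = pivot->in_conflicts[j];

                if (far == me ||
                    (!far->committed && !far->read_only && !far->doomed))
                {
                    pivot->doomed = true;
                    break;
                }
            }
        }
    }

Wiring up the in_conflicts arrays and calling doom_dangerous_pivots(&me) plays the role of the two nested SHMQueueNext() loops above.
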
4792 :
4793 : /*------------------------------------------------------------------------*/
4794 :
4795 : /*
4796 : * Two-phase commit support
4797 : */
4798 :
4799 : /*
4800 : * AtPrepare_Locks
4801 : * Do the preparatory work for a PREPARE: make 2PC state file
4802 : * records for all predicate locks currently held.
4803 : */
4804 : void
4805 6 : AtPrepare_PredicateLocks(void)
4806 : {
4807 : PREDICATELOCK *predlock;
4808 : SERIALIZABLEXACT *sxact;
4809 : TwoPhasePredicateRecord record;
4810 : TwoPhasePredicateXactRecord *xactRecord;
4811 : TwoPhasePredicateLockRecord *lockRecord;
4812 :
4813 6 : sxact = MySerializableXact;
4814 6 : xactRecord = &(record.data.xactRecord);
4815 6 : lockRecord = &(record.data.lockRecord);
4816 :
4817 6 : if (MySerializableXact == InvalidSerializableXact)
4818 6 : return;
4819 :
4820 : /* Generate an xact record for our SERIALIZABLEXACT */
4821 6 : record.type = TWOPHASEPREDICATERECORD_XACT;
4822 6 : xactRecord->xmin = MySerializableXact->xmin;
4823 6 : xactRecord->flags = MySerializableXact->flags;
4824 :
4825 : /*
4826 : * Note that we don't include our lists of in- and out-conflicts in the
4827 : * statefile, because new conflicts can be added even after the
4828 : * transaction prepares. We'll just make a conservative assumption during
4829 : * recovery instead.
4830 : */
4831 :
4832 6 : RegisterTwoPhaseRecord(TWOPHASE_RM_PREDICATELOCK_ID, 0,
4833 : &record, sizeof(record));
4834 :
4835 : /*
4836 : * Generate a lock record for each lock.
4837 : *
4838 : * To do this, we need to walk the predicate lock list in our sxact rather
4839 : * than using the local predicate lock table because the latter is not
4840 : * guaranteed to be accurate.
4841 : */
4842 6 : LWLockAcquire(SerializablePredicateLockListLock, LW_SHARED);
4843 :
4844 6 : predlock = (PREDICATELOCK *)
4845 6 : SHMQueueNext(&(sxact->predicateLocks),
4846 6 : &(sxact->predicateLocks),
4847 : offsetof(PREDICATELOCK, xactLink));
4848 :
4849 17 : while (predlock != NULL)
4850 : {
4851 5 : record.type = TWOPHASEPREDICATERECORD_LOCK;
4852 5 : lockRecord->target = predlock->tag.myTarget->tag;
4853 :
4854 5 : RegisterTwoPhaseRecord(TWOPHASE_RM_PREDICATELOCK_ID, 0,
4855 : &record, sizeof(record));
4856 :
4857 5 : predlock = (PREDICATELOCK *)
4858 5 : SHMQueueNext(&(sxact->predicateLocks),
4859 5 : &(predlock->xactLink),
4860 : offsetof(PREDICATELOCK, xactLink));
4861 : }
4862 :
4863 6 : LWLockRelease(SerializablePredicateLockListLock);
4864 : }
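
Every list traversal in this section relies on the same shared-memory queue idiom: calling SHMQueueNext() with the list head as the "current element" yields the first entry, and passing an entry's embedded link advances until the walk wraps back to the head and returns NULL. Below is a minimal backend-context sketch of that idiom, assuming the pre-dlist SHM_QUEUE API from storage/shmem.h; MyEntry and walk_queue are hypothetical.

    #include "postgres.h"
    #include "storage/shmem.h"

    /* Hypothetical entry type with an embedded SHM_QUEUE link. */
    typedef struct MyEntry
    {
        int         payload;
        SHM_QUEUE   link;
    } MyEntry;

    /* Count the entries on a shared-memory list headed by "head". */
    static int
    walk_queue(SHM_QUEUE *head)
    {
        int         count = 0;
        MyEntry    *entry;

        /* Passing the head itself as the current element returns the first entry. */
        entry = (MyEntry *) SHMQueueNext(head, head, offsetof(MyEntry, link));
        while (entry != NULL)
        {
            count++;
            /* Advance from this entry's link; NULL once we wrap back to the head. */
            entry = (MyEntry *)
                SHMQueueNext(head, &entry->link, offsetof(MyEntry, link));
        }
        return count;
    }
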
4865 :
4866 : /*
4867 : * PostPrepare_Locks
4868 : * Clean up after successful PREPARE. Unlike the non-predicate
4869 : * lock manager, we do not need to transfer locks to a dummy
4870 : * PGPROC because our SERIALIZABLEXACT will stay around
4871 : * anyway. We only need to clean up our local state.
4872 : */
4873 : void
4874 6 : PostPrepare_PredicateLocks(TransactionId xid)
4875 : {
4876 6 : if (MySerializableXact == InvalidSerializableXact)
4877 6 : return;
4878 :
4879 6 : Assert(SxactIsPrepared(MySerializableXact));
4880 :
4881 6 : MySerializableXact->pid = 0;
4882 :
4883 6 : hash_destroy(LocalPredicateLockHash);
4884 6 : LocalPredicateLockHash = NULL;
4885 :
4886 6 : MySerializableXact = InvalidSerializableXact;
4887 6 : MyXactDidWrite = false;
4888 : }
4889 :
4890 : /*
4891 : * PredicateLockTwoPhaseFinish
4892 : * Release a prepared transaction's predicate locks once it
4893 : * commits or aborts.
4894 : */
4895 : void
4896 6 : PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit)
4897 : {
4898 : SERIALIZABLEXID *sxid;
4899 : SERIALIZABLEXIDTAG sxidtag;
4900 :
4901 6 : sxidtag.xid = xid;
4902 :
4903 6 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
4904 6 : sxid = (SERIALIZABLEXID *)
4905 6 : hash_search(SerializableXidHash, &sxidtag, HASH_FIND, NULL);
4906 6 : LWLockRelease(SerializableXactHashLock);
4907 :
4908 : /* xid will not be found if it wasn't a serializable transaction */
4909 6 : if (sxid == NULL)
4910 6 : return;
4911 :
4912 : /* Release its locks */
4913 6 : MySerializableXact = sxid->myXact;
4914 6 : MyXactDidWrite = true; /* conservatively assume that we wrote
4915 : * something */
4916 6 : ReleasePredicateLocks(isCommit);
4917 : }
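
PredicateLockTwoPhaseFinish above (and predicatelock_twophase_recover below) look entries up with PostgreSQL's dynahash API: HASH_FIND returns NULL when the key is absent, while HASH_ENTER creates the entry and reports through *foundPtr whether it already existed. Here is a minimal backend-context sketch assuming utils/hsearch.h; DemoTag, DemoEntry, and demo_lookup_or_insert are hypothetical.

    #include "postgres.h"
    #include "utils/hsearch.h"

    /*
     * Hypothetical key and entry types; dynahash treats the first keysize
     * bytes of the entry as the key, so the tag must come first.
     */
    typedef struct DemoTag
    {
        TransactionId xid;
    } DemoTag;

    typedef struct DemoEntry
    {
        DemoTag     tag;            /* hash key; must be first */
        int         payload;
    } DemoEntry;

    static DemoEntry *
    demo_lookup_or_insert(HTAB *demo_hash, TransactionId xid, bool insert)
    {
        DemoTag     tag;
        bool        found;
        DemoEntry  *entry;

        tag.xid = xid;
        entry = (DemoEntry *) hash_search(demo_hash, &tag,
                                          insert ? HASH_ENTER : HASH_FIND,
                                          &found);
        /* With HASH_ENTER, initialize a freshly created entry exactly once. */
        if (entry != NULL && insert && !found)
            entry->payload = 0;
        return entry;
    }
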
4918 :
4919 : /*
4920 : * Re-acquire a predicate lock belonging to a transaction that was prepared.
4921 : */
4922 : void
4923 0 : predicatelock_twophase_recover(TransactionId xid, uint16 info,
4924 : void *recdata, uint32 len)
4925 : {
4926 : TwoPhasePredicateRecord *record;
4927 :
4928 0 : Assert(len == sizeof(TwoPhasePredicateRecord));
4929 :
4930 0 : record = (TwoPhasePredicateRecord *) recdata;
4931 :
4932 0 : Assert((record->type == TWOPHASEPREDICATERECORD_XACT) ||
4933 : (record->type == TWOPHASEPREDICATERECORD_LOCK));
4934 :
4935 0 : if (record->type == TWOPHASEPREDICATERECORD_XACT)
4936 : {
4937 : /* Per-transaction record. Set up a SERIALIZABLEXACT. */
4938 : TwoPhasePredicateXactRecord *xactRecord;
4939 : SERIALIZABLEXACT *sxact;
4940 : SERIALIZABLEXID *sxid;
4941 : SERIALIZABLEXIDTAG sxidtag;
4942 : bool found;
4943 :
4944 0 : xactRecord = (TwoPhasePredicateXactRecord *) &record->data.xactRecord;
4945 :
4946 0 : LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
4947 0 : sxact = CreatePredXact();
4948 0 : if (!sxact)
4949 0 : ereport(ERROR,
4950 : (errcode(ERRCODE_OUT_OF_MEMORY),
4951 : errmsg("out of shared memory")));
4952 :
4953 : /* vxid for a prepared xact is InvalidBackendId/xid; no pid */
4954 0 : sxact->vxid.backendId = InvalidBackendId;
4955 0 : sxact->vxid.localTransactionId = (LocalTransactionId) xid;
4956 0 : sxact->pid = 0;
4957 :
4958 : /* a prepared xact hasn't committed yet */
4959 0 : sxact->prepareSeqNo = RecoverySerCommitSeqNo;
4960 0 : sxact->commitSeqNo = InvalidSerCommitSeqNo;
4961 0 : sxact->finishedBefore = InvalidTransactionId;
4962 :
4963 0 : sxact->SeqNo.lastCommitBeforeSnapshot = RecoverySerCommitSeqNo;
4964 :
4965 : /*
4966 : * Don't need to track this; no transactions running at the time the
4967 : * recovered xact started are still active, except possibly other
4968 : * prepared xacts, and we don't care whether those are RO_SAFE or not.
4969 : */
4970 0 : SHMQueueInit(&(sxact->possibleUnsafeConflicts));
4971 :
4972 0 : SHMQueueInit(&(sxact->predicateLocks));
4973 0 : SHMQueueElemInit(&(sxact->finishedLink));
4974 :
4975 0 : sxact->topXid = xid;
4976 0 : sxact->xmin = xactRecord->xmin;
4977 0 : sxact->flags = xactRecord->flags;
4978 0 : Assert(SxactIsPrepared(sxact));
4979 0 : if (!SxactIsReadOnly(sxact))
4980 : {
4981 0 : ++(PredXact->WritableSxactCount);
4982 0 : Assert(PredXact->WritableSxactCount <=
4983 : (MaxBackends + max_prepared_xacts));
4984 : }
4985 :
4986 : /*
4987 : * We don't know whether the transaction had any conflicts or not, so
4988 : * we'll conservatively assume that it had both a conflict in and a
4989 : * conflict out, and represent that with the summary conflict flags.
4990 : */
4991 0 : SHMQueueInit(&(sxact->outConflicts));
4992 0 : SHMQueueInit(&(sxact->inConflicts));
4993 0 : sxact->flags |= SXACT_FLAG_SUMMARY_CONFLICT_IN;
4994 0 : sxact->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT;
4995 :
4996 : /* Register the transaction's xid */
4997 0 : sxidtag.xid = xid;
4998 0 : sxid = (SERIALIZABLEXID *) hash_search(SerializableXidHash,
4999 : &sxidtag,
5000 : HASH_ENTER, &found);
5001 0 : Assert(sxid != NULL);
5002 0 : Assert(!found);
5003 0 : sxid->myXact = (SERIALIZABLEXACT *) sxact;
5004 :
5005 : /*
5006 : * Update global xmin. Note that this is a special case compared to
5007 : * registering a normal transaction, because the global xmin might go
5008 : * backwards. That's OK, because until recovery is over we're not
5009 : * going to complete any transactions or create any non-prepared
5010 : * transactions, so there's no danger of throwing away needed data.
5011 : */
5012 0 : if ((!TransactionIdIsValid(PredXact->SxactGlobalXmin)) ||
5013 0 : (TransactionIdFollows(PredXact->SxactGlobalXmin, sxact->xmin)))
5014 : {
5015 0 : PredXact->SxactGlobalXmin = sxact->xmin;
5016 0 : PredXact->SxactGlobalXminCount = 1;
5017 0 : OldSerXidSetActiveSerXmin(sxact->xmin);
5018 : }
5019 0 : else if (TransactionIdEquals(sxact->xmin, PredXact->SxactGlobalXmin))
5020 : {
5021 0 : Assert(PredXact->SxactGlobalXminCount > 0);
5022 0 : PredXact->SxactGlobalXminCount++;
5023 : }
5024 :
5025 0 : LWLockRelease(SerializableXactHashLock);
5026 : }
5027 0 : else if (record->type == TWOPHASEPREDICATERECORD_LOCK)
5028 : {
5029 : /* Lock record. Recreate the PREDICATELOCK */
5030 : TwoPhasePredicateLockRecord *lockRecord;
5031 : SERIALIZABLEXID *sxid;
5032 : SERIALIZABLEXACT *sxact;
5033 : SERIALIZABLEXIDTAG sxidtag;
5034 : uint32 targettaghash;
5035 :
5036 0 : lockRecord = (TwoPhasePredicateLockRecord *) &record->data.lockRecord;
5037 0 : targettaghash = PredicateLockTargetTagHashCode(&lockRecord->target);
5038 :
5039 0 : LWLockAcquire(SerializableXactHashLock, LW_SHARED);
5040 0 : sxidtag.xid = xid;
5041 0 : sxid = (SERIALIZABLEXID *)
5042 0 : hash_search(SerializableXidHash, &sxidtag, HASH_FIND, NULL);
5043 0 : LWLockRelease(SerializableXactHashLock);
5044 :
5045 0 : Assert(sxid != NULL);
5046 0 : sxact = sxid->myXact;
5047 0 : Assert(sxact != InvalidSerializableXact);
5048 :
5049 0 : CreatePredicateLock(&lockRecord->target, targettaghash, sxact);
5050 : }
5051 0 : }
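
The two-phase state handled by this function is a stream of fixed-size tagged records: AtPrepare_PredicateLocks writes one xact record plus one lock record per predicate lock, and recovery dispatches on the type field. The standalone model below uses hypothetical names (DemoRecord, demo_apply_record) and arbitrary field choices; only the tagged-union-plus-dispatch shape is taken from the code above.

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical analogue of TwoPhasePredicateRecord. */
    typedef enum DemoRecordType
    {
        DEMO_RECORD_XACT,           /* one per prepared transaction */
        DEMO_RECORD_LOCK            /* one per predicate lock it held */
    } DemoRecordType;

    typedef struct DemoRecord
    {
        DemoRecordType type;
        union
        {
            struct { uint32_t xmin; uint32_t flags; } xact;
            struct { uint32_t dbid; uint32_t relid; uint32_t page; } lock;
        }           data;
    } DemoRecord;

    /* Recovery-side dispatch: recreate state according to the record type. */
    static void
    demo_apply_record(const DemoRecord *rec)
    {
        if (rec->type == DEMO_RECORD_XACT)
            printf("recreate sxact: xmin=%u flags=%#x\n",
                   (unsigned) rec->data.xact.xmin,
                   (unsigned) rec->data.xact.flags);
        else
            printf("recreate lock: db=%u rel=%u page=%u\n",
                   (unsigned) rec->data.lock.dbid,
                   (unsigned) rec->data.lock.relid,
                   (unsigned) rec->data.lock.page);
    }

    int
    main(void)
    {
        DemoRecord xact = {DEMO_RECORD_XACT, {.xact = {100, 0x40}}};
        DemoRecord lock = {DEMO_RECORD_LOCK, {.lock = {1, 16384, 0}}};

        demo_apply_record(&xact);
        demo_apply_record(&lock);
        return 0;
    }
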
|