Line | Hit count | Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * dynahash.c
4 : * dynamic hash tables
5 : *
6 : * dynahash.c supports both local-to-a-backend hash tables and hash tables in
7 : * shared memory. For shared hash tables, it is the caller's responsibility
8 : * to provide appropriate access interlocking. The simplest convention is
9 : * that a single LWLock protects the whole hash table. Searches (HASH_FIND or
10 : * hash_seq_search) need only shared lock, but any update requires exclusive
11 : * lock. For heavily-used shared tables, the single-lock approach creates a
12 : * concurrency bottleneck, so we also support "partitioned" locking wherein
13 : * there are multiple LWLocks guarding distinct subsets of the table. To use
14 : * a hash table in partitioned mode, the HASH_PARTITION flag must be given
15 : * to hash_create. This prevents any attempt to split buckets on-the-fly.
16 : * Therefore, each hash bucket chain operates independently, and no fields
17 : * of the hash header change after init except nentries and freeList.
18 : * (A partitioned table uses multiple copies of those fields, guarded by
19 : * spinlocks, for additional concurrency.)
20 : * This lets any subset of the hash buckets be treated as a separately
21 : * lockable partition. We expect callers to use the low-order bits of a
22 : * lookup key's hash value as a partition number --- this will work because
23 : * of the way calc_bucket() maps hash values to bucket numbers.
24 : *
25 : * For hash tables in shared memory, the memory allocator function should
26 : * match malloc's semantics of returning NULL on failure. For hash tables
27 : * in local memory, we typically use palloc() which will throw error on
28 : * failure. The code in this file has to cope with both cases.
29 : *
30 : * dynahash.c provides support for these types of lookup keys:
31 : *
32 : * 1. Null-terminated C strings (truncated if necessary to fit in keysize),
33 : * compared as though by strcmp(). This is the default behavior.
34 : *
35 : * 2. Arbitrary binary data of size keysize, compared as though by memcmp().
36 : * (Caller must ensure there are no undefined padding bits in the keys!)
37 : * This is selected by specifying HASH_BLOBS flag to hash_create.
38 : *
39 : * 3. More complex key behavior can be selected by specifying user-supplied
40 : * hashing, comparison, and/or key-copying functions. At least a hashing
41 : * function must be supplied; comparison defaults to memcmp() and key copying
42 : * to memcpy() when a user-defined hashing function is selected.
43 : *
44 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
45 : * Portions Copyright (c) 1994, Regents of the University of California
46 : *
47 : *
48 : * IDENTIFICATION
49 : * src/backend/utils/hash/dynahash.c
50 : *
51 : *-------------------------------------------------------------------------
52 : */
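/*
 * A minimal usage sketch (hypothetical caller code, not part of dynahash.c
 * proper) for key type 2 above: a backend-local table with fixed-size
 * binary keys, selected by HASH_BLOBS. The entry type and all names here
 * are illustrative assumptions. Omitting both HASH_BLOBS and HASH_FUNCTION
 * would instead give the C-string behavior of key type 1.
 */
#ifdef NOT_USED
typedef struct ExampleEntry
{
	Oid			key;			/* hash key; must be the first field */
	int			hits;			/* caller-defined payload */
} ExampleEntry;

static HTAB *
create_example_table(void)
{
	HASHCTL		ctl;

	MemSet(&ctl, 0, sizeof(ctl));
	ctl.keysize = sizeof(Oid);
	ctl.entrysize = sizeof(ExampleEntry);
	return hash_create("example oid table", 128, &ctl,
					   HASH_ELEM | HASH_BLOBS);
}
#endif							/* NOT_USED */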
53 :
54 : /*
55 : * Original comments:
56 : *
57 : * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
58 : * Coded into C, with minor code improvements, and with hsearch(3) interface,
59 : * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
60 : * also, hcreate/hdestroy routines added to simulate hsearch(3).
61 : *
62 : * These routines simulate hsearch(3) and family, with the important
63 : * difference that the hash table is dynamic - can grow indefinitely
64 : * beyond its original size (as supplied to hcreate()).
65 : *
66 : * Performance appears to be comparable to that of hsearch(3).
67 : * The 'source-code' options referred to in hsearch(3)'s 'man' page
68 : * are not implemented; otherwise functionality is identical.
69 : *
70 : * Compilation controls:
71 : * HASH_DEBUG controls some informative traces, mainly for debugging.
72 : * HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained;
73 : * when combined with HASH_DEBUG, these are displayed by hdestroy().
74 : *
75 : * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
76 : * concatenation property, in probably unnecessary code 'optimization'.
77 : *
78 : * Modified margo@postgres.berkeley.edu February 1990
79 : * added multiple table interface
80 : * Modified by sullivan@postgres.berkeley.edu April 1990
81 : * changed ctl structure for shared memory
82 : */
83 :
84 : #include "postgres.h"
85 :
86 : #include <limits.h>
87 :
88 : #include "access/xact.h"
89 : #include "storage/shmem.h"
90 : #include "storage/spin.h"
91 : #include "utils/dynahash.h"
92 : #include "utils/memutils.h"
93 :
94 :
95 : /*
96 : * Constants
97 : *
98 : * A hash table has a top-level "directory", each of whose entries points
99 : * to a "segment" of ssize bucket headers. The maximum number of hash
100 : * buckets is thus dsize * ssize (but dsize may be expansible). Of course,
101 : * the number of records in the table can be larger, but we don't want a
102 : * whole lot of records per bucket or performance goes down.
103 : *
104 : * In a hash table allocated in shared memory, the directory cannot be
105 : * expanded because it must stay at a fixed address. The directory size
106 : * should be selected using hash_select_dirsize (and you'd better have
107 : * a good idea of the maximum number of entries!). For non-shared hash
108 : * tables, the initial directory size can be left at the default.
109 : */
110 : #define DEF_SEGSIZE 256
111 : #define DEF_SEGSIZE_SHIFT 8 /* must be log2(DEF_SEGSIZE) */
112 : #define DEF_DIRSIZE 256
113 : #define DEF_FFACTOR 1 /* default fill factor */
114 :
115 : /* Number of freelists to be used for a partitioned hash table. */
116 : #define NUM_FREELISTS 32
117 :
118 : /* A hash bucket is a linked list of HASHELEMENTs */
119 : typedef HASHELEMENT *HASHBUCKET;
120 :
121 : /* A hash segment is an array of bucket headers */
122 : typedef HASHBUCKET *HASHSEGMENT;
123 :
124 : /*
125 : * Per-freelist data.
126 : *
127 : * In a partitioned hash table, each freelist is associated with a specific
128 : * set of hashcodes, as determined by the FREELIST_IDX() macro below.
129 : * nentries tracks the number of live hashtable entries having those hashcodes
130 : * (NOT the number of entries in the freelist, as you might expect).
131 : *
132 : * The coverage of a freelist might be more or less than one partition, so it
133 : * needs its own lock rather than relying on caller locking. Relying on that
134 : * wouldn't work even if the coverage was the same, because of the occasional
135 : * need to "borrow" entries from another freelist; see get_hash_entry().
136 : *
137 : * Using an array of FreeListData instead of separate arrays of mutexes,
138 : * nentries and freeLists helps to reduce sharing of cache lines between
139 : * different mutexes.
140 : */
141 : typedef struct
142 : {
143 : slock_t mutex; /* spinlock for this freelist */
144 : long nentries; /* number of entries in associated buckets */
145 : HASHELEMENT *freeList; /* chain of free elements */
146 : } FreeListData;
147 :
148 : /*
149 : * Header structure for a hash table --- contains all changeable info
150 : *
151 : * In a shared-memory hash table, the HASHHDR is in shared memory, while
152 : * each backend has a local HTAB struct. For a non-shared table, there isn't
153 : * any functional difference between HASHHDR and HTAB, but we separate them
154 : * anyway to share code between shared and non-shared tables.
155 : */
156 : struct HASHHDR
157 : {
158 : /*
159 : * The freelist can become a point of contention in high-concurrency hash
160 : * tables, so we use an array of freelists, each with its own mutex and
161 : * nentries count, instead of just a single one. Although the freelists
162 : * normally operate independently, we will scavenge entries from freelists
163 : * other than a hashcode's default freelist when necessary.
164 : *
165 : * If the hash table is not partitioned, only freeList[0] is used and its
166 : * spinlock is not used at all; callers' locking is assumed sufficient.
167 : */
168 : FreeListData freeList[NUM_FREELISTS];
169 :
170 : /* These fields can change, but not in a partitioned table */
171 : /* Also, dsize can't change in a shared table, even if unpartitioned */
172 : long dsize; /* directory size */
173 : long nsegs; /* number of allocated segments (<= dsize) */
174 : uint32 max_bucket; /* ID of maximum bucket in use */
175 : uint32 high_mask; /* mask to modulo into entire table */
176 : uint32 low_mask; /* mask to modulo into lower half of table */
177 :
178 : /* These fields are fixed at hashtable creation */
179 : Size keysize; /* hash key length in bytes */
180 : Size entrysize; /* total user element size in bytes */
181 : long num_partitions; /* # partitions (must be power of 2), or 0 */
182 : long ffactor; /* target fill factor */
183 : long max_dsize; /* 'dsize' limit if directory is fixed size */
184 : long ssize; /* segment size --- must be power of 2 */
185 : int sshift; /* segment shift = log2(ssize) */
186 : int nelem_alloc; /* number of entries to allocate at once */
187 :
188 : #ifdef HASH_STATISTICS
189 :
190 : /*
191 : * Count statistics here. NB: stats code doesn't bother with mutex, so
192 : * counts could be corrupted a bit in a partitioned table.
193 : */
194 : long accesses;
195 : long collisions;
196 : #endif
197 : };
198 :
199 : #define IS_PARTITIONED(hctl) ((hctl)->num_partitions != 0)
200 :
201 : #define FREELIST_IDX(hctl, hashcode) \
202 : (IS_PARTITIONED(hctl) ? (hashcode) % NUM_FREELISTS : 0)
203 :
204 : /*
205 : * Top control structure for a hashtable --- in a shared table, each backend
206 : * has its own copy (OK since no fields change at runtime)
207 : */
208 : struct HTAB
209 : {
210 : HASHHDR *hctl; /* => shared control information */
211 : HASHSEGMENT *dir; /* directory of segment starts */
212 : HashValueFunc hash; /* hash function */
213 : HashCompareFunc match; /* key comparison function */
214 : HashCopyFunc keycopy; /* key copying function */
215 : HashAllocFunc alloc; /* memory allocator */
216 : MemoryContext hcxt; /* memory context if default allocator used */
217 : char *tabname; /* table name (for error messages) */
218 : bool isshared; /* true if table is in shared memory */
219 : bool isfixed; /* if true, don't enlarge */
220 :
221 : /* freezing a shared table isn't allowed, so we can keep state here */
222 : bool frozen; /* true = no more inserts allowed */
223 :
224 : /* We keep local copies of these fixed values to reduce contention */
225 : Size keysize; /* hash key length in bytes */
226 : long ssize; /* segment size --- must be power of 2 */
227 : int sshift; /* segment shift = log2(ssize) */
228 : };
229 :
230 : /*
231 : * Key (also entry) part of a HASHELEMENT
232 : */
233 : #define ELEMENTKEY(helem) (((char *)(helem)) + MAXALIGN(sizeof(HASHELEMENT)))
234 :
235 : /*
236 : * Obtain element pointer given pointer to key
237 : */
238 : #define ELEMENT_FROM_KEY(key) \
239 : ((HASHELEMENT *) (((char *) (key)) - MAXALIGN(sizeof(HASHELEMENT))))
240 :
241 : /*
242 : * Fast MOD arithmetic, assuming that y is a power of 2 !
243 : */
244 : #define MOD(x,y) ((x) & ((y)-1))
245 :
246 : #if HASH_STATISTICS
247 : static long hash_accesses,
248 : hash_collisions,
249 : hash_expansions;
250 : #endif
251 :
252 : /*
253 : * Private function prototypes
254 : */
255 : static void *DynaHashAlloc(Size size);
256 : static HASHSEGMENT seg_alloc(HTAB *hashp);
257 : static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx);
258 : static bool dir_realloc(HTAB *hashp);
259 : static bool expand_table(HTAB *hashp);
260 : static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx);
261 : static void hdefault(HTAB *hashp);
262 : static int choose_nelem_alloc(Size entrysize);
263 : static bool init_htab(HTAB *hashp, long nelem);
264 : static void hash_corrupted(HTAB *hashp);
265 : static long next_pow2_long(long num);
266 : static int next_pow2_int(long num);
267 : static void register_seq_scan(HTAB *hashp);
268 : static void deregister_seq_scan(HTAB *hashp);
269 : static bool has_seq_scans(HTAB *hashp);
270 :
271 :
272 : /*
273 : * memory allocation support
274 : */
275 : static MemoryContext CurrentDynaHashCxt = NULL;
276 :
277 : static void *
278 53291 : DynaHashAlloc(Size size)
279 : {
280 53291 : Assert(MemoryContextIsValid(CurrentDynaHashCxt));
281 53291 : return MemoryContextAlloc(CurrentDynaHashCxt, size);
282 : }
283 :
284 :
285 : /*
286 : * HashCompareFunc for string keys
287 : *
288 : * Because we copy keys with strlcpy(), they will be truncated at keysize-1
289 : * bytes, so we can only compare that many ... hence strncmp is almost but
290 : * not quite the right thing.
291 : */
292 : static int
293 30327 : string_compare(const char *key1, const char *key2, Size keysize)
294 : {
295 30327 : return strncmp(key1, key2, keysize - 1);
296 : }
297 :
298 :
299 : /************************** CREATE ROUTINES **********************/
300 :
301 : /*
302 : * hash_create -- create a new dynamic hash table
303 : *
304 : * tabname: a name for the table (for debugging purposes)
305 : * nelem: maximum number of elements expected
306 : * *info: additional table parameters, as indicated by flags
307 : * flags: bitmask indicating which parameters to take from *info
308 : *
309 : * Note: for a shared-memory hashtable, nelem needs to be a pretty good
310 : * estimate, since we can't expand the table on the fly. But an unshared
311 : * hashtable can be expanded on-the-fly, so it's better for nelem to be
312 : * on the small side and let the table grow if it's exceeded. An overly
313 : * large nelem will penalize hash_seq_search speed without buying much.
314 : */
315 : HTAB *
316 9769 : hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
317 : {
318 : HTAB *hashp;
319 : HASHHDR *hctl;
320 :
321 : /*
322 : * For shared hash tables, we have a local hash header (HTAB struct) that
323 : * we allocate in TopMemoryContext; all else is in shared memory.
324 : *
325 : * For non-shared hash tables, everything including the hash header is in
326 : * a memory context created specially for the hash table --- this makes
327 : * hash_destroy very simple. The memory context is made a child of either
328 : * a context specified by the caller, or TopMemoryContext if nothing is
329 : * specified.
330 : */
331 9769 : if (flags & HASH_SHARED_MEM)
332 : {
333 : /* Set up to allocate the hash header */
334 35 : CurrentDynaHashCxt = TopMemoryContext;
335 : }
336 : else
337 : {
338 : /* Create the hash table's private memory context */
339 9734 : if (flags & HASH_CONTEXT)
340 6132 : CurrentDynaHashCxt = info->hcxt;
341 : else
342 3602 : CurrentDynaHashCxt = TopMemoryContext;
343 9734 : CurrentDynaHashCxt = AllocSetContextCreate(CurrentDynaHashCxt,
344 : tabname,
345 : ALLOCSET_DEFAULT_SIZES);
346 : }
347 :
348 : /* Initialize the hash header, plus a copy of the table name */
349 9769 : hashp = (HTAB *) DynaHashAlloc(sizeof(HTAB) + strlen(tabname) + 1);
350 9769 : MemSet(hashp, 0, sizeof(HTAB));
351 :
352 9769 : hashp->tabname = (char *) (hashp + 1);
353 9769 : strcpy(hashp->tabname, tabname);
354 :
355 : /*
356 : * Select the appropriate hash function (see comments at head of file).
357 : */
358 9769 : if (flags & HASH_FUNCTION)
359 90 : hashp->hash = info->hash;
360 9679 : else if (flags & HASH_BLOBS)
361 : {
362 : /* We can optimize hashing for common key sizes */
363 6547 : Assert(flags & HASH_ELEM);
364 6547 : if (info->keysize == sizeof(uint32))
365 3942 : hashp->hash = uint32_hash;
366 : else
367 2605 : hashp->hash = tag_hash;
368 : }
369 : else
370 3132 : hashp->hash = string_hash; /* default hash function */
371 :
372 : /*
373 : * If you don't specify a match function, it defaults to string_compare if
374 : * you used string_hash (either explicitly or by default) and to memcmp
375 : * otherwise.
376 : *
377 : * Note: explicitly specifying string_hash is deprecated, because this
378 : * might not work for callers in loadable modules on some platforms due to
379 : * referencing a trampoline instead of the string_hash function proper.
380 : * Just let it default, eh?
381 : */
382 9769 : if (flags & HASH_COMPARE)
383 80 : hashp->match = info->match;
384 9689 : else if (hashp->hash == string_hash)
385 3132 : hashp->match = (HashCompareFunc) string_compare;
386 : else
387 6557 : hashp->match = memcmp;
388 :
389 : /*
390 : * Similarly, the key-copying function defaults to strlcpy or memcpy.
391 : */
392 9769 : if (flags & HASH_KEYCOPY)
393 0 : hashp->keycopy = info->keycopy;
394 9769 : else if (hashp->hash == string_hash)
395 3132 : hashp->keycopy = (HashCopyFunc) strlcpy;
396 : else
397 6637 : hashp->keycopy = memcpy;
398 :
399 : /* And select the entry allocation function, too. */
400 9769 : if (flags & HASH_ALLOC)
401 35 : hashp->alloc = info->alloc;
402 : else
403 9734 : hashp->alloc = DynaHashAlloc;
404 :
405 9769 : if (flags & HASH_SHARED_MEM)
406 : {
407 : /*
408 : * ctl structure and directory are preallocated for shared memory
409 : * tables. Note that HASH_DIRSIZE and HASH_ALLOC had better be set as
410 : * well.
411 : */
412 35 : hashp->hctl = info->hctl;
413 35 : hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR));
414 35 : hashp->hcxt = NULL;
415 35 : hashp->isshared = true;
416 :
417 : /* hash table already exists, we're just attaching to it */
418 35 : if (flags & HASH_ATTACH)
419 : {
420 : /* make local copies of some heavily-used values */
421 0 : hctl = hashp->hctl;
422 0 : hashp->keysize = hctl->keysize;
423 0 : hashp->ssize = hctl->ssize;
424 0 : hashp->sshift = hctl->sshift;
425 :
426 0 : return hashp;
427 : }
428 : }
429 : else
430 : {
431 : /* setup hash table defaults */
432 9734 : hashp->hctl = NULL;
433 9734 : hashp->dir = NULL;
434 9734 : hashp->hcxt = CurrentDynaHashCxt;
435 9734 : hashp->isshared = false;
436 : }
437 :
438 9769 : if (!hashp->hctl)
439 : {
440 9734 : hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
441 9734 : if (!hashp->hctl)
442 0 : ereport(ERROR,
443 : (errcode(ERRCODE_OUT_OF_MEMORY),
444 : errmsg("out of memory")));
445 : }
446 :
447 9769 : hashp->frozen = false;
448 :
449 9769 : hdefault(hashp);
450 :
451 9769 : hctl = hashp->hctl;
452 :
453 9769 : if (flags & HASH_PARTITION)
454 : {
455 : /* Doesn't make sense to partition a local hash table */
456 25 : Assert(flags & HASH_SHARED_MEM);
457 :
458 : /*
459 : * The number of partitions had better be a power of 2. Also, it must
460 : * be less than INT_MAX (see init_htab()), so call the int version of
461 : * next_pow2.
462 : */
463 25 : Assert(info->num_partitions == next_pow2_int(info->num_partitions));
464 :
465 25 : hctl->num_partitions = info->num_partitions;
466 : }
467 :
468 9769 : if (flags & HASH_SEGMENT)
469 : {
470 0 : hctl->ssize = info->ssize;
471 0 : hctl->sshift = my_log2(info->ssize);
472 : /* ssize had better be a power of 2 */
473 0 : Assert(hctl->ssize == (1L << hctl->sshift));
474 : }
475 9769 : if (flags & HASH_FFACTOR)
476 0 : hctl->ffactor = info->ffactor;
477 :
478 : /*
479 : * SHM hash tables have fixed directory size passed by the caller.
480 : */
481 9769 : if (flags & HASH_DIRSIZE)
482 : {
483 35 : hctl->max_dsize = info->max_dsize;
484 35 : hctl->dsize = info->dsize;
485 : }
486 :
487 : /*
488 : * The hash table allocates space for the key and data itself, but the
489 : * caller must say how much space to allocate.
490 : */
491 9769 : if (flags & HASH_ELEM)
492 : {
493 9769 : Assert(info->entrysize >= info->keysize);
494 9769 : hctl->keysize = info->keysize;
495 9769 : hctl->entrysize = info->entrysize;
496 : }
497 :
498 : /* make local copies of heavily-used constant fields */
499 9769 : hashp->keysize = hctl->keysize;
500 9769 : hashp->ssize = hctl->ssize;
501 9769 : hashp->sshift = hctl->sshift;
502 :
503 : /* Build the hash directory structure */
504 9769 : if (!init_htab(hashp, nelem))
505 0 : elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname);
506 :
507 : /*
508 : * For a shared hash table, preallocate the requested number of elements.
509 : * This reduces problems with run-time out-of-shared-memory conditions.
510 : *
511 : * For a non-shared hash table, preallocate the requested number of
512 : * elements if it's less than our chosen nelem_alloc. This avoids wasting
513 : * space if the caller correctly estimates a small table size.
514 : */
515 19503 : if ((flags & HASH_SHARED_MEM) ||
516 9734 : nelem < hctl->nelem_alloc)
517 : {
518 : int i,
519 : freelist_partitions,
520 : nelem_alloc,
521 : nelem_alloc_first;
522 :
523 : /*
524 : * If hash table is partitioned, give each freelist an equal share of
525 : * the initial allocation. Otherwise only freeList[0] is used.
526 : */
527 3293 : if (IS_PARTITIONED(hashp->hctl))
528 25 : freelist_partitions = NUM_FREELISTS;
529 : else
530 3268 : freelist_partitions = 1;
531 :
532 3293 : nelem_alloc = nelem / freelist_partitions;
533 3293 : if (nelem_alloc <= 0)
534 0 : nelem_alloc = 1;
535 :
536 : /*
537 : * Make sure we'll allocate all the requested elements; freeList[0]
538 : * gets the excess if the request isn't divisible by NUM_FREELISTS.
539 : */
540 3293 : if (nelem_alloc * freelist_partitions < nelem)
541 1 : nelem_alloc_first =
542 1 : nelem - nelem_alloc * (freelist_partitions - 1);
543 : else
544 3292 : nelem_alloc_first = nelem_alloc;
545 :
546 7361 : for (i = 0; i < freelist_partitions; i++)
547 : {
548 4068 : int temp = (i == 0) ? nelem_alloc_first : nelem_alloc;
549 :
550 4068 : if (!element_alloc(hashp, temp, i))
551 0 : ereport(ERROR,
552 : (errcode(ERRCODE_OUT_OF_MEMORY),
553 : errmsg("out of memory")));
554 : }
555 : }
556 :
557 9769 : if (flags & HASH_FIXED_SIZE)
558 15 : hashp->isfixed = true;
559 9769 : return hashp;
560 : }
561 :
562 : /*
563 : * Set default HASHHDR parameters.
564 : */
565 : static void
566 9769 : hdefault(HTAB *hashp)
567 : {
568 9769 : HASHHDR *hctl = hashp->hctl;
569 :
570 9769 : MemSet(hctl, 0, sizeof(HASHHDR));
571 :
572 9769 : hctl->dsize = DEF_DIRSIZE;
573 9769 : hctl->nsegs = 0;
574 :
575 : /* rather pointless defaults for key & entry size */
576 9769 : hctl->keysize = sizeof(char *);
577 9769 : hctl->entrysize = 2 * sizeof(char *);
578 :
579 9769 : hctl->num_partitions = 0; /* not partitioned */
580 :
581 9769 : hctl->ffactor = DEF_FFACTOR;
582 :
583 : /* table has no fixed maximum size */
584 9769 : hctl->max_dsize = NO_MAX_DSIZE;
585 :
586 9769 : hctl->ssize = DEF_SEGSIZE;
587 9769 : hctl->sshift = DEF_SEGSIZE_SHIFT;
588 :
589 : #ifdef HASH_STATISTICS
590 : hctl->accesses = hctl->collisions = 0;
591 : #endif
592 9769 : }
593 :
594 : /*
595 : * Given the user-specified entry size, choose nelem_alloc, ie, how many
596 : * elements to add to the hash table when we need more.
597 : */
598 : static int
599 9804 : choose_nelem_alloc(Size entrysize)
600 : {
601 : int nelem_alloc;
602 : Size elementSize;
603 : Size allocSize;
604 :
605 : /* Each element has a HASHELEMENT header plus user data. */
606 : /* NB: this had better match element_alloc() */
607 9804 : elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
608 :
609 : /*
610 : * The idea here is to choose nelem_alloc at least 32, but round up so
611 : * that the allocation request will be a power of 2 or just less. This
612 : * makes little difference for hash tables in shared memory, but for hash
613 : * tables managed by palloc, the allocation request will be rounded up to
614 : * a power of 2 anyway. If we fail to take this into account, we'll waste
615 : * as much as half the allocated space.
616 : */
617 9804 : allocSize = 32 * 4; /* assume elementSize at least 8 */
618 : do
619 : {
620 36787 : allocSize <<= 1;
621 36787 : nelem_alloc = allocSize / elementSize;
622 36787 : } while (nelem_alloc < 32);
623 :
624 9804 : return nelem_alloc;
625 : }
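/*
 * Worked example (illustrative; assumes a 64-bit build where
 * MAXALIGN(sizeof(HASHELEMENT)) is 16): with entrysize = 40,
 * elementSize = 16 + 40 = 56.  The loop doubles allocSize through 256,
 * 512, 1024, 2048; 2048 / 56 = 36 is the first quotient >= 32, so
 * nelem_alloc = 36.  The eventual request of 36 * 56 = 2016 bytes sits
 * just under the 2048-byte power of 2, so a palloc-based table loses
 * almost nothing to allocator rounding.
 */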
626 :
627 : /*
628 : * Compute derived fields of hctl and build the initial directory/segment
629 : * arrays
630 : */
631 : static bool
632 9769 : init_htab(HTAB *hashp, long nelem)
633 : {
634 9769 : HASHHDR *hctl = hashp->hctl;
635 : HASHSEGMENT *segp;
636 : int nbuckets;
637 : int nsegs;
638 : int i;
639 :
640 : /*
641 : * initialize mutexes if it's a partitioned table
642 : */
643 9769 : if (IS_PARTITIONED(hctl))
644 825 : for (i = 0; i < NUM_FREELISTS; i++)
645 800 : SpinLockInit(&(hctl->freeList[i].mutex));
646 :
647 : /*
648 : * Divide the number of elements by the fill factor to determine a desired
649 : * number of buckets, then allocate space for the next power of 2 at or
650 : * above that number.
651 : */
652 9769 : nbuckets = next_pow2_int((nelem - 1) / hctl->ffactor + 1);
653 :
654 : /*
655 : * In a partitioned table, nbuckets must be at least equal to
656 : * num_partitions; were it less, keys with apparently different partition
657 : * numbers would map to the same bucket, breaking partition independence.
658 : * (Normally nbuckets will be much bigger; this is just a safety check.)
659 : */
660 19538 : while (nbuckets < hctl->num_partitions)
661 0 : nbuckets <<= 1;
662 :
663 9769 : hctl->max_bucket = hctl->low_mask = nbuckets - 1;
664 9769 : hctl->high_mask = (nbuckets << 1) - 1;
665 :
666 : /*
667 : * Figure number of directory segments needed, round up to a power of 2
668 : */
669 9769 : nsegs = (nbuckets - 1) / hctl->ssize + 1;
670 9769 : nsegs = next_pow2_int(nsegs);
671 :
672 : /*
673 : * Make sure directory is big enough. If pre-allocated directory is too
674 : * small, choke (caller screwed up).
675 : */
676 9769 : if (nsegs > hctl->dsize)
677 : {
678 0 : if (!(hashp->dir))
679 0 : hctl->dsize = nsegs;
680 : else
681 0 : return false;
682 : }
683 :
684 : /* Allocate a directory */
685 9769 : if (!(hashp->dir))
686 : {
687 9734 : CurrentDynaHashCxt = hashp->hcxt;
688 9734 : hashp->dir = (HASHSEGMENT *)
689 9734 : hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT));
690 9734 : if (!hashp->dir)
691 0 : return false;
692 : }
693 :
694 : /* Allocate initial segments */
695 22265 : for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
696 : {
697 12496 : *segp = seg_alloc(hashp);
698 12496 : if (*segp == NULL)
699 0 : return false;
700 : }
701 :
702 : /* Choose number of entries to allocate at a time */
703 9769 : hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize);
704 :
705 : #if HASH_DEBUG
706 : fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n",
707 : "TABLE POINTER ", hashp,
708 : "DIRECTORY SIZE ", hctl->dsize,
709 : "SEGMENT SIZE ", hctl->ssize,
710 : "SEGMENT SHIFT ", hctl->sshift,
711 : "FILL FACTOR ", hctl->ffactor,
712 : "MAX BUCKET ", hctl->max_bucket,
713 : "HIGH MASK ", hctl->high_mask,
714 : "LOW MASK ", hctl->low_mask,
715 : "NSEGS ", hctl->nsegs);
716 : #endif
717 9769 : return true;
718 : }
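/*
 * Worked example (with default parameters): nelem = 1000 and ffactor = 1
 * give nbuckets = next_pow2_int(1000) = 1024, hence max_bucket = 1023,
 * low_mask = 1023, high_mask = 2047.  With ssize = 256 that requires
 * nsegs = next_pow2_int((1023 / 256) + 1) = 4 segments, comfortably
 * within the default directory size of 256.
 */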
719 :
720 : /*
721 : * Estimate the space needed for a hashtable containing the given number
722 : * of entries of given size.
723 : * NOTE: this is used to estimate the footprint of hashtables in shared
724 : * memory; therefore it does not count HTAB which is in local memory.
725 : * NB: assumes that all hash structure parameters have default values!
726 : */
727 : Size
728 35 : hash_estimate_size(long num_entries, Size entrysize)
729 : {
730 : Size size;
731 : long nBuckets,
732 : nSegments,
733 : nDirEntries,
734 : nElementAllocs,
735 : elementSize,
736 : elementAllocCnt;
737 :
738 : /* estimate number of buckets wanted */
739 35 : nBuckets = next_pow2_long((num_entries - 1) / DEF_FFACTOR + 1);
740 : /* # of segments needed for nBuckets */
741 35 : nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
742 : /* directory entries */
743 35 : nDirEntries = DEF_DIRSIZE;
744 70 : while (nDirEntries < nSegments)
745 0 : nDirEntries <<= 1; /* dir_alloc doubles dsize at each call */
746 :
747 : /* fixed control info */
748 35 : size = MAXALIGN(sizeof(HASHHDR)); /* but not HTAB, per above */
749 : /* directory */
750 35 : size = add_size(size, mul_size(nDirEntries, sizeof(HASHSEGMENT)));
751 : /* segments */
752 35 : size = add_size(size, mul_size(nSegments,
753 : MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET))));
754 : /* elements --- allocated in groups of choose_nelem_alloc() entries */
755 35 : elementAllocCnt = choose_nelem_alloc(entrysize);
756 35 : nElementAllocs = (num_entries - 1) / elementAllocCnt + 1;
757 35 : elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
758 35 : size = add_size(size,
759 : mul_size(nElementAllocs,
760 : mul_size(elementAllocCnt, elementSize)));
761 :
762 35 : return size;
763 : }
764 :
765 : /*
766 : * Select an appropriate directory size for a hashtable with the given
767 : * maximum number of entries.
768 : * This is only needed for hashtables in shared memory, whose directories
769 : * cannot be expanded dynamically.
770 : * NB: assumes that all hash structure parameters have default values!
771 : *
772 : * XXX this had better agree with the behavior of init_htab()...
773 : */
774 : long
775 35 : hash_select_dirsize(long num_entries)
776 : {
777 : long nBuckets,
778 : nSegments,
779 : nDirEntries;
780 :
781 : /* estimate number of buckets wanted */
782 35 : nBuckets = next_pow2_long((num_entries - 1) / DEF_FFACTOR + 1);
783 : /* # of segments needed for nBuckets */
784 35 : nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
785 : /* directory entries */
786 35 : nDirEntries = DEF_DIRSIZE;
787 70 : while (nDirEntries < nSegments)
788 0 : nDirEntries <<= 1; /* dir_alloc doubles dsize at each call */
789 :
790 35 : return nDirEntries;
791 : }
792 :
793 : /*
794 : * Compute the required initial memory allocation for a shared-memory
795 : * hashtable with the given parameters. We need space for the HASHHDR
796 : * and for the (non expansible) directory.
797 : */
798 : Size
799 35 : hash_get_shared_size(HASHCTL *info, int flags)
800 : {
801 35 : Assert(flags & HASH_DIRSIZE);
802 35 : Assert(info->dsize == info->max_dsize);
803 35 : return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
804 : }
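/*
 * Sketch of how a shared table is typically sized and created (hypothetical
 * caller, loosely modeled on users of ShmemInitHash in shmem.c; the entry
 * type, partition count, and table name are assumptions).  ShmemInitHash
 * itself supplies HASH_DIRSIZE, HASH_ALLOC, and HASH_SHARED_MEM, using
 * hash_select_dirsize() and hash_get_shared_size() internally.  The
 * caller's shared-memory size estimate would previously have included
 * hash_estimate_size(max_entries, sizeof(ExampleEntry)).
 */
#ifdef NOT_USED
static HTAB *
create_example_shared_table(long max_entries)
{
	HASHCTL		info;

	MemSet(&info, 0, sizeof(info));
	info.keysize = sizeof(Oid);
	info.entrysize = sizeof(ExampleEntry);
	info.num_partitions = 16;	/* must be a power of 2 */
	return ShmemInitHash("example shared table",
						 max_entries, max_entries, &info,
						 HASH_ELEM | HASH_BLOBS | HASH_PARTITION);
}
#endif							/* NOT_USED */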
805 :
806 :
807 : /********************** DESTROY ROUTINES ************************/
808 :
809 : void
810 4226 : hash_destroy(HTAB *hashp)
811 : {
812 4226 : if (hashp != NULL)
813 : {
814 : /* allocation method must be one we know how to free, too */
815 4226 : Assert(hashp->alloc == DynaHashAlloc);
816 : /* so this hashtable must have its own context */
817 4226 : Assert(hashp->hcxt != NULL);
818 :
819 4226 : hash_stats("destroy", hashp);
820 :
821 : /*
822 : * Free everything by destroying the hash table's memory context.
823 : */
824 4226 : MemoryContextDelete(hashp->hcxt);
825 : }
826 4226 : }
827 :
828 : void
829 4226 : hash_stats(const char *where, HTAB *hashp)
830 : {
831 : #if HASH_STATISTICS
832 : fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
833 : where, hashp->hctl->accesses, hashp->hctl->collisions);
834 :
835 : fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
836 : hash_get_num_entries(hashp), (long) hashp->hctl->keysize,
837 : hashp->hctl->max_bucket, hashp->hctl->nsegs);
838 : fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
839 : where, hash_accesses, hash_collisions);
840 : fprintf(stderr, "hash_stats: total expansions %ld\n",
841 : hash_expansions);
842 : #endif
843 4226 : }
844 :
845 : /****************************** SEARCH ROUTINES *****************************/
846 :
847 :
848 : /*
849 : * get_hash_value -- exported routine to calculate a key's hash value
850 : *
851 : * We export this because for partitioned tables, callers need to compute
852 : * the partition number (from the low-order bits of the hash value) before
853 : * searching.
854 : */
855 : uint32
856 4151782 : get_hash_value(HTAB *hashp, const void *keyPtr)
857 : {
858 4151782 : return hashp->hash(keyPtr, hashp->keysize);
859 : }
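/*
 * Sketch of the expected partitioned-lookup protocol (hypothetical caller;
 * the lock array and partition count are the caller's own, as in the lock
 * and buffer managers).  The partition number is taken from the low-order
 * bits of the hash value, and the same value is then passed to
 * hash_search_with_hash_value so it need not be computed twice:
 *
 *		hashcode = get_hash_value(table, &key);
 *		partition = hashcode % NUM_MY_PARTITIONS;
 *		LWLockAcquire(MyPartitionLocks[partition], LW_SHARED);
 *		entry = hash_search_with_hash_value(table, &key, hashcode,
 *											HASH_FIND, NULL);
 *		... use entry ...
 *		LWLockRelease(MyPartitionLocks[partition]);
 */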
860 :
861 : /* Convert a hash value to a bucket number */
862 : static inline uint32
863 8244430 : calc_bucket(HASHHDR *hctl, uint32 hash_val)
864 : {
865 : uint32 bucket;
866 :
867 8244430 : bucket = hash_val & hctl->high_mask;
868 8244430 : if (bucket > hctl->max_bucket)
869 3871448 : bucket = bucket & hctl->low_mask;
870 :
871 8244430 : return bucket;
872 : }
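/*
 * Worked example: suppose expansion has advanced a 16-bucket table to 17
 * buckets, so max_bucket = 16, low_mask = 15, high_mask = 31.  Then
 * hash_val = 48 gives 48 & 31 = 16, which is in range, so bucket 16 is
 * used; hash_val = 29 gives 29 & 31 = 29, which exceeds max_bucket, so
 * we retry with the low mask and get 29 & 15 = 13.  Buckets 17..31 stay
 * empty until further expansions advance max_bucket past them.
 */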
873 :
874 : /*
875 : * hash_search -- look up key in table and perform action
876 : * hash_search_with_hash_value -- same, with key's hash value already computed
877 : *
878 : * action is one of:
879 : * HASH_FIND: look up key in table
880 : * HASH_ENTER: look up key in table, creating entry if not present
881 : * HASH_ENTER_NULL: same, but return NULL if out of memory
882 : * HASH_REMOVE: look up key in table, remove entry if present
883 : *
884 : * Return value is a pointer to the element found/entered/removed if any,
885 : * or NULL if no match was found. (NB: in the case of the REMOVE action,
886 : * the result is a dangling pointer that shouldn't be dereferenced!)
887 : *
888 : * HASH_ENTER will normally ereport a generic "out of memory" error if
889 : * it is unable to create a new entry. The HASH_ENTER_NULL operation is
890 : * the same except it will return NULL if out of memory. Note that
891 : * HASH_ENTER_NULL cannot be used with the default palloc-based allocator,
892 : * since palloc internally ereports on out-of-memory.
893 : *
894 : * If foundPtr isn't NULL, then *foundPtr is set TRUE if we found an
895 : * existing entry in the table, FALSE otherwise. This is needed in the
896 : * HASH_ENTER case, but is redundant with the return value otherwise.
897 : *
898 : * For hash_search_with_hash_value, the hashvalue parameter must have been
899 : * calculated with get_hash_value().
900 : */
901 : void *
902 4604875 : hash_search(HTAB *hashp,
903 : const void *keyPtr,
904 : HASHACTION action,
905 : bool *foundPtr)
906 : {
907 4604875 : return hash_search_with_hash_value(hashp,
908 : keyPtr,
909 4604875 : hashp->hash(keyPtr, hashp->keysize),
910 : action,
911 : foundPtr);
912 : }
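/*
 * Typical HASH_ENTER usage (hypothetical caller; ExampleEntry as in the
 * sketch near the head of this file).  hash_search fills in the key of a
 * newly created entry; the caller must initialize the rest of the entry
 * precisely when *foundPtr comes back false.
 */
#ifdef NOT_USED
static ExampleEntry *
example_lookup_or_insert(HTAB *table, Oid key)
{
	ExampleEntry *entry;
	bool		found;

	entry = (ExampleEntry *) hash_search(table, &key, HASH_ENTER, &found);
	if (!found)
		entry->hits = 0;		/* new entry: initialize the payload */
	entry->hits++;
	return entry;
}
#endif							/* NOT_USED */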
913 :
914 : void *
915 8220006 : hash_search_with_hash_value(HTAB *hashp,
916 : const void *keyPtr,
917 : uint32 hashvalue,
918 : HASHACTION action,
919 : bool *foundPtr)
920 : {
921 8220006 : HASHHDR *hctl = hashp->hctl;
922 8220006 : int freelist_idx = FREELIST_IDX(hctl, hashvalue);
923 : Size keysize;
924 : uint32 bucket;
925 : long segment_num;
926 : long segment_ndx;
927 : HASHSEGMENT segp;
928 : HASHBUCKET currBucket;
929 : HASHBUCKET *prevBucketPtr;
930 : HashCompareFunc match;
931 :
932 : #if HASH_STATISTICS
933 : hash_accesses++;
934 : hctl->accesses++;
935 : #endif
936 :
937 : /*
938 : * If inserting, check if it is time to split a bucket.
939 : *
940 : * NOTE: failure to expand table is not a fatal error, it just means we
941 : * have to run at higher fill factor than we wanted. However, if we're
942 : * using the palloc allocator then it will throw error anyway on
943 : * out-of-memory, so we must do this before modifying the table.
944 : */
945 8220006 : if (action == HASH_ENTER || action == HASH_ENTER_NULL)
946 : {
947 : /*
948 : * Can't split if running in partitioned mode, nor if frozen, nor if
949 : * table is the subject of any active hash_seq_search scans. Strange
950 : * order of these tests is to try to check cheaper conditions first.
951 : */
952 3016225 : if (!IS_PARTITIONED(hctl) && !hashp->frozen &&
953 1426349 : hctl->freeList[0].nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
954 18967 : !has_seq_scans(hashp))
955 18967 : (void) expand_table(hashp);
956 : }
957 :
958 : /*
959 : * Do the initial lookup
960 : */
961 8220006 : bucket = calc_bucket(hctl, hashvalue);
962 :
963 8220006 : segment_num = bucket >> hashp->sshift;
964 8220006 : segment_ndx = MOD(bucket, hashp->ssize);
965 :
966 8220006 : segp = hashp->dir[segment_num];
967 :
968 8220006 : if (segp == NULL)
969 0 : hash_corrupted(hashp);
970 :
971 8220006 : prevBucketPtr = &segp[segment_ndx];
972 8220006 : currBucket = *prevBucketPtr;
973 :
974 : /*
975 : * Follow collision chain looking for matching key
976 : */
977 8220006 : match = hashp->match; /* save one fetch in inner loop */
978 8220006 : keysize = hashp->keysize; /* ditto */
979 :
980 17892371 : while (currBucket != NULL)
981 : {
982 14786425 : if (currBucket->hashvalue == hashvalue &&
983 6667094 : match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
984 6666972 : break;
985 1452359 : prevBucketPtr = &(currBucket->link);
986 1452359 : currBucket = *prevBucketPtr;
987 : #if HASH_STATISTICS
988 : hash_collisions++;
989 : hctl->collisions++;
990 : #endif
991 : }
992 :
993 8220006 : if (foundPtr)
994 1575416 : *foundPtr = (bool) (currBucket != NULL);
995 :
996 : /*
997 : * OK, now what?
998 : */
999 8220006 : switch (action)
1000 : {
1001 : case HASH_FIND:
1002 5471252 : if (currBucket != NULL)
1003 5260780 : return (void *) ELEMENTKEY(currBucket);
1004 210472 : return NULL;
1005 :
1006 : case HASH_REMOVE:
1007 1139911 : if (currBucket != NULL)
1008 : {
1009 : /* if partitioned, must lock to touch nentries and freeList */
1010 1138088 : if (IS_PARTITIONED(hctl))
1011 182868 : SpinLockAcquire(&(hctl->freeList[freelist_idx].mutex));
1012 :
1013 : /* delete the record from the appropriate nentries counter. */
1014 1138088 : Assert(hctl->freeList[freelist_idx].nentries > 0);
1015 1138088 : hctl->freeList[freelist_idx].nentries--;
1016 :
1017 : /* remove record from hash bucket's chain. */
1018 1138088 : *prevBucketPtr = currBucket->link;
1019 :
1020 : /* add the record to the appropriate freelist. */
1021 1138088 : currBucket->link = hctl->freeList[freelist_idx].freeList;
1022 1138088 : hctl->freeList[freelist_idx].freeList = currBucket;
1023 :
1024 1138088 : if (IS_PARTITIONED(hctl))
1025 182868 : SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1026 :
1027 : /*
1028 : * better hope the caller is synchronizing access to this
1029 : * element, because someone else is going to reuse it the next
1030 : * time something is added to the table
1031 : */
1032 1138088 : return (void *) ELEMENTKEY(currBucket);
1033 : }
1034 1823 : return NULL;
1035 :
1036 : case HASH_ENTER_NULL:
1037 : /* ENTER_NULL does not work with palloc-based allocator */
1038 184497 : Assert(hashp->alloc != DynaHashAlloc);
1039 : /* FALL THRU */
1040 :
1041 : case HASH_ENTER:
1042 : /* Return existing element if found, else create one */
1043 1608843 : if (currBucket != NULL)
1044 268104 : return (void *) ELEMENTKEY(currBucket);
1045 :
1046 : /* disallow inserts if frozen */
1047 1340739 : if (hashp->frozen)
1048 0 : elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
1049 : hashp->tabname);
1050 :
1051 1340739 : currBucket = get_hash_entry(hashp, freelist_idx);
1052 1340739 : if (currBucket == NULL)
1053 : {
1054 : /* out of memory */
1055 0 : if (action == HASH_ENTER_NULL)
1056 0 : return NULL;
1057 : /* report a generic message */
1058 0 : if (hashp->isshared)
1059 0 : ereport(ERROR,
1060 : (errcode(ERRCODE_OUT_OF_MEMORY),
1061 : errmsg("out of shared memory")));
1062 : else
1063 0 : ereport(ERROR,
1064 : (errcode(ERRCODE_OUT_OF_MEMORY),
1065 : errmsg("out of memory")));
1066 : }
1067 :
1068 : /* link into hashbucket chain */
1069 1340739 : *prevBucketPtr = currBucket;
1070 1340739 : currBucket->link = NULL;
1071 :
1072 : /* copy key into record */
1073 1340739 : currBucket->hashvalue = hashvalue;
1074 1340739 : hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize);
1075 :
1076 : /*
1077 : * Caller is expected to fill the data field on return. DO NOT
1078 : * insert any code that could possibly throw error here, as doing
1079 : * so would leave the table entry incomplete and hence corrupt the
1080 : * caller's data structure.
1081 : */
1082 :
1083 1340739 : return (void *) ELEMENTKEY(currBucket);
1084 : }
1085 :
1086 0 : elog(ERROR, "unrecognized hash action code: %d", (int) action);
1087 :
1088 : return NULL; /* keep compiler quiet */
1089 : }
1090 :
1091 : /*
1092 : * hash_update_hash_key -- change the hash key of an existing table entry
1093 : *
1094 : * This is equivalent to removing the entry, making a new entry, and copying
1095 : * over its data, except that the entry never goes to the table's freelist.
1096 : * Therefore this cannot suffer an out-of-memory failure, even if there are
1097 : * other processes operating in other partitions of the hashtable.
1098 : *
1099 : * Returns TRUE if successful, FALSE if the requested new hash key is already
1100 : * present. Throws error if the specified entry pointer isn't actually a
1101 : * table member.
1102 : *
1103 : * NB: currently, there is no special case for old and new hash keys being
1104 : * identical, which means we'll report FALSE for that situation. This is
1105 : * preferable for existing uses.
1106 : *
1107 : * NB: for a partitioned hashtable, caller must hold lock on both relevant
1108 : * partitions, if the new hash key would belong to a different partition.
1109 : */
1110 : bool
1111 17 : hash_update_hash_key(HTAB *hashp,
1112 : void *existingEntry,
1113 : const void *newKeyPtr)
1114 : {
1115 17 : HASHELEMENT *existingElement = ELEMENT_FROM_KEY(existingEntry);
1116 17 : HASHHDR *hctl = hashp->hctl;
1117 : uint32 newhashvalue;
1118 : Size keysize;
1119 : uint32 bucket;
1120 : uint32 newbucket;
1121 : long segment_num;
1122 : long segment_ndx;
1123 : HASHSEGMENT segp;
1124 : HASHBUCKET currBucket;
1125 : HASHBUCKET *prevBucketPtr;
1126 : HASHBUCKET *oldPrevPtr;
1127 : HashCompareFunc match;
1128 :
1129 : #if HASH_STATISTICS
1130 : hash_accesses++;
1131 : hctl->accesses++;
1132 : #endif
1133 :
1134 : /* disallow updates if frozen */
1135 17 : if (hashp->frozen)
1136 0 : elog(ERROR, "cannot update in frozen hashtable \"%s\"",
1137 : hashp->tabname);
1138 :
1139 : /*
1140 : * Lookup the existing element using its saved hash value. We need to do
1141 : * this to be able to unlink it from its hash chain, but as a side benefit
1142 : * we can verify the validity of the passed existingEntry pointer.
1143 : */
1144 17 : bucket = calc_bucket(hctl, existingElement->hashvalue);
1145 :
1146 17 : segment_num = bucket >> hashp->sshift;
1147 17 : segment_ndx = MOD(bucket, hashp->ssize);
1148 :
1149 17 : segp = hashp->dir[segment_num];
1150 :
1151 17 : if (segp == NULL)
1152 0 : hash_corrupted(hashp);
1153 :
1154 17 : prevBucketPtr = &segp[segment_ndx];
1155 17 : currBucket = *prevBucketPtr;
1156 :
1157 34 : while (currBucket != NULL)
1158 : {
1159 17 : if (currBucket == existingElement)
1160 17 : break;
1161 0 : prevBucketPtr = &(currBucket->link);
1162 0 : currBucket = *prevBucketPtr;
1163 : }
1164 :
1165 17 : if (currBucket == NULL)
1166 0 : elog(ERROR, "hash_update_hash_key argument is not in hashtable \"%s\"",
1167 : hashp->tabname);
1168 :
1169 17 : oldPrevPtr = prevBucketPtr;
1170 :
1171 : /*
1172 : * Now perform the equivalent of a HASH_ENTER operation to locate the hash
1173 : * chain we want to put the entry into.
1174 : */
1175 17 : newhashvalue = hashp->hash(newKeyPtr, hashp->keysize);
1176 :
1177 17 : newbucket = calc_bucket(hctl, newhashvalue);
1178 :
1179 17 : segment_num = newbucket >> hashp->sshift;
1180 17 : segment_ndx = MOD(newbucket, hashp->ssize);
1181 :
1182 17 : segp = hashp->dir[segment_num];
1183 :
1184 17 : if (segp == NULL)
1185 0 : hash_corrupted(hashp);
1186 :
1187 17 : prevBucketPtr = &segp[segment_ndx];
1188 17 : currBucket = *prevBucketPtr;
1189 :
1190 : /*
1191 : * Follow collision chain looking for matching key
1192 : */
1193 17 : match = hashp->match; /* save one fetch in inner loop */
1194 17 : keysize = hashp->keysize; /* ditto */
1195 :
1196 34 : while (currBucket != NULL)
1197 : {
1198 0 : if (currBucket->hashvalue == newhashvalue &&
1199 0 : match(ELEMENTKEY(currBucket), newKeyPtr, keysize) == 0)
1200 0 : break;
1201 0 : prevBucketPtr = &(currBucket->link);
1202 0 : currBucket = *prevBucketPtr;
1203 : #if HASH_STATISTICS
1204 : hash_collisions++;
1205 : hctl->collisions++;
1206 : #endif
1207 : }
1208 :
1209 17 : if (currBucket != NULL)
1210 0 : return false; /* collision with an existing entry */
1211 :
1212 17 : currBucket = existingElement;
1213 :
1214 : /*
1215 : * If old and new hash values belong to the same bucket, we need not
1216 : * change any chain links, and indeed should not since this simplistic
1217 : * update will corrupt the list if currBucket is the last element. (We
1218 : * cannot fall out earlier, however, since we need to scan the bucket to
1219 : * check for duplicate keys.)
1220 : */
1221 17 : if (bucket != newbucket)
1222 : {
1223 : /* OK to remove record from old hash bucket's chain. */
1224 17 : *oldPrevPtr = currBucket->link;
1225 :
1226 : /* link into new hashbucket chain */
1227 17 : *prevBucketPtr = currBucket;
1228 17 : currBucket->link = NULL;
1229 : }
1230 :
1231 : /* copy new key into record */
1232 17 : currBucket->hashvalue = newhashvalue;
1233 17 : hashp->keycopy(ELEMENTKEY(currBucket), newKeyPtr, keysize);
1234 :
1235 : /* rest of record is untouched */
1236 :
1237 17 : return true;
1238 : }
1239 :
1240 : /*
1241 : * Allocate a new hashtable entry if possible; return NULL if out of memory.
1242 : * (Or, if the underlying space allocator throws error for out-of-memory,
1243 : * we won't return at all.)
1244 : */
1245 : static HASHBUCKET
1246 1340739 : get_hash_entry(HTAB *hashp, int freelist_idx)
1247 : {
1248 1340739 : HASHHDR *hctl = hashp->hctl;
1249 : HASHBUCKET newElement;
1250 :
1251 : for (;;)
1252 : {
1253 : /* if partitioned, must lock to touch nentries and freeList */
1254 1350235 : if (IS_PARTITIONED(hctl))
1255 192210 : SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1256 :
1257 : /* try to get an entry from the freelist */
1258 1350235 : newElement = hctl->freeList[freelist_idx].freeList;
1259 :
1260 1350235 : if (newElement != NULL)
1261 1340739 : break;
1262 :
1263 9496 : if (IS_PARTITIONED(hctl))
1264 0 : SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1265 :
1266 : /*
1267 : * No free elements in this freelist. In a partitioned table, there
1268 : * might be entries in other freelists, but to reduce contention we
1269 : * prefer to first try to get another chunk of buckets from the main
1270 : * shmem allocator. If that fails, though, we *MUST* root through all
1271 : * the other freelists before giving up. There are multiple callers
1272 : * that assume that they can allocate every element in the initially
1273 : * requested table size, or that deleting an element guarantees they
1274 : * can insert a new element, even if shared memory is entirely full.
1275 : * Failing because the needed element is in a different freelist is
1276 : * not acceptable.
1277 : */
1278 9496 : if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx))
1279 : {
1280 : int borrow_from_idx;
1281 :
1282 0 : if (!IS_PARTITIONED(hctl))
1283 0 : return NULL; /* out of memory */
1284 :
1285 : /* try to borrow element from another freelist */
1286 0 : borrow_from_idx = freelist_idx;
1287 : for (;;)
1288 : {
1289 0 : borrow_from_idx = (borrow_from_idx + 1) % NUM_FREELISTS;
1290 0 : if (borrow_from_idx == freelist_idx)
1291 0 : break; /* examined all freelists, fail */
1292 :
1293 0 : SpinLockAcquire(&(hctl->freeList[borrow_from_idx].mutex));
1294 0 : newElement = hctl->freeList[borrow_from_idx].freeList;
1295 :
1296 0 : if (newElement != NULL)
1297 : {
1298 0 : hctl->freeList[borrow_from_idx].freeList = newElement->link;
1299 0 : SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
1300 :
1301 : /* careful: count the new element in its proper freelist */
1302 0 : SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1303 0 : hctl->freeList[freelist_idx].nentries++;
1304 0 : SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1305 :
1306 0 : return newElement;
1307 : }
1308 :
1309 0 : SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
1310 0 : }
1311 :
1312 : /* no elements available to borrow either, so out of memory */
1313 0 : return NULL;
1314 : }
1315 9496 : }
1316 :
1317 : /* remove entry from freelist, bump nentries */
1318 1340739 : hctl->freeList[freelist_idx].freeList = newElement->link;
1319 1340739 : hctl->freeList[freelist_idx].nentries++;
1320 :
1321 1340739 : if (IS_PARTITIONED(hctl))
1322 192210 : SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1323 :
1324 1340739 : return newElement;
1325 : }
1326 :
1327 : /*
1328 : * hash_get_num_entries -- get the number of entries in a hashtable
1329 : */
1330 : long
1331 217 : hash_get_num_entries(HTAB *hashp)
1332 : {
1333 : int i;
1334 217 : long sum = hashp->hctl->freeList[0].nentries;
1335 :
1336 : /*
1337 : * We currently don't bother with acquiring the mutexes; it's only
1338 : * sensible to call this function if you've got lock on all partitions of
1339 : * the table.
1340 : */
1341 217 : if (IS_PARTITIONED(hashp->hctl))
1342 : {
1343 3744 : for (i = 1; i < NUM_FREELISTS; i++)
1344 3627 : sum += hashp->hctl->freeList[i].nentries;
1345 : }
1346 :
1347 217 : return sum;
1348 : }
1349 :
1350 : /*
1351 : * hash_seq_init/_search/_term
1352 : * Sequentially search through hash table and return
1353 : * all the elements one by one, return NULL when no more.
1354 : *
1355 : * hash_seq_term should be called if and only if the scan is abandoned before
1356 : * completion; if hash_seq_search returns NULL then it has already done the
1357 : * end-of-scan cleanup.
1358 : *
1359 : * NOTE: caller may delete the returned element before continuing the scan.
1360 : * However, deleting any other element while the scan is in progress is
1361 : * UNDEFINED (it might be the one that curIndex is pointing at!). Also,
1362 : * if elements are added to the table while the scan is in progress, it is
1363 : * unspecified whether they will be visited by the scan or not.
1364 : *
1365 : * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
1366 : * worry about hash_seq_term cleanup, if the hashtable is first locked against
1367 : * further insertions by calling hash_freeze.
1368 : *
1369 : * NOTE: to use this with a partitioned hashtable, caller had better hold
1370 : * at least shared lock on all partitions of the table throughout the scan!
1371 : * We can cope with insertions or deletions by our own backend, but *not*
1372 : * with concurrent insertions or deletions by another.
1373 : */
1374 : void
1375 158036 : hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
1376 : {
1377 158036 : status->hashp = hashp;
1378 158036 : status->curBucket = 0;
1379 158036 : status->curEntry = NULL;
1380 158036 : if (!hashp->frozen)
1381 158036 : register_seq_scan(hashp);
1382 158036 : }
1383 :
1384 : void *
1385 1749359 : hash_seq_search(HASH_SEQ_STATUS *status)
1386 : {
1387 : HTAB *hashp;
1388 : HASHHDR *hctl;
1389 : uint32 max_bucket;
1390 : long ssize;
1391 : long segment_num;
1392 : long segment_ndx;
1393 : HASHSEGMENT segp;
1394 : uint32 curBucket;
1395 : HASHELEMENT *curElem;
1396 :
1397 1749359 : if ((curElem = status->curEntry) != NULL)
1398 : {
1399 : /* Continuing scan of curBucket... */
1400 434159 : status->curEntry = curElem->link;
1401 434159 : if (status->curEntry == NULL) /* end of this bucket */
1402 311924 : ++status->curBucket;
1403 434159 : return (void *) ELEMENTKEY(curElem);
1404 : }
1405 :
1406 : /*
1407 : * Search for next nonempty bucket starting at curBucket.
1408 : */
1409 1315200 : curBucket = status->curBucket;
1410 1315200 : hashp = status->hashp;
1411 1315200 : hctl = hashp->hctl;
1412 1315200 : ssize = hashp->ssize;
1413 1315200 : max_bucket = hctl->max_bucket;
1414 :
1415 1315200 : if (curBucket > max_bucket)
1416 : {
1417 8933 : hash_seq_term(status);
1418 8933 : return NULL; /* search is done */
1419 : }
1420 :
1421 : /*
1422 : * first find the right segment in the table directory.
1423 : */
1424 1306267 : segment_num = curBucket >> hashp->sshift;
1425 1306267 : segment_ndx = MOD(curBucket, ssize);
1426 :
1427 1306267 : segp = hashp->dir[segment_num];
1428 :
1429 : /*
1430 : * Pick up the first item in this bucket's chain. If chain is not empty
1431 : * we can begin searching it. Otherwise we have to advance to find the
1432 : * next nonempty bucket. We try to optimize that case since searching a
1433 : * near-empty hashtable has to iterate this loop a lot.
1434 : */
1435 13262969 : while ((curElem = segp[segment_ndx]) == NULL)
1436 : {
1437 : /* empty bucket, advance to next */
1438 10799436 : if (++curBucket > max_bucket)
1439 : {
1440 149001 : status->curBucket = curBucket;
1441 149001 : hash_seq_term(status);
1442 149001 : return NULL; /* search is done */
1443 : }
1444 10650435 : if (++segment_ndx >= ssize)
1445 : {
1446 6649 : segment_num++;
1447 6649 : segment_ndx = 0;
1448 6649 : segp = hashp->dir[segment_num];
1449 : }
1450 : }
1451 :
1452 : /* Begin scan of curBucket... */
1453 1157266 : status->curEntry = curElem->link;
1454 1157266 : if (status->curEntry == NULL) /* end of this bucket */
1455 845342 : ++curBucket;
1456 1157266 : status->curBucket = curBucket;
1457 1157266 : return (void *) ELEMENTKEY(curElem);
1458 : }
1459 :
1460 : void
1461 158036 : hash_seq_term(HASH_SEQ_STATUS *status)
1462 : {
1463 158036 : if (!status->hashp->frozen)
1464 158036 : deregister_seq_scan(status->hashp);
1465 158036 : }
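/*
 * Canonical full-scan loop (hypothetical caller; ExampleEntry as above).
 * Because this scan runs to completion, hash_seq_search has already done
 * the end-of-scan cleanup and hash_seq_term must NOT be called; it would
 * be required only if the loop were abandoned early, e.g. by break.
 */
#ifdef NOT_USED
static void
example_scan(HTAB *table)
{
	HASH_SEQ_STATUS status;
	ExampleEntry *entry;

	hash_seq_init(&status, table);
	while ((entry = (ExampleEntry *) hash_seq_search(&status)) != NULL)
	{
		/* process entry; deleting the just-returned entry is allowed */
	}
}
#endif							/* NOT_USED */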
1466 :
1467 : /*
1468 : * hash_freeze
1469 : * Freeze a hashtable against future insertions (deletions are
1470 : * still allowed)
1471 : *
1472 : * The reason for doing this is that by preventing any more bucket splits,
1473 : * we no longer need to worry about registering hash_seq_search scans,
1474 : * and thus caller need not be careful about ensuring hash_seq_term gets
1475 : * called at the right times.
1476 : *
1477 : * Multiple calls to hash_freeze() are allowed, but you can't freeze a table
1478 : * with active scans (since hash_seq_term would then do the wrong thing).
1479 : */
1480 : void
1481 0 : hash_freeze(HTAB *hashp)
1482 : {
1483 0 : if (hashp->isshared)
1484 0 : elog(ERROR, "cannot freeze shared hashtable \"%s\"", hashp->tabname);
1485 0 : if (!hashp->frozen && has_seq_scans(hashp))
1486 0 : elog(ERROR, "cannot freeze hashtable \"%s\" because it has active scans",
1487 : hashp->tabname);
1488 0 : hashp->frozen = true;
1489 0 : }
1490 :
1491 :
1492 : /********************************* UTILITIES ************************/
1493 :
1494 : /*
1495 : * Expand the table by adding one more hash bucket.
1496 : */
1497 : static bool
1498 18967 : expand_table(HTAB *hashp)
1499 : {
1500 18967 : HASHHDR *hctl = hashp->hctl;
1501 : HASHSEGMENT old_seg,
1502 : new_seg;
1503 : long old_bucket,
1504 : new_bucket;
1505 : long new_segnum,
1506 : new_segndx;
1507 : long old_segnum,
1508 : old_segndx;
1509 : HASHBUCKET *oldlink,
1510 : *newlink;
1511 : HASHBUCKET currElement,
1512 : nextElement;
1513 :
1514 18967 : Assert(!IS_PARTITIONED(hctl));
1515 :
1516 : #ifdef HASH_STATISTICS
1517 : hash_expansions++;
1518 : #endif
1519 :
1520 18967 : new_bucket = hctl->max_bucket + 1;
1521 18967 : new_segnum = new_bucket >> hashp->sshift;
1522 18967 : new_segndx = MOD(new_bucket, hashp->ssize);
1523 :
1524 18967 : if (new_segnum >= hctl->nsegs)
1525 : {
1526 : /* Allocate new segment if necessary -- could fail if dir full */
1527 89 : if (new_segnum >= hctl->dsize)
1528 0 : if (!dir_realloc(hashp))
1529 0 : return false;
1530 89 : if (!(hashp->dir[new_segnum] = seg_alloc(hashp)))
1531 0 : return false;
1532 89 : hctl->nsegs++;
1533 : }
1534 :
1535 : /* OK, we created a new bucket */
1536 18967 : hctl->max_bucket++;
1537 :
1538 : /*
1539 : * *Before* changing masks, find old bucket corresponding to same hash
1540 : * values; values in that bucket may need to be relocated to new bucket.
1541 : * Note that new_bucket is certainly larger than low_mask at this point,
1542 : * so we can skip the first step of the regular hash mask calc.
1543 : */
1544 18967 : old_bucket = (new_bucket & hctl->low_mask);
1545 :
1546 : /*
1547 : * If we crossed a power of 2, readjust masks.
1548 : */
1549 18967 : if ((uint32) new_bucket > hctl->high_mask)
1550 : {
1551 207 : hctl->low_mask = hctl->high_mask;
1552 207 : hctl->high_mask = (uint32) new_bucket | hctl->low_mask;
1553 : }
1554 :
1555 : /*
1556 : * Relocate records to the new bucket. NOTE: because of the way the hash
1557 : * masking is done in calc_bucket, only one old bucket can need to be
1558 : * split at this point. With a different way of reducing the hash value,
1559 : * that might not be true!
1560 : */
1561 18967 : old_segnum = old_bucket >> hashp->sshift;
1562 18967 : old_segndx = MOD(old_bucket, hashp->ssize);
1563 :
1564 18967 : old_seg = hashp->dir[old_segnum];
1565 18967 : new_seg = hashp->dir[new_segnum];
1566 :
1567 18967 : oldlink = &old_seg[old_segndx];
1568 18967 : newlink = &new_seg[new_segndx];
1569 :
1570 62324 : for (currElement = *oldlink;
1571 : currElement != NULL;
1572 24390 : currElement = nextElement)
1573 : {
1574 24390 : nextElement = currElement->link;
1575 24390 : if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
1576 : {
1577 12582 : *oldlink = currElement;
1578 12582 : oldlink = &currElement->link;
1579 : }
1580 : else
1581 : {
1582 11808 : *newlink = currElement;
1583 11808 : newlink = &currElement->link;
1584 : }
1585 : }
1586 : /* don't forget to terminate the rebuilt hash chains... */
1587 18967 : *oldlink = NULL;
1588 18967 : *newlink = NULL;
1589 :
1590 18967 : return true;
1591 : }
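      :
      : /*
      :  * Worked example (editorial addition): suppose low_mask = 7,
      :  * high_mask = 15, and max_bucket grows from 11 to 12.  Then
      :  * old_bucket = 12 & 7 = 4, and every entry in bucket 4 satisfies
      :  * hashvalue & 7 == 4, so hashvalue & 15 is either 4 or 12.  After the
      :  * split, calc_bucket() -- which masks with high_mask and falls back to
      :  * low_mask only for buckets beyond max_bucket -- leaves the former in
      :  * bucket 4 and sends the latter to the new bucket 12.  Hence exactly
      :  * one old chain can need splitting, which is the invariant the
      :  * relocation loop above relies on.
      :  */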
1592 :
1593 :
1594 : static bool
1595 0 : dir_realloc(HTAB *hashp)
1596 : {
1597 : HASHSEGMENT *p;
1598 : HASHSEGMENT *old_p;
1599 : long new_dsize;
1600 : long old_dirsize;
1601 : long new_dirsize;
1602 :
1603 0 : if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
1604 0 : return false;
1605 :
1606 : /* Reallocate directory */
1607 0 : new_dsize = hashp->hctl->dsize << 1;
1608 0 : old_dirsize = hashp->hctl->dsize * sizeof(HASHSEGMENT);
1609 0 : new_dirsize = new_dsize * sizeof(HASHSEGMENT);
1610 :
1611 0 : old_p = hashp->dir;
1612 0 : CurrentDynaHashCxt = hashp->hcxt;
1613 0 : p = (HASHSEGMENT *) hashp->alloc((Size) new_dirsize);
1614 :
1615 0 : if (p != NULL)
1616 : {
1617 0 : memcpy(p, old_p, old_dirsize);
1618 0 : MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);
1619 0 : hashp->dir = p;
1620 0 : hashp->hctl->dsize = new_dsize;
1621 :
1622 : /* XXX assume the allocator is palloc, so we know how to free */
1623 0 : Assert(hashp->alloc == DynaHashAlloc);
1624 0 : pfree(old_p);
1625 :
1626 0 : return true;
1627 : }
1628 :
1629 0 : return false;
1630 : }
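      :
      : /*
      :  * Editorial sketch of the grow/copy/zero pattern above in isolation,
      :  * with plain malloc/free standing in for the table's allocator:
      :  *
      :  *     size_t  oldsz = dsize * sizeof(HASHSEGMENT);
      :  *     size_t  newsz = 2 * oldsz;
      :  *     HASHSEGMENT *newdir = malloc(newsz);
      :  *
      :  *     if (newdir == NULL)
      :  *         return false;                       -- give up gracefully
      :  *     memcpy(newdir, olddir, oldsz);          -- keep old seg pointers
      :  *     memset((char *) newdir + oldsz, 0, newsz - oldsz);
      :  *     free(olddir);
      :  *
      :  * Failure is non-fatal by design: expand_table() simply declines to
      :  * add a bucket and the table keeps working with longer chains.
      :  */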
1631 :
1632 :
1633 : static HASHSEGMENT
1634 12585 : seg_alloc(HTAB *hashp)
1635 : {
1636 : HASHSEGMENT segp;
1637 :
1638 12585 : CurrentDynaHashCxt = hashp->hcxt;
1639 12585 : segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->ssize);
1640 :
1641 12585 : if (!segp)
1642 0 : return NULL;
1643 :
1644 12585 : MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->ssize);
1645 :
1646 12585 : return segp;
1647 : }
1648 :
1649 : /*
1650 : * allocate some new elements and link them into the indicated free list
1651 : */
1652 : static bool
1653 13564 : element_alloc(HTAB *hashp, int nelem, int freelist_idx)
1654 : {
1655 13564 : HASHHDR *hctl = hashp->hctl;
1656 : Size elementSize;
1657 : HASHELEMENT *firstElement;
1658 : HASHELEMENT *tmpElement;
1659 : HASHELEMENT *prevElement;
1660 : int i;
1661 :
1662 13564 : if (hashp->isfixed)
1663 0 : return false;
1664 :
1665 : /* Each element has a HASHELEMENT header plus user data. */
1666 13564 : elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
1667 :
1668 13564 : CurrentDynaHashCxt = hashp->hcxt;
1669 13564 : firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize);
1670 :
1671 13564 : if (!firstElement)
1672 0 : return false;
1673 :
1674 : /* prepare to link all the new entries into the freelist */
1675 13564 : prevElement = NULL;
1676 13564 : tmpElement = firstElement;
1677 661202 : for (i = 0; i < nelem; i++)
1678 : {
1679 647638 : tmpElement->link = prevElement;
1680 647638 : prevElement = tmpElement;
1681 647638 : tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
1682 : }
1683 :
1684 : /* if partitioned, must lock to touch freeList */
1685 13564 : if (IS_PARTITIONED(hctl))
1686 800 : SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
1687 :
1688 : /* freelist could be nonempty if two backends did this concurrently */
1689 13564 : firstElement->link = hctl->freeList[freelist_idx].freeList;
1690 13564 : hctl->freeList[freelist_idx].freeList = prevElement;
1691 :
1692 13564 : if (IS_PARTITIONED(hctl))
1693 800 : SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
1694 :
1695 13564 : return true;
1696 : }
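      :
      : /*
      :  * Editorial sketch: the linking loop above, reduced to its essentials
      :  * (malloc stands in for the table's allocator).  One contiguous slab is
      :  * carved into nelem elements; each element points at its predecessor,
      :  * so "prev" ends at the slab's last element and becomes the new LIFO
      :  * head when spliced onto the freelist.
      :  *
      :  *     char       *slab = malloc((size_t) nelem * elementSize);
      :  *     HASHELEMENT *prev = NULL;
      :  *
      :  *     for (int i = 0; i < nelem; i++)
      :  *     {
      :  *         HASHELEMENT *cur = (HASHELEMENT *) (slab + i * elementSize);
      :  *
      :  *         cur->link = prev;       -- chain back to previous element
      :  *         prev = cur;
      :  *     }
      :  *     -- slab's first element keeps the old head; prev is the new head
      :  */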
1697 :
1698 : /* complain when we have detected a corrupted hashtable */
1699 : static void
1700 0 : hash_corrupted(HTAB *hashp)
1701 : {
1702 : /*
1703 : * If the corruption is in a shared hashtable, we'd better force a
1704 : * systemwide restart. Otherwise, just shut down this one backend.
1705 : */
1706 0 : if (hashp->isshared)
1707 0 : elog(PANIC, "hash table \"%s\" corrupted", hashp->tabname);
1708 : else
1709 0 : elog(FATAL, "hash table \"%s\" corrupted", hashp->tabname);
1710 : }
1711 :
1712 : /* calculate ceil(log base 2) of num */
1713 : int
1714 47728 : my_log2(long num)
1715 : {
1716 : int i;
1717 : long limit;
1718 :
1719 : /* guard against too-large input, which would put us into an infinite loop */
1720 47728 : if (num > LONG_MAX / 2)
1721 0 : num = LONG_MAX / 2;
1722 :
1723 47728 : for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
1724 : ;
1725 47728 : return i;
1726 : }
1727 :
1728 : /* calculate first power of 2 >= num, bounded to what will fit in a long */
1729 : static long
1730 140 : next_pow2_long(long num)
1731 : {
1732 : /* my_log2's internal range check is sufficient */
1733 140 : return 1L << my_log2(num);
1734 : }
1735 :
1736 : /* calculate first power of 2 >= num, bounded to what will fit in an int */
1737 : static int
1738 19563 : next_pow2_int(long num)
1739 : {
1740 19563 : if (num > INT_MAX / 2)
1741 0 : num = INT_MAX / 2;
1742 19563 : return 1 << my_log2(num);
1743 : }
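      :
      : /*
      :  * Worked examples (editorial addition): my_log2(1) = 0; my_log2(5) = 3,
      :  * since limit steps through 1, 2, 4, 8 before reaching >= 5; and
      :  * my_log2(8) = 3 exactly.  Hence next_pow2_long(5) == 8 and
      :  * next_pow2_int(8) == 8: both round a request up to the next power of
      :  * 2, keeping quantities such as ssize and dsize mask-friendly.
      :  */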
1744 :
1745 :
1746 : /************************* SEQ SCAN TRACKING ************************/
1747 :
1748 : /*
1749 : * We track active hash_seq_search scans here. The need for this mechanism
1750 : * comes from the fact that a scan will get confused if a bucket split occurs
1751 : * while it's in progress: it might visit entries twice, or even miss some
1752 : * entirely (if it's partway through the same bucket that splits). Hence
1753 : * we want to inhibit bucket splits if there are any active scans on the
1754 : * table being inserted into. This is a fairly rare case in current usage,
1755 : * so just postponing the split until the next insertion seems sufficient.
1756 : *
1757 : * Given present usage of hash_seq_search, only a few scans are likely to be
1758 : * open concurrently; so a finite-size stack of open scans seems sufficient,
1759 : * and we don't worry that linear search is too slow. Note that we do
1760 : * allow multiple scans of the same hashtable to be open concurrently.
1761 : *
1762 : * This mechanism can support concurrent scan and insertion in a shared
1763 : * hashtable if it's the same backend doing both. It would fail otherwise,
1764 : * but locking reasons seem to preclude any such scenario anyway, so we don't
1765 : * worry.
1766 : *
1767 : * This arrangement is reasonably robust if a transient hashtable is deleted
1768 : * without notifying us. The absolute worst case is we might inhibit splits
1769 : * in another table created later at exactly the same address. We will give
1770 : * a warning at transaction end for reference leaks, so any bugs leading to
1771 : * lack of notification should be easy to catch.
1772 : */
1773 :
1774 : #define MAX_SEQ_SCANS 100
1775 :
1776 : static HTAB *seq_scan_tables[MAX_SEQ_SCANS]; /* tables being scanned */
1777 : static int seq_scan_level[MAX_SEQ_SCANS]; /* subtransaction nest level */
1778 : static int num_seq_scans = 0;
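      :
      : /*
      :  * Editorial sketch of the caller-side discipline this tracking supports
      :  * ("tab", "MyEntry", and "found_it" are hypothetical):
      :  *
      :  *     HASH_SEQ_STATUS status;
      :  *     MyEntry    *entry;
      :  *
      :  *     hash_seq_init(&status, tab);    -- registers the scan below
      :  *     while ((entry = (MyEntry *) hash_seq_search(&status)) != NULL)
      :  *     {
      :  *         if (found_it(entry))
      :  *         {
      :  *             hash_seq_term(&status); -- deregister on early exit
      :  *             break;
      :  *         }
      :  *     }
      :  *     -- a scan that runs to completion deregisters itself
      :  */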
1779 :
1780 :
1781 : /* Register a table as having an active hash_seq_search scan */
1782 : static void
1783 158036 : register_seq_scan(HTAB *hashp)
1784 : {
1785 158036 : if (num_seq_scans >= MAX_SEQ_SCANS)
1786 0 : elog(ERROR, "too many active hash_seq_search scans, cannot start one on \"%s\"",
1787 : hashp->tabname);
1788 158036 : seq_scan_tables[num_seq_scans] = hashp;
1789 158036 : seq_scan_level[num_seq_scans] = GetCurrentTransactionNestLevel();
1790 158036 : num_seq_scans++;
1791 158036 : }
1792 :
1793 : /* Deregister an active scan */
1794 : static void
1795 158036 : deregister_seq_scan(HTAB *hashp)
1796 : {
1797 : int i;
1798 :
1799 : /* Search backward since it's most likely at the stack top */
1800 158036 : for (i = num_seq_scans - 1; i >= 0; i--)
1801 : {
1802 158036 : if (seq_scan_tables[i] == hashp)
1803 : {
1804 158036 : seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
1805 158036 : seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
1806 158036 : num_seq_scans--;
1807 316072 : return;
1808 : }
1809 : }
1810 0 : elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
1811 : hashp->tabname);
1812 : }
1813 :
1814 : /* Check if a table has any active scan */
1815 : static bool
1816 18967 : has_seq_scans(HTAB *hashp)
1817 : {
1818 : int i;
1819 :
1820 18967 : for (i = 0; i < num_seq_scans; i++)
1821 : {
1822 0 : if (seq_scan_tables[i] == hashp)
1823 0 : return true;
1824 : }
1825 18967 : return false;
1826 : }
1827 :
1828 : /* Clean up any open scans at end of transaction */
1829 : void
1830 26218 : AtEOXact_HashTables(bool isCommit)
1831 : {
1832 : /*
1833 : * During abort cleanup, open scans are expected; just silently clean 'em
1834 : * out. An open scan at commit means someone forgot a hash_seq_term()
1835 : * call, so complain.
1836 : *
1837 : * Note: it's tempting to try to print the tabname here, but refrain for
1838 : * fear of touching deallocated memory. This isn't a user-facing message
1839 : * anyway, so it needn't be pretty.
1840 : */
1841 26218 : if (isCommit)
1842 : {
1843 : int i;
1844 :
1845 22912 : for (i = 0; i < num_seq_scans; i++)
1846 : {
1847 0 : elog(WARNING, "leaked hash_seq_search scan for hash table %p",
1848 : seq_scan_tables[i]);
1849 : }
1850 : }
1851 26218 : num_seq_scans = 0;
1852 26218 : }
1853 :
1854 : /* Clean up any open scans at end of subtransaction */
1855 : void
1856 372 : AtEOSubXact_HashTables(bool isCommit, int nestDepth)
1857 : {
1858 : int i;
1859 :
1860 : /*
1861 : * Search backward to make cleanup easy. Note we must check all entries,
1862 : * not only those at the end of the array, because the deletion technique
1863 : * doesn't keep them in order.
1864 : */
1865 372 : for (i = num_seq_scans - 1; i >= 0; i--)
1866 : {
1867 0 : if (seq_scan_level[i] >= nestDepth)
1868 : {
1869 0 : if (isCommit)
1870 0 : elog(WARNING, "leaked hash_seq_search scan for hash table %p",
1871 : seq_scan_tables[i]);
1872 0 : seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
1873 0 : seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
1874 0 : num_seq_scans--;
1875 : }
1876 : }
1877 372 : }
|