Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * dshash.c
4 : * Concurrent hash tables backed by dynamic shared memory areas.
5 : *
6 : * This is an open hashing hash table, with a linked list at each table
7 : * entry. It supports dynamic resizing, as required to prevent the linked
8 : * lists from growing too long on average. Currently, only growing is
9 : * supported: the hash table never becomes smaller.
10 : *
11 : * To deal with concurrency, it has a fixed size set of partitions, each of
12 : * which is independently locked. Each bucket maps to a partition; so insert,
13 : * find and iterate operations normally only acquire one lock. Therefore,
14 : * good concurrency is achieved whenever such operations don't collide at the
15 : * lock partition level. However, when a resize operation begins, all
16 : * partition locks must be acquired simultaneously for a brief period. This
17 : * is only expected to happen a small number of times until a stable size is
18 : * found, since growth is geometric.
19 : *
20 : * Future versions may support iterators and incremental resizing; for now
21 : * the implementation is minimalist.
22 : *
23 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
24 : * Portions Copyright (c) 1994, Regents of the University of California
25 : *
26 : * IDENTIFICATION
27 : * src/backend/lib/dshash.c
28 : *
29 : *-------------------------------------------------------------------------
30 : */
31 :
32 : #include "postgres.h"
33 :
34 : #include "lib/dshash.h"
35 : #include "storage/ipc.h"
36 : #include "storage/lwlock.h"
37 : #include "utils/dsa.h"
38 : #include "utils/hsearch.h"
39 : #include "utils/memutils.h"
40 :
41 : /*
 * An item in the hash table.  This wraps the user's entry object in an
 * envelope that holds the key's hash value and a pointer to the next item
 * in the same bucket.
45 : */
struct dshash_table_item
{
	/*
	 * The next item in the same bucket, or an invalid dsa_pointer (see
	 * DsaPointerIsValid) at the end of the chain.
	 */
	dsa_pointer next;
	/* The hashed key, to avoid having to recompute it. */
	dshash_hash hash;
	/* The user's entry object follows here.  See ENTRY_FROM_ITEM(item). */
};
54 :
55 : /*
56 : * The number of partitions for locking purposes. This is set to match
57 : * NUM_BUFFER_PARTITIONS for now, on the basis that whatever's good enough for
58 : * the buffer pool must be good enough for any other purpose. This could
59 : * become a runtime parameter in future.
60 : */
61 : #define DSHASH_NUM_PARTITIONS_LOG2 7
62 : #define DSHASH_NUM_PARTITIONS (1 << DSHASH_NUM_PARTITIONS_LOG2)
63 :
64 : /* A magic value used to identify our hash tables. */
65 : #define DSHASH_MAGIC 0x75ff6a20
66 :
67 : /*
68 : * Tracking information for each lock partition. Initially, each partition
69 : * corresponds to one bucket, but each time the hash table grows, the buckets
70 : * covered by each partition split so the number of buckets covered doubles.
71 : *
72 : * We might want to add padding here so that each partition is on a different
73 : * cache line, but doing so would bloat this structure considerably.
74 : */
typedef struct dshash_partition
{
	LWLock		lock;			/* Protects all buckets in this partition. */

	/*
	 * # of items in this partition's buckets.  Used to decide when to grow
	 * the table; see MAX_COUNT_PER_PARTITION and dshash_find_or_insert().
	 */
	size_t		count;
} dshash_partition;
80 :
81 : /*
82 : * The head object for a hash table. This will be stored in dynamic shared
83 : * memory.
84 : */
typedef struct dshash_table_control
{
	/* The dsa_pointer to this control object; doubles as the table handle. */
	dshash_table_handle handle;
	/* Set to DSHASH_MAGIC while the table is alive; zeroed on destroy. */
	uint32		magic;
	/* One independently-locked partition per slice of the bucket space. */
	dshash_partition partitions[DSHASH_NUM_PARTITIONS];
	/* Tranche used when initializing the partition LWLocks. */
	int			lwlock_tranche_id;

	/*
	 * The following members are written to only when ALL partitions locks are
	 * held.  They can be read when any one partition lock is held.
	 */

	/* Number of buckets expressed as power of 2 (8 = 256 buckets). */
	size_t		size_log2;		/* log2(number of buckets) */
	dsa_pointer buckets;		/* current bucket array */
} dshash_table_control;
101 :
102 : /*
103 : * Per-backend state for a dynamic hash table.
104 : */
struct dshash_table
{
	dsa_area   *area;			/* Backing dynamic shared memory area. */
	dshash_parameters params;	/* Parameters. */
	void	   *arg;			/* User-supplied data pointer. */
	dshash_table_control *control;	/* Control object in DSM. */

	/*
	 * Backend-local cache of the current bucket array address and size.
	 * These can go stale when another backend resizes the table; they are
	 * refreshed under a partition lock by ensure_valid_bucket_pointers().
	 */
	dsa_pointer *buckets;		/* Current bucket pointers in DSM. */
	size_t		size_log2;		/* log2(number of buckets) */
	bool		find_locked;	/* Is any partition lock held by 'find'? */
	bool		find_exclusively_locked;	/* ... exclusively? */
};
116 :
117 : /* Given a pointer to an item, find the entry (user data) it holds. */
118 : #define ENTRY_FROM_ITEM(item) \
119 : ((char *)(item) + MAXALIGN(sizeof(dshash_table_item)))
120 :
121 : /* Given a pointer to an entry, find the item that holds it. */
122 : #define ITEM_FROM_ENTRY(entry) \
123 : ((dshash_table_item *)((char *)(entry) - \
124 : MAXALIGN(sizeof(dshash_table_item))))
125 :
126 : /* How many resize operations (bucket splits) have there been? */
127 : #define NUM_SPLITS(size_log2) \
128 : (size_log2 - DSHASH_NUM_PARTITIONS_LOG2)
129 :
130 : /* How many buckets are there in each partition at a given size? */
131 : #define BUCKETS_PER_PARTITION(size_log2) \
132 : (UINT64CONST(1) << NUM_SPLITS(size_log2))
133 :
134 : /* Max entries before we need to grow. Half + quarter = 75% load factor. */
135 : #define MAX_COUNT_PER_PARTITION(hash_table) \
136 : (BUCKETS_PER_PARTITION(hash_table->size_log2) / 2 + \
137 : BUCKETS_PER_PARTITION(hash_table->size_log2) / 4)
138 :
139 : /* Choose partition based on the highest order bits of the hash. */
140 : #define PARTITION_FOR_HASH(hash) \
141 : (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - DSHASH_NUM_PARTITIONS_LOG2))
142 :
143 : /*
144 : * Find the bucket index for a given hash and table size. Each time the table
145 : * doubles in size, the appropriate bucket for a given hash value doubles and
146 : * possibly adds one, depending on the newly revealed bit, so that all buckets
147 : * are split.
148 : */
149 : #define BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, size_log2) \
150 : (hash >> ((sizeof(dshash_hash) * CHAR_BIT) - (size_log2)))
151 :
152 : /* The index of the first bucket in a given partition. */
153 : #define BUCKET_INDEX_FOR_PARTITION(partition, size_log2) \
154 : ((partition) << NUM_SPLITS(size_log2))
155 :
156 : /* The head of the active bucket for a given hash value (lvalue). */
157 : #define BUCKET_FOR_HASH(hash_table, hash) \
158 : (hash_table->buckets[ \
159 : BUCKET_INDEX_FOR_HASH_AND_SIZE(hash, \
160 : hash_table->size_log2)])
161 :
162 : static void delete_item(dshash_table *hash_table,
163 : dshash_table_item *item);
164 : static void resize(dshash_table *hash_table, size_t new_size);
165 : static inline void ensure_valid_bucket_pointers(dshash_table *hash_table);
166 : static inline dshash_table_item *find_in_bucket(dshash_table *hash_table,
167 : const void *key,
168 : dsa_pointer item_pointer);
169 : static void insert_item_into_bucket(dshash_table *hash_table,
170 : dsa_pointer item_pointer,
171 : dshash_table_item *item,
172 : dsa_pointer *bucket);
173 : static dshash_table_item *insert_into_bucket(dshash_table *hash_table,
174 : const void *key,
175 : dsa_pointer *bucket);
176 : static bool delete_key_from_bucket(dshash_table *hash_table,
177 : const void *key,
178 : dsa_pointer *bucket_head);
179 : static bool delete_item_from_bucket(dshash_table *hash_table,
180 : dshash_table_item *item,
181 : dsa_pointer *bucket_head);
182 : static inline dshash_hash hash_key(dshash_table *hash_table, const void *key);
183 : static inline bool equal_keys(dshash_table *hash_table,
184 : const void *a, const void *b);
185 :
186 : #define PARTITION_LOCK(hash_table, i) \
187 : (&(hash_table)->control->partitions[(i)].lock)
188 :
189 : /*
190 : * Create a new hash table backed by the given dynamic shared area, with the
191 : * given parameters. The returned object is allocated in backend-local memory
192 : * using the current MemoryContext. 'arg' will be passed through to the
193 : * compare and hash functions.
194 : */
195 : dshash_table *
196 0 : dshash_create(dsa_area *area, const dshash_parameters *params, void *arg)
197 : {
198 : dshash_table *hash_table;
199 : dsa_pointer control;
200 :
201 : /* Allocate the backend-local object representing the hash table. */
202 0 : hash_table = palloc(sizeof(dshash_table));
203 :
204 : /* Allocate the control object in shared memory. */
205 0 : control = dsa_allocate(area, sizeof(dshash_table_control));
206 :
207 : /* Set up the local and shared hash table structs. */
208 0 : hash_table->area = area;
209 0 : hash_table->params = *params;
210 0 : hash_table->arg = arg;
211 0 : hash_table->control = dsa_get_address(area, control);
212 0 : hash_table->control->handle = control;
213 0 : hash_table->control->magic = DSHASH_MAGIC;
214 0 : hash_table->control->lwlock_tranche_id = params->tranche_id;
215 :
216 : /* Set up the array of lock partitions. */
217 : {
218 0 : dshash_partition *partitions = hash_table->control->partitions;
219 0 : int tranche_id = hash_table->control->lwlock_tranche_id;
220 : int i;
221 :
222 0 : for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
223 : {
224 0 : LWLockInitialize(&partitions[i].lock, tranche_id);
225 0 : partitions[i].count = 0;
226 : }
227 : }
228 :
229 0 : hash_table->find_locked = false;
230 0 : hash_table->find_exclusively_locked = false;
231 :
232 : /*
233 : * Set up the initial array of buckets. Our initial size is the same as
234 : * the number of partitions.
235 : */
236 0 : hash_table->control->size_log2 = DSHASH_NUM_PARTITIONS_LOG2;
237 0 : hash_table->control->buckets =
238 0 : dsa_allocate_extended(area,
239 : sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS,
240 : DSA_ALLOC_NO_OOM | DSA_ALLOC_ZERO);
241 0 : if (!DsaPointerIsValid(hash_table->control->buckets))
242 : {
243 0 : dsa_free(area, control);
244 0 : ereport(ERROR,
245 : (errcode(ERRCODE_OUT_OF_MEMORY),
246 : errmsg("out of memory"),
247 : errdetail("Failed on DSA request of size %zu.",
248 : sizeof(dsa_pointer) * DSHASH_NUM_PARTITIONS)));
249 : }
250 0 : hash_table->buckets = dsa_get_address(area,
251 0 : hash_table->control->buckets);
252 :
253 0 : return hash_table;
254 : }
255 :
256 : /*
257 : * Attach to an existing hash table using a handle. The returned object is
258 : * allocated in backend-local memory using the current MemoryContext. 'arg'
259 : * will be passed through to the compare and hash functions.
260 : */
261 : dshash_table *
262 0 : dshash_attach(dsa_area *area, const dshash_parameters *params,
263 : dshash_table_handle handle, void *arg)
264 : {
265 : dshash_table *hash_table;
266 : dsa_pointer control;
267 :
268 : /* Allocate the backend-local object representing the hash table. */
269 0 : hash_table = palloc(sizeof(dshash_table));
270 :
271 : /* Find the control object in shared memory. */
272 0 : control = handle;
273 :
274 : /* Set up the local hash table struct. */
275 0 : hash_table->area = area;
276 0 : hash_table->params = *params;
277 0 : hash_table->arg = arg;
278 0 : hash_table->control = dsa_get_address(area, control);
279 0 : hash_table->find_locked = false;
280 0 : hash_table->find_exclusively_locked = false;
281 0 : Assert(hash_table->control->magic == DSHASH_MAGIC);
282 :
283 0 : return hash_table;
284 : }
285 :
286 : /*
287 : * Detach from a hash table. This frees backend-local resources associated
288 : * with the hash table, but the hash table will continue to exist until it is
289 : * either explicitly destroyed (by a backend that is still attached to it), or
290 : * the area that backs it is returned to the operating system.
291 : */
292 : void
293 0 : dshash_detach(dshash_table *hash_table)
294 : {
295 0 : Assert(!hash_table->find_locked);
296 :
297 : /* The hash table may have been destroyed. Just free local memory. */
298 0 : pfree(hash_table);
299 0 : }
300 :
301 : /*
302 : * Destroy a hash table, returning all memory to the area. The caller must be
303 : * certain that no other backend will attempt to access the hash table before
 * calling this function.  Other backends must explicitly call dshash_detach to
305 : * free up backend-local memory associated with the hash table. The backend
306 : * that calls dshash_destroy must not call dshash_detach.
307 : */
308 : void
309 0 : dshash_destroy(dshash_table *hash_table)
310 : {
311 : size_t size;
312 : size_t i;
313 :
314 0 : Assert(hash_table->control->magic == DSHASH_MAGIC);
315 0 : ensure_valid_bucket_pointers(hash_table);
316 :
317 : /* Free all the entries. */
318 0 : size = ((size_t) 1) << hash_table->size_log2;
319 0 : for (i = 0; i < size; ++i)
320 : {
321 0 : dsa_pointer item_pointer = hash_table->buckets[i];
322 :
323 0 : while (DsaPointerIsValid(item_pointer))
324 : {
325 : dshash_table_item *item;
326 : dsa_pointer next_item_pointer;
327 :
328 0 : item = dsa_get_address(hash_table->area, item_pointer);
329 0 : next_item_pointer = item->next;
330 0 : dsa_free(hash_table->area, item_pointer);
331 0 : item_pointer = next_item_pointer;
332 : }
333 : }
334 :
335 : /*
336 : * Vandalize the control block to help catch programming errors where
337 : * other backends access the memory formerly occupied by this hash table.
338 : */
339 0 : hash_table->control->magic = 0;
340 :
341 : /* Free the active table and control object. */
342 0 : dsa_free(hash_table->area, hash_table->control->buckets);
343 0 : dsa_free(hash_table->area, hash_table->control->handle);
344 :
345 0 : pfree(hash_table);
346 0 : }
347 :
348 : /*
349 : * Get a handle that can be used by other processes to attach to this hash
350 : * table.
351 : */
352 : dshash_table_handle
353 0 : dshash_get_hash_table_handle(dshash_table *hash_table)
354 : {
355 0 : Assert(hash_table->control->magic == DSHASH_MAGIC);
356 :
357 0 : return hash_table->control->handle;
358 : }
359 :
360 : /*
361 : * Look up an entry, given a key. Returns a pointer to an entry if one can be
362 : * found with the given key. Returns NULL if the key is not found. If a
363 : * non-NULL value is returned, the entry is locked and must be released by
364 : * calling dshash_release_lock. If an error is raised before
365 : * dshash_release_lock is called, the lock will be released automatically, but
366 : * the caller must take care to ensure that the entry is not left corrupted.
367 : * The lock mode is either shared or exclusive depending on 'exclusive'.
368 : *
 * The caller must not already hold a lock on any partition of this table.
370 : *
371 : * Note that the lock held is in fact an LWLock, so interrupts will be held on
372 : * return from this function, and not resumed until dshash_release_lock is
373 : * called. It is a very good idea for the caller to release the lock quickly.
374 : */
375 : void *
376 0 : dshash_find(dshash_table *hash_table, const void *key, bool exclusive)
377 : {
378 : dshash_hash hash;
379 : size_t partition;
380 : dshash_table_item *item;
381 :
382 0 : hash = hash_key(hash_table, key);
383 0 : partition = PARTITION_FOR_HASH(hash);
384 :
385 0 : Assert(hash_table->control->magic == DSHASH_MAGIC);
386 0 : Assert(!hash_table->find_locked);
387 :
388 0 : LWLockAcquire(PARTITION_LOCK(hash_table, partition),
389 : exclusive ? LW_EXCLUSIVE : LW_SHARED);
390 0 : ensure_valid_bucket_pointers(hash_table);
391 :
392 : /* Search the active bucket. */
393 0 : item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));
394 :
395 0 : if (!item)
396 : {
397 : /* Not found. */
398 0 : LWLockRelease(PARTITION_LOCK(hash_table, partition));
399 0 : return NULL;
400 : }
401 : else
402 : {
403 : /* The caller will free the lock by calling dshash_release. */
404 0 : hash_table->find_locked = true;
405 0 : hash_table->find_exclusively_locked = exclusive;
406 0 : return ENTRY_FROM_ITEM(item);
407 : }
408 : }
409 :
410 : /*
411 : * Returns a pointer to an exclusively locked item which must be released with
412 : * dshash_release_lock. If the key is found in the hash table, 'found' is set
413 : * to true and a pointer to the existing entry is returned. If the key is not
414 : * found, 'found' is set to false, and a pointer to a newly created entry is
415 : * returned.
416 : *
417 : * Notes above dshash_find() regarding locking and error handling equally
418 : * apply here.
419 : */
void *
dshash_find_or_insert(dshash_table *hash_table,
					  const void *key,
					  bool *found)
{
	dshash_hash hash;
	size_t		partition_index;
	dshash_partition *partition;
	dshash_table_item *item;

	hash = hash_key(hash_table, key);
	partition_index = PARTITION_FOR_HASH(hash);
	partition = &hash_table->control->partitions[partition_index];

	Assert(hash_table->control->magic == DSHASH_MAGIC);
	Assert(!hash_table->find_locked);

restart:
	LWLockAcquire(PARTITION_LOCK(hash_table, partition_index),
				  LW_EXCLUSIVE);
	ensure_valid_bucket_pointers(hash_table);

	/* Search the active bucket. */
	item = find_in_bucket(hash_table, key, BUCKET_FOR_HASH(hash_table, hash));

	if (item)
		*found = true;
	else
	{
		*found = false;

		/* Check if we are getting too full. */
		if (partition->count > MAX_COUNT_PER_PARTITION(hash_table))
		{
			/*
			 * The load factor (= keys / buckets) for all buckets protected by
			 * this partition is > 0.75.  Presumably the same applies
			 * generally across the whole hash table (though we don't attempt
			 * to track that directly to avoid contention on some kind of
			 * central counter; we just assume that this partition is
			 * representative).  This is a good time to resize.
			 *
			 * Give up our existing lock first, because resizing needs to
			 * reacquire all the locks in the right order to avoid deadlocks.
			 */
			LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
			resize(hash_table, hash_table->size_log2 + 1);

			/*
			 * Start over from the lock acquisition: while unlocked, another
			 * backend may have inserted this key, or resized the table and
			 * moved our bucket.
			 */
			goto restart;
		}

		/* Finally we can try to insert the new item. */
		item = insert_into_bucket(hash_table, key,
								  &BUCKET_FOR_HASH(hash_table, hash));
		item->hash = hash;
		/* Adjust per-lock-partition counter for load factor knowledge. */
		++partition->count;
	}

	/* The caller must release the lock with dshash_release_lock. */
	hash_table->find_locked = true;
	hash_table->find_exclusively_locked = true;
	return ENTRY_FROM_ITEM(item);
}
484 :
485 : /*
486 : * Remove an entry by key. Returns true if the key was found and the
487 : * corresponding entry was removed.
488 : *
489 : * To delete an entry that you already have a pointer to, see
490 : * dshash_delete_entry.
491 : */
492 : bool
493 0 : dshash_delete_key(dshash_table *hash_table, const void *key)
494 : {
495 : dshash_hash hash;
496 : size_t partition;
497 : bool found;
498 :
499 0 : Assert(hash_table->control->magic == DSHASH_MAGIC);
500 0 : Assert(!hash_table->find_locked);
501 :
502 0 : hash = hash_key(hash_table, key);
503 0 : partition = PARTITION_FOR_HASH(hash);
504 :
505 0 : LWLockAcquire(PARTITION_LOCK(hash_table, partition), LW_EXCLUSIVE);
506 0 : ensure_valid_bucket_pointers(hash_table);
507 :
508 0 : if (delete_key_from_bucket(hash_table, key,
509 0 : &BUCKET_FOR_HASH(hash_table, hash)))
510 : {
511 0 : Assert(hash_table->control->partitions[partition].count > 0);
512 0 : found = true;
513 0 : --hash_table->control->partitions[partition].count;
514 : }
515 : else
516 0 : found = false;
517 :
518 0 : LWLockRelease(PARTITION_LOCK(hash_table, partition));
519 :
520 0 : return found;
521 : }
522 :
523 : /*
524 : * Remove an entry. The entry must already be exclusively locked, and must
525 : * have been obtained by dshash_find or dshash_find_or_insert. Note that this
526 : * function releases the lock just like dshash_release_lock.
527 : *
528 : * To delete an entry by key, see dshash_delete_key.
529 : */
530 : void
531 0 : dshash_delete_entry(dshash_table *hash_table, void *entry)
532 : {
533 0 : dshash_table_item *item = ITEM_FROM_ENTRY(entry);
534 0 : size_t partition = PARTITION_FOR_HASH(item->hash);
535 :
536 0 : Assert(hash_table->control->magic == DSHASH_MAGIC);
537 0 : Assert(hash_table->find_locked);
538 0 : Assert(hash_table->find_exclusively_locked);
539 0 : Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition),
540 : LW_EXCLUSIVE));
541 :
542 0 : delete_item(hash_table, item);
543 0 : hash_table->find_locked = false;
544 0 : hash_table->find_exclusively_locked = false;
545 0 : LWLockRelease(PARTITION_LOCK(hash_table, partition));
546 0 : }
547 :
548 : /*
549 : * Unlock an entry which was locked by dshash_find or dshash_find_or_insert.
550 : */
551 : void
552 0 : dshash_release_lock(dshash_table *hash_table, void *entry)
553 : {
554 0 : dshash_table_item *item = ITEM_FROM_ENTRY(entry);
555 0 : size_t partition_index = PARTITION_FOR_HASH(item->hash);
556 :
557 0 : Assert(hash_table->control->magic == DSHASH_MAGIC);
558 0 : Assert(hash_table->find_locked);
559 0 : Assert(LWLockHeldByMeInMode(PARTITION_LOCK(hash_table, partition_index),
560 : hash_table->find_exclusively_locked
561 : ? LW_EXCLUSIVE : LW_SHARED));
562 :
563 0 : hash_table->find_locked = false;
564 0 : hash_table->find_exclusively_locked = false;
565 0 : LWLockRelease(PARTITION_LOCK(hash_table, partition_index));
566 0 : }
567 :
568 : /*
569 : * A compare function that forwards to memcmp.
570 : */
int
dshash_memcmp(const void *a, const void *b, size_t size, void *arg)
{
	/* 'arg' is ignored; keys are compared as raw bytes via memcmp. */
	return memcmp(a, b, size);
}
576 :
577 : /*
578 : * A hash function that forwards to tag_hash.
579 : */
580 : dshash_hash
581 0 : dshash_memhash(const void *v, size_t size, void *arg)
582 : {
583 0 : return tag_hash(v, size);
584 : }
585 :
586 : /*
587 : * Print debugging information about the internal state of the hash table to
588 : * stderr. The caller must hold no partition locks.
589 : */
590 : void
591 0 : dshash_dump(dshash_table *hash_table)
592 : {
593 : size_t i;
594 : size_t j;
595 :
596 0 : Assert(hash_table->control->magic == DSHASH_MAGIC);
597 0 : Assert(!hash_table->find_locked);
598 :
599 0 : for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
600 : {
601 0 : Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));
602 0 : LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_SHARED);
603 : }
604 :
605 0 : ensure_valid_bucket_pointers(hash_table);
606 :
607 0 : fprintf(stderr,
608 0 : "hash table size = %zu\n", (size_t) 1 << hash_table->size_log2);
609 0 : for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
610 : {
611 0 : dshash_partition *partition = &hash_table->control->partitions[i];
612 0 : size_t begin = BUCKET_INDEX_FOR_PARTITION(i, hash_table->size_log2);
613 0 : size_t end = BUCKET_INDEX_FOR_PARTITION(i + 1, hash_table->size_log2);
614 :
615 0 : fprintf(stderr, " partition %zu\n", i);
616 0 : fprintf(stderr,
617 : " active buckets (key count = %zu)\n", partition->count);
618 :
619 0 : for (j = begin; j < end; ++j)
620 : {
621 0 : size_t count = 0;
622 0 : dsa_pointer bucket = hash_table->buckets[j];
623 :
624 0 : while (DsaPointerIsValid(bucket))
625 : {
626 : dshash_table_item *item;
627 :
628 0 : item = dsa_get_address(hash_table->area, bucket);
629 :
630 0 : bucket = item->next;
631 0 : ++count;
632 : }
633 0 : fprintf(stderr, " bucket %zu (key count = %zu)\n", j, count);
634 : }
635 : }
636 :
637 0 : for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
638 0 : LWLockRelease(PARTITION_LOCK(hash_table, i));
639 0 : }
640 :
641 : /*
642 : * Delete a locked item to which we have a pointer.
643 : */
644 : static void
645 0 : delete_item(dshash_table *hash_table, dshash_table_item *item)
646 : {
647 0 : size_t hash = item->hash;
648 0 : size_t partition = PARTITION_FOR_HASH(hash);
649 :
650 0 : Assert(LWLockHeldByMe(PARTITION_LOCK(hash_table, partition)));
651 :
652 0 : if (delete_item_from_bucket(hash_table, item,
653 0 : &BUCKET_FOR_HASH(hash_table, hash)))
654 : {
655 0 : Assert(hash_table->control->partitions[partition].count > 0);
656 0 : --hash_table->control->partitions[partition].count;
657 : }
658 : else
659 : {
660 0 : Assert(false);
661 : }
662 0 : }
663 :
/*
 * Grow the hash table if necessary to the requested number of buckets.  The
 * requested size must be double some previously observed size.  If the table
 * is found to be big enough already (because another backend expanded it
 * concurrently), this is a no-op.
 *
 * Must be called without any partition lock held.
 */
static void
resize(dshash_table *hash_table, size_t new_size_log2)
{
	dsa_pointer old_buckets;
	dsa_pointer new_buckets_shared;
	dsa_pointer *new_buckets;
	size_t		size;
	size_t		new_size = ((size_t) 1) << new_size_log2;
	size_t		i;

	/*
	 * Acquire the locks for all lock partitions.  This is expensive, but we
	 * shouldn't have to do it many times.  Locks are always taken in
	 * ascending partition order, so concurrent resizers cannot deadlock.
	 */
	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
	{
		Assert(!LWLockHeldByMe(PARTITION_LOCK(hash_table, i)));

		LWLockAcquire(PARTITION_LOCK(hash_table, i), LW_EXCLUSIVE);
		if (i == 0 && hash_table->control->size_log2 >= new_size_log2)
		{
			/*
			 * Another backend has already increased the size; we can avoid
			 * obtaining all the locks and return early.  (Holding any one
			 * partition lock is sufficient to read size_log2 safely; see
			 * the comments in dshash_table_control.)
			 */
			LWLockRelease(PARTITION_LOCK(hash_table, 0));
			return;
		}
	}

	/* Growth is geometric, so the request must be exactly one doubling. */
	Assert(new_size_log2 == hash_table->control->size_log2 + 1);

	/* Allocate the space for the new table. */
	new_buckets_shared = dsa_allocate0(hash_table->area,
									   sizeof(dsa_pointer) * new_size);
	new_buckets = dsa_get_address(hash_table->area, new_buckets_shared);

	/*
	 * We've allocated the new bucket array; all that remains to do now is to
	 * reinsert all items, which amounts to adjusting all the pointers.
	 */
	size = ((size_t) 1) << hash_table->control->size_log2;
	for (i = 0; i < size; ++i)
	{
		dsa_pointer item_pointer = hash_table->buckets[i];

		while (DsaPointerIsValid(item_pointer))
		{
			dshash_table_item *item;
			dsa_pointer next_item_pointer;

			item = dsa_get_address(hash_table->area, item_pointer);
			next_item_pointer = item->next;
			/* The newly revealed hash bit selects the destination bucket. */
			insert_item_into_bucket(hash_table, item_pointer, item,
									&new_buckets[BUCKET_INDEX_FOR_HASH_AND_SIZE(item->hash,
																				new_size_log2)]);
			item_pointer = next_item_pointer;
		}
	}

	/* Swap the hash table into place and free the old one. */
	old_buckets = hash_table->control->buckets;
	hash_table->control->buckets = new_buckets_shared;
	hash_table->control->size_log2 = new_size_log2;
	hash_table->buckets = new_buckets;
	dsa_free(hash_table->area, old_buckets);

	/* Release all the locks. */
	for (i = 0; i < DSHASH_NUM_PARTITIONS; ++i)
		LWLockRelease(PARTITION_LOCK(hash_table, i));
}
743 :
744 : /*
745 : * Make sure that our backend-local bucket pointers are up to date. The
746 : * caller must have locked one lock partition, which prevents resize() from
747 : * running concurrently.
748 : */
749 : static inline void
750 0 : ensure_valid_bucket_pointers(dshash_table *hash_table)
751 : {
752 0 : if (hash_table->size_log2 != hash_table->control->size_log2)
753 : {
754 0 : hash_table->buckets = dsa_get_address(hash_table->area,
755 0 : hash_table->control->buckets);
756 0 : hash_table->size_log2 = hash_table->control->size_log2;
757 : }
758 0 : }
759 :
760 : /*
761 : * Scan a locked bucket for a match, using the provided compare function.
762 : */
763 : static inline dshash_table_item *
764 0 : find_in_bucket(dshash_table *hash_table, const void *key,
765 : dsa_pointer item_pointer)
766 : {
767 0 : while (DsaPointerIsValid(item_pointer))
768 : {
769 : dshash_table_item *item;
770 :
771 0 : item = dsa_get_address(hash_table->area, item_pointer);
772 0 : if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
773 0 : return item;
774 0 : item_pointer = item->next;
775 : }
776 0 : return NULL;
777 : }
778 :
779 : /*
780 : * Insert an already-allocated item into a bucket.
781 : */
782 : static void
783 0 : insert_item_into_bucket(dshash_table *hash_table,
784 : dsa_pointer item_pointer,
785 : dshash_table_item *item,
786 : dsa_pointer *bucket)
787 : {
788 0 : Assert(item == dsa_get_address(hash_table->area, item_pointer));
789 :
790 0 : item->next = *bucket;
791 0 : *bucket = item_pointer;
792 0 : }
793 :
794 : /*
795 : * Allocate space for an entry with the given key and insert it into the
796 : * provided bucket.
797 : */
798 : static dshash_table_item *
799 0 : insert_into_bucket(dshash_table *hash_table,
800 : const void *key,
801 : dsa_pointer *bucket)
802 : {
803 : dsa_pointer item_pointer;
804 : dshash_table_item *item;
805 :
806 0 : item_pointer = dsa_allocate(hash_table->area,
807 : hash_table->params.entry_size +
808 : MAXALIGN(sizeof(dshash_table_item)));
809 0 : item = dsa_get_address(hash_table->area, item_pointer);
810 0 : memcpy(ENTRY_FROM_ITEM(item), key, hash_table->params.key_size);
811 0 : insert_item_into_bucket(hash_table, item_pointer, item, bucket);
812 0 : return item;
813 : }
814 :
815 : /*
816 : * Search a bucket for a matching key and delete it.
817 : */
818 : static bool
819 0 : delete_key_from_bucket(dshash_table *hash_table,
820 : const void *key,
821 : dsa_pointer *bucket_head)
822 : {
823 0 : while (DsaPointerIsValid(*bucket_head))
824 : {
825 : dshash_table_item *item;
826 :
827 0 : item = dsa_get_address(hash_table->area, *bucket_head);
828 :
829 0 : if (equal_keys(hash_table, key, ENTRY_FROM_ITEM(item)))
830 : {
831 : dsa_pointer next;
832 :
833 0 : next = item->next;
834 0 : dsa_free(hash_table->area, *bucket_head);
835 0 : *bucket_head = next;
836 :
837 0 : return true;
838 : }
839 0 : bucket_head = &item->next;
840 : }
841 0 : return false;
842 : }
843 :
844 : /*
845 : * Delete the specified item from the bucket.
846 : */
847 : static bool
848 0 : delete_item_from_bucket(dshash_table *hash_table,
849 : dshash_table_item *item,
850 : dsa_pointer *bucket_head)
851 : {
852 0 : while (DsaPointerIsValid(*bucket_head))
853 : {
854 : dshash_table_item *bucket_item;
855 :
856 0 : bucket_item = dsa_get_address(hash_table->area, *bucket_head);
857 :
858 0 : if (bucket_item == item)
859 : {
860 : dsa_pointer next;
861 :
862 0 : next = item->next;
863 0 : dsa_free(hash_table->area, *bucket_head);
864 0 : *bucket_head = next;
865 0 : return true;
866 : }
867 0 : bucket_head = &bucket_item->next;
868 : }
869 0 : return false;
870 : }
871 :
872 : /*
873 : * Compute the hash value for a key.
874 : */
875 : static inline dshash_hash
876 0 : hash_key(dshash_table *hash_table, const void *key)
877 : {
878 0 : return hash_table->params.hash_function(key,
879 : hash_table->params.key_size,
880 : hash_table->arg);
881 : }
882 :
883 : /*
884 : * Check whether two keys compare equal.
885 : */
886 : static inline bool
887 0 : equal_keys(dshash_table *hash_table, const void *a, const void *b)
888 : {
889 0 : return hash_table->params.compare_function(a, b,
890 : hash_table->params.key_size,
891 0 : hash_table->arg) == 0;
892 : }
|