LCOV - code coverage report
Current view: top level - src/backend/utils/hash - dynahash.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 412 521 79.1 %
Date: 2017-09-29 15:12:54 Functions: 32 35 91.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dynahash.c
       4             :  *    dynamic hash tables
       5             :  *
       6             :  * dynahash.c supports both local-to-a-backend hash tables and hash tables in
       7             :  * shared memory.  For shared hash tables, it is the caller's responsibility
       8             :  * to provide appropriate access interlocking.  The simplest convention is
       9             :  * that a single LWLock protects the whole hash table.  Searches (HASH_FIND or
      10             :  * hash_seq_search) need only shared lock, but any update requires exclusive
      11             :  * lock.  For heavily-used shared tables, the single-lock approach creates a
      12             :  * concurrency bottleneck, so we also support "partitioned" locking wherein
      13             :  * there are multiple LWLocks guarding distinct subsets of the table.  To use
      14             :  * a hash table in partitioned mode, the HASH_PARTITION flag must be given
      15             :  * to hash_create.  This prevents any attempt to split buckets on-the-fly.
      16             :  * Therefore, each hash bucket chain operates independently, and no fields
      17             :  * of the hash header change after init except nentries and freeList.
      18             :  * (A partitioned table uses multiple copies of those fields, guarded by
      19             :  * spinlocks, for additional concurrency.)
      20             :  * This lets any subset of the hash buckets be treated as a separately
      21             :  * lockable partition.  We expect callers to use the low-order bits of a
      22             :  * lookup key's hash value as a partition number --- this will work because
      23             :  * of the way calc_bucket() maps hash values to bucket numbers.
      24             :  *
      25             :  * For hash tables in shared memory, the memory allocator function should
      26             :  * match malloc's semantics of returning NULL on failure.  For hash tables
      27             :  * in local memory, we typically use palloc() which will throw error on
      28             :  * failure.  The code in this file has to cope with both cases.
      29             :  *
      30             :  * dynahash.c provides support for these types of lookup keys:
      31             :  *
      32             :  * 1. Null-terminated C strings (truncated if necessary to fit in keysize),
      33             :  * compared as though by strcmp().  This is the default behavior.
      34             :  *
      35             :  * 2. Arbitrary binary data of size keysize, compared as though by memcmp().
      36             :  * (Caller must ensure there are no undefined padding bits in the keys!)
      37             :  * This is selected by specifying HASH_BLOBS flag to hash_create.
      38             :  *
      39             :  * 3. More complex key behavior can be selected by specifying user-supplied
      40             :  * hashing, comparison, and/or key-copying functions.  At least a hashing
      41             :  * function must be supplied; comparison defaults to memcmp() and key copying
      42             :  * to memcpy() when a user-defined hashing function is selected.
      43             :  *
      44             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      45             :  * Portions Copyright (c) 1994, Regents of the University of California
      46             :  *
      47             :  *
      48             :  * IDENTIFICATION
      49             :  *    src/backend/utils/hash/dynahash.c
      50             :  *
      51             :  *-------------------------------------------------------------------------
      52             :  */
      53             : 
      54             : /*
      55             :  * Original comments:
      56             :  *
      57             :  * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
      58             :  * Coded into C, with minor code improvements, and with hsearch(3) interface,
      59             :  * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
      60             :  * also, hcreate/hdestroy routines added to simulate hsearch(3).
      61             :  *
      62             :  * These routines simulate hsearch(3) and family, with the important
      63             :  * difference that the hash table is dynamic - can grow indefinitely
      64             :  * beyond its original size (as supplied to hcreate()).
      65             :  *
      66             :  * Performance appears to be comparable to that of hsearch(3).
      67             :  * The 'source-code' options referred to in hsearch(3)'s 'man' page
      68             :  * are not implemented; otherwise functionality is identical.
      69             :  *
      70             :  * Compilation controls:
      71             :  * HASH_DEBUG controls some informative traces, mainly for debugging.
      72             :  * HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained;
      73             :  * when combined with HASH_DEBUG, these are displayed by hdestroy().
      74             :  *
      75             :  * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
      76             :  * concatenation property, in probably unnecessary code 'optimization'.
      77             :  *
      78             :  * Modified margo@postgres.berkeley.edu February 1990
      79             :  *      added multiple table interface
      80             :  * Modified by sullivan@postgres.berkeley.edu April 1990
      81             :  *      changed ctl structure for shared memory
      82             :  */
      83             : 
      84             : #include "postgres.h"
      85             : 
      86             : #include <limits.h>
      87             : 
      88             : #include "access/xact.h"
      89             : #include "storage/shmem.h"
      90             : #include "storage/spin.h"
      91             : #include "utils/dynahash.h"
      92             : #include "utils/memutils.h"
      93             : 
      94             : 
      95             : /*
      96             :  * Constants
      97             :  *
      98             :  * A hash table has a top-level "directory", each of whose entries points
      99             :  * to a "segment" of ssize bucket headers.  The maximum number of hash
     100             :  * buckets is thus dsize * ssize (but dsize may be expansible).  Of course,
     101             :  * the number of records in the table can be larger, but we don't want a
     102             :  * whole lot of records per bucket or performance goes down.
     103             :  *
     104             :  * In a hash table allocated in shared memory, the directory cannot be
     105             :  * expanded because it must stay at a fixed address.  The directory size
     106             :  * should be selected using hash_select_dirsize (and you'd better have
     107             :  * a good idea of the maximum number of entries!).  For non-shared hash
     108             :  * tables, the initial directory size can be left at the default.
     109             :  */
     110             : #define DEF_SEGSIZE            256
     111             : #define DEF_SEGSIZE_SHIFT      8    /* must be log2(DEF_SEGSIZE) */
     112             : #define DEF_DIRSIZE            256
     113             : #define DEF_FFACTOR            1    /* default fill factor */
     114             : 
     115             : /* Number of freelists to be used for a partitioned hash table. */
     116             : #define NUM_FREELISTS           32
     117             : 
     118             : /* A hash bucket is a linked list of HASHELEMENTs */
     119             : typedef HASHELEMENT *HASHBUCKET;
     120             : 
     121             : /* A hash segment is an array of bucket headers */
     122             : typedef HASHBUCKET *HASHSEGMENT;
     123             : 
     124             : /*
     125             :  * Per-freelist data.
     126             :  *
     127             :  * In a partitioned hash table, each freelist is associated with a specific
     128             :  * set of hashcodes, as determined by the FREELIST_IDX() macro below.
     129             :  * nentries tracks the number of live hashtable entries having those hashcodes
     130             :  * (NOT the number of entries in the freelist, as you might expect).
     131             :  *
     132             :  * The coverage of a freelist might be more or less than one partition, so it
     133             :  * needs its own lock rather than relying on caller locking.  Relying on that
     134             :  * wouldn't work even if the coverage was the same, because of the occasional
     135             :  * need to "borrow" entries from another freelist; see get_hash_entry().
     136             :  *
     137             :  * Using an array of FreeListData instead of separate arrays of mutexes,
     138             :  * nentries and freeLists helps to reduce sharing of cache lines between
     139             :  * different mutexes.
     140             :  */
     141             : typedef struct
     142             : {
     143             :     slock_t     mutex;          /* spinlock for this freelist */
     144             :     long        nentries;       /* number of entries in associated buckets */
     145             :     HASHELEMENT *freeList;      /* chain of free elements */
     146             : } FreeListData;
     147             : 
     148             : /*
     149             :  * Header structure for a hash table --- contains all changeable info
     150             :  *
     151             :  * In a shared-memory hash table, the HASHHDR is in shared memory, while
     152             :  * each backend has a local HTAB struct.  For a non-shared table, there isn't
     153             :  * any functional difference between HASHHDR and HTAB, but we separate them
     154             :  * anyway to share code between shared and non-shared tables.
     155             :  */
     156             : struct HASHHDR
     157             : {
     158             :     /*
     159             :      * The freelist can become a point of contention in high-concurrency hash
     160             :      * tables, so we use an array of freelists, each with its own mutex and
     161             :      * nentries count, instead of just a single one.  Although the freelists
     162             :      * normally operate independently, we will scavenge entries from freelists
     163             :      * other than a hashcode's default freelist when necessary.
     164             :      *
     165             :      * If the hash table is not partitioned, only freeList[0] is used and its
     166             :      * spinlock is not used at all; callers' locking is assumed sufficient.
     167             :      */
     168             :     FreeListData freeList[NUM_FREELISTS];
     169             : 
     170             :     /* These fields can change, but not in a partitioned table */
     171             :     /* Also, dsize can't change in a shared table, even if unpartitioned */
     172             :     long        dsize;          /* directory size */
     173             :     long        nsegs;          /* number of allocated segments (<= dsize) */
     174             :     uint32      max_bucket;     /* ID of maximum bucket in use */
     175             :     uint32      high_mask;      /* mask to modulo into entire table */
     176             :     uint32      low_mask;       /* mask to modulo into lower half of table */
     177             : 
     178             :     /* These fields are fixed at hashtable creation */
     179             :     Size        keysize;        /* hash key length in bytes */
     180             :     Size        entrysize;      /* total user element size in bytes */
     181             :     long        num_partitions; /* # partitions (must be power of 2), or 0 */
     182             :     long        ffactor;        /* target fill factor */
     183             :     long        max_dsize;      /* 'dsize' limit if directory is fixed size */
     184             :     long        ssize;          /* segment size --- must be power of 2 */
     185             :     int         sshift;         /* segment shift = log2(ssize) */
     186             :     int         nelem_alloc;    /* number of entries to allocate at once */
     187             : 
     188             : #ifdef HASH_STATISTICS
     189             : 
     190             :     /*
     191             :      * Count statistics here.  NB: stats code doesn't bother with mutex, so
     192             :      * counts could be corrupted a bit in a partitioned table.
     193             :      */
     194             :     long        accesses;
     195             :     long        collisions;
     196             : #endif
     197             : };
     198             : 
     199             : #define IS_PARTITIONED(hctl)  ((hctl)->num_partitions != 0)
     200             : 
     201             : #define FREELIST_IDX(hctl, hashcode) \
     202             :     (IS_PARTITIONED(hctl) ? (hashcode) % NUM_FREELISTS : 0)
     203             : 
     204             : /*
     205             :  * Top control structure for a hashtable --- in a shared table, each backend
     206             :  * has its own copy (OK since no fields change at runtime)
     207             :  */
     208             : struct HTAB
     209             : {
     210             :     HASHHDR    *hctl;           /* => shared control information */
     211             :     HASHSEGMENT *dir;           /* directory of segment starts */
     212             :     HashValueFunc hash;         /* hash function */
     213             :     HashCompareFunc match;      /* key comparison function */
     214             :     HashCopyFunc keycopy;       /* key copying function */
     215             :     HashAllocFunc alloc;        /* memory allocator */
     216             :     MemoryContext hcxt;         /* memory context if default allocator used */
     217             :     char       *tabname;        /* table name (for error messages) */
     218             :     bool        isshared;       /* true if table is in shared memory */
     219             :     bool        isfixed;        /* if true, don't enlarge */
     220             : 
     221             :     /* freezing a shared table isn't allowed, so we can keep state here */
     222             :     bool        frozen;         /* true = no more inserts allowed */
     223             : 
     224             :     /* We keep local copies of these fixed values to reduce contention */
     225             :     Size        keysize;        /* hash key length in bytes */
     226             :     long        ssize;          /* segment size --- must be power of 2 */
     227             :     int         sshift;         /* segment shift = log2(ssize) */
     228             : };
     229             : 
     230             : /*
     231             :  * Key (also entry) part of a HASHELEMENT
     232             :  */
     233             : #define ELEMENTKEY(helem)  (((char *)(helem)) + MAXALIGN(sizeof(HASHELEMENT)))
     234             : 
     235             : /*
     236             :  * Obtain element pointer given pointer to key
     237             :  */
     238             : #define ELEMENT_FROM_KEY(key)  \
     239             :     ((HASHELEMENT *) (((char *) (key)) - MAXALIGN(sizeof(HASHELEMENT))))
     240             : 
     241             : /*
     242             :  * Fast MOD arithmetic; valid only when y is a power of 2.
     243             :  */
     244             : #define MOD(x,y)               ((x) & ((y)-1))
     245             : 
     246             : #if HASH_STATISTICS
     247             : static long hash_accesses,
     248             :             hash_collisions,
     249             :             hash_expansions;
     250             : #endif
     251             : 
     252             : /*
     253             :  * Private function prototypes
     254             :  */
     255             : static void *DynaHashAlloc(Size size);
     256             : static HASHSEGMENT seg_alloc(HTAB *hashp);
     257             : static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx);
     258             : static bool dir_realloc(HTAB *hashp);
     259             : static bool expand_table(HTAB *hashp);
     260             : static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx);
     261             : static void hdefault(HTAB *hashp);
     262             : static int  choose_nelem_alloc(Size entrysize);
     263             : static bool init_htab(HTAB *hashp, long nelem);
     264             : static void hash_corrupted(HTAB *hashp);
     265             : static long next_pow2_long(long num);
     266             : static int  next_pow2_int(long num);
     267             : static void register_seq_scan(HTAB *hashp);
     268             : static void deregister_seq_scan(HTAB *hashp);
     269             : static bool has_seq_scans(HTAB *hashp);
     270             : 
     271             : 
     272             : /*
     273             :  * memory allocation support
     274             :  */
     275             : static MemoryContext CurrentDynaHashCxt = NULL;
     276             : 
     277             : static void *
     278       53291 : DynaHashAlloc(Size size)
     279             : {
     280       53291 :     Assert(MemoryContextIsValid(CurrentDynaHashCxt));
     281       53291 :     return MemoryContextAlloc(CurrentDynaHashCxt, size);
     282             : }
     283             : 
     284             : 
     285             : /*
     286             :  * HashCompareFunc for string keys
     287             :  *
     288             :  * Because we copy keys with strlcpy(), they will be truncated at keysize-1
     289             :  * bytes, so we can only compare that many ... hence we call strncmp with a
     290             :  * limit of keysize-1 rather than keysize.
     291             :  */
     292             : static int
     293       30327 : string_compare(const char *key1, const char *key2, Size keysize)
     294             : {
     295       30327 :     return strncmp(key1, key2, keysize - 1);
     296             : }
     297             : 
     298             : 
     299             : /************************** CREATE ROUTINES **********************/
     300             : 
     301             : /*
     302             :  * hash_create -- create a new dynamic hash table
     303             :  *
     304             :  *  tabname: a name for the table (for debugging purposes)
     305             :  *  nelem: maximum number of elements expected
     306             :  *  *info: additional table parameters, as indicated by flags
     307             :  *  flags: bitmask indicating which parameters to take from *info
     308             :  *
     309             :  * Note: for a shared-memory hashtable, nelem needs to be a pretty good
     310             :  * estimate, since we can't expand the table on the fly.  But an unshared
     311             :  * hashtable can be expanded on-the-fly, so it's better for nelem to be
     312             :  * on the small side and let the table grow if it's exceeded.  An overly
     313             :  * large nelem will penalize hash_seq_search speed without buying much.
     314             :  */
     315             : HTAB *
     316        9769 : hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
     317             : {
     318             :     HTAB       *hashp;
     319             :     HASHHDR    *hctl;
     320             : 
     321             :     /*
     322             :      * For shared hash tables, we have a local hash header (HTAB struct) that
     323             :      * we allocate in TopMemoryContext; all else is in shared memory.
     324             :      *
     325             :      * For non-shared hash tables, everything including the hash header is in
     326             :      * a memory context created specially for the hash table --- this makes
     327             :      * hash_destroy very simple.  The memory context is made a child of either
     328             :      * a context specified by the caller, or TopMemoryContext if nothing is
     329             :      * specified.
     330             :      */
     331        9769 :     if (flags & HASH_SHARED_MEM)
     332             :     {
     333             :         /* Set up to allocate the hash header */
     334          35 :         CurrentDynaHashCxt = TopMemoryContext;
     335             :     }
     336             :     else
     337             :     {
     338             :         /* Create the hash table's private memory context */
     339        9734 :         if (flags & HASH_CONTEXT)
     340        6132 :             CurrentDynaHashCxt = info->hcxt;
     341             :         else
     342        3602 :             CurrentDynaHashCxt = TopMemoryContext;
     343        9734 :         CurrentDynaHashCxt = AllocSetContextCreate(CurrentDynaHashCxt,
     344             :                                                    tabname,
     345             :                                                    ALLOCSET_DEFAULT_SIZES);
     346             :     }
     347             : 
     348             :     /* Initialize the hash header, plus a copy of the table name */
     349        9769 :     hashp = (HTAB *) DynaHashAlloc(sizeof(HTAB) + strlen(tabname) + 1);
     350        9769 :     MemSet(hashp, 0, sizeof(HTAB));
     351             : 
     352        9769 :     hashp->tabname = (char *) (hashp + 1);
     353        9769 :     strcpy(hashp->tabname, tabname);
     354             : 
     355             :     /*
     356             :      * Select the appropriate hash function (see comments at head of file).
     357             :      */
     358        9769 :     if (flags & HASH_FUNCTION)
     359          90 :         hashp->hash = info->hash;
     360        9679 :     else if (flags & HASH_BLOBS)
     361             :     {
     362             :         /* We can optimize hashing for common key sizes */
     363        6547 :         Assert(flags & HASH_ELEM);
     364        6547 :         if (info->keysize == sizeof(uint32))
     365        3942 :             hashp->hash = uint32_hash;
     366             :         else
     367        2605 :             hashp->hash = tag_hash;
     368             :     }
     369             :     else
     370        3132 :         hashp->hash = string_hash;   /* default hash function */
     371             : 
     372             :     /*
     373             :      * If you don't specify a match function, it defaults to string_compare if
     374             :      * you used string_hash (either explicitly or by default) and to memcmp
     375             :      * otherwise.
     376             :      *
     377             :      * Note: explicitly specifying string_hash is deprecated, because this
     378             :      * might not work for callers in loadable modules on some platforms due to
     379             :      * referencing a trampoline instead of the string_hash function proper.
     380             :      * Just let it default, eh?
     381             :      */
     382        9769 :     if (flags & HASH_COMPARE)
     383          80 :         hashp->match = info->match;
     384        9689 :     else if (hashp->hash == string_hash)
     385        3132 :         hashp->match = (HashCompareFunc) string_compare;
     386             :     else
     387        6557 :         hashp->match = memcmp;
     388             : 
     389             :     /*
     390             :      * Similarly, the key-copying function defaults to strlcpy or memcpy.
     391             :      */
     392        9769 :     if (flags & HASH_KEYCOPY)
     393           0 :         hashp->keycopy = info->keycopy;
     394        9769 :     else if (hashp->hash == string_hash)
     395        3132 :         hashp->keycopy = (HashCopyFunc) strlcpy;
     396             :     else
     397        6637 :         hashp->keycopy = memcpy;
     398             : 
     399             :     /* And select the entry allocation function, too. */
     400        9769 :     if (flags & HASH_ALLOC)
     401          35 :         hashp->alloc = info->alloc;
     402             :     else
     403        9734 :         hashp->alloc = DynaHashAlloc;
     404             : 
     405        9769 :     if (flags & HASH_SHARED_MEM)
     406             :     {
     407             :         /*
     408             :          * ctl structure and directory are preallocated for shared memory
     409             :          * tables.  Note that HASH_DIRSIZE and HASH_ALLOC had better be set as
     410             :          * well.
     411             :          */
     412          35 :         hashp->hctl = info->hctl;
     413          35 :         hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR));
     414          35 :         hashp->hcxt = NULL;
     415          35 :         hashp->isshared = true;
     416             : 
     417             :         /* hash table already exists, we're just attaching to it */
     418          35 :         if (flags & HASH_ATTACH)
     419             :         {
     420             :             /* make local copies of some heavily-used values */
     421           0 :             hctl = hashp->hctl;
     422           0 :             hashp->keysize = hctl->keysize;
     423           0 :             hashp->ssize = hctl->ssize;
     424           0 :             hashp->sshift = hctl->sshift;
     425             : 
     426           0 :             return hashp;
     427             :         }
     428             :     }
     429             :     else
     430             :     {
     431             :         /* setup hash table defaults */
     432        9734 :         hashp->hctl = NULL;
     433        9734 :         hashp->dir = NULL;
     434        9734 :         hashp->hcxt = CurrentDynaHashCxt;
     435        9734 :         hashp->isshared = false;
     436             :     }
     437             : 
     438        9769 :     if (!hashp->hctl)
     439             :     {
     440        9734 :         hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
     441        9734 :         if (!hashp->hctl)
     442           0 :             ereport(ERROR,
     443             :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     444             :                      errmsg("out of memory")));
     445             :     }
     446             : 
     447        9769 :     hashp->frozen = false;
     448             : 
     449        9769 :     hdefault(hashp);
     450             : 
     451        9769 :     hctl = hashp->hctl;
     452             : 
     453        9769 :     if (flags & HASH_PARTITION)
     454             :     {
     455             :         /* Doesn't make sense to partition a local hash table */
     456          25 :         Assert(flags & HASH_SHARED_MEM);
     457             : 
     458             :         /*
     459             :          * The number of partitions had better be a power of 2. Also, it must
     460             :          * be less than INT_MAX (see init_htab()), so call the int version of
     461             :          * next_pow2.
     462             :          */
     463          25 :         Assert(info->num_partitions == next_pow2_int(info->num_partitions));
     464             : 
     465          25 :         hctl->num_partitions = info->num_partitions;
     466             :     }
     467             : 
     468        9769 :     if (flags & HASH_SEGMENT)
     469             :     {
     470           0 :         hctl->ssize = info->ssize;
     471           0 :         hctl->sshift = my_log2(info->ssize);
     472             :         /* ssize had better be a power of 2 */
     473           0 :         Assert(hctl->ssize == (1L << hctl->sshift));
     474             :     }
     475        9769 :     if (flags & HASH_FFACTOR)
     476           0 :         hctl->ffactor = info->ffactor;
     477             : 
     478             :     /*
     479             :      * SHM hash tables have fixed directory size passed by the caller.
     480             :      */
     481        9769 :     if (flags & HASH_DIRSIZE)
     482             :     {
     483          35 :         hctl->max_dsize = info->max_dsize;
     484          35 :         hctl->dsize = info->dsize;
     485             :     }
     486             : 
     487             :     /*
     488             :      * The hash table allocates space for both key and data, but the caller
     489             :      * must specify the key size and total entry size (see HASH_ELEM).
     490             :      */
     491        9769 :     if (flags & HASH_ELEM)
     492             :     {
     493        9769 :         Assert(info->entrysize >= info->keysize);
     494        9769 :         hctl->keysize = info->keysize;
     495        9769 :         hctl->entrysize = info->entrysize;
     496             :     }
     497             : 
     498             :     /* make local copies of heavily-used constant fields */
     499        9769 :     hashp->keysize = hctl->keysize;
     500        9769 :     hashp->ssize = hctl->ssize;
     501        9769 :     hashp->sshift = hctl->sshift;
     502             : 
     503             :     /* Build the hash directory structure */
     504        9769 :     if (!init_htab(hashp, nelem))
     505           0 :         elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname);
     506             : 
     507             :     /*
     508             :      * For a shared hash table, preallocate the requested number of elements.
     509             :      * This reduces problems with run-time out-of-shared-memory conditions.
     510             :      *
     511             :      * For a non-shared hash table, preallocate the requested number of
     512             :      * elements if it's less than our chosen nelem_alloc.  This avoids wasting
     513             :      * space if the caller correctly estimates a small table size.
     514             :      */
     515       19503 :     if ((flags & HASH_SHARED_MEM) ||
     516        9734 :         nelem < hctl->nelem_alloc)
     517             :     {
     518             :         int         i,
     519             :                     freelist_partitions,
     520             :                     nelem_alloc,
     521             :                     nelem_alloc_first;
     522             : 
     523             :         /*
     524             :          * If hash table is partitioned, give each freelist an equal share of
     525             :          * the initial allocation.  Otherwise only freeList[0] is used.
     526             :          */
     527        3293 :         if (IS_PARTITIONED(hashp->hctl))
     528          25 :             freelist_partitions = NUM_FREELISTS;
     529             :         else
     530        3268 :             freelist_partitions = 1;
     531             : 
     532        3293 :         nelem_alloc = nelem / freelist_partitions;
     533        3293 :         if (nelem_alloc <= 0)
     534           0 :             nelem_alloc = 1;
     535             : 
     536             :         /*
     537             :          * Make sure we'll allocate all the requested elements; freeList[0]
     538             :          * gets the excess if the request isn't divisible by NUM_FREELISTS.
     539             :          */
     540        3293 :         if (nelem_alloc * freelist_partitions < nelem)
     541           1 :             nelem_alloc_first =
     542           1 :                 nelem - nelem_alloc * (freelist_partitions - 1);
     543             :         else
     544        3292 :             nelem_alloc_first = nelem_alloc;
     545             : 
     546        7361 :         for (i = 0; i < freelist_partitions; i++)
     547             :         {
     548        4068 :             int         temp = (i == 0) ? nelem_alloc_first : nelem_alloc;
     549             : 
     550        4068 :             if (!element_alloc(hashp, temp, i))
     551           0 :                 ereport(ERROR,
     552             :                         (errcode(ERRCODE_OUT_OF_MEMORY),
     553             :                          errmsg("out of memory")));
     554             :         }
     555             :     }
     556             : 
     557        9769 :     if (flags & HASH_FIXED_SIZE)
     558          15 :         hashp->isfixed = true;
     559        9769 :     return hashp;
     560             : }
     561             : 
     562             : /*
     563             :  * Set default HASHHDR parameters.
     564             :  */
     565             : static void
     566        9769 : hdefault(HTAB *hashp)
     567             : {
     568        9769 :     HASHHDR    *hctl = hashp->hctl;
     569             : 
     570        9769 :     MemSet(hctl, 0, sizeof(HASHHDR));
     571             : 
     572        9769 :     hctl->dsize = DEF_DIRSIZE;
     573        9769 :     hctl->nsegs = 0;
     574             : 
     575             :     /* rather pointless defaults for key & entry size */
     576        9769 :     hctl->keysize = sizeof(char *);
     577        9769 :     hctl->entrysize = 2 * sizeof(char *);
     578             : 
     579        9769 :     hctl->num_partitions = 0;    /* not partitioned */
     580             : 
     581        9769 :     hctl->ffactor = DEF_FFACTOR;
     582             : 
     583             :     /* table has no fixed maximum size */
     584        9769 :     hctl->max_dsize = NO_MAX_DSIZE;
     585             : 
     586        9769 :     hctl->ssize = DEF_SEGSIZE;
     587        9769 :     hctl->sshift = DEF_SEGSIZE_SHIFT;
     588             : 
     589             : #ifdef HASH_STATISTICS
     590             :     hctl->accesses = hctl->collisions = 0;
     591             : #endif
     592        9769 : }
     593             : 
     594             : /*
     595             :  * Given the user-specified entry size, choose nelem_alloc, i.e., how many
     596             :  * elements to add to the hash table when we need more.
     597             :  */
     598             : static int
     599        9804 : choose_nelem_alloc(Size entrysize)
     600             : {
     601             :     int         nelem_alloc;
     602             :     Size        elementSize;
     603             :     Size        allocSize;
     604             : 
     605             :     /* Each element has a HASHELEMENT header plus user data. */
     606             :     /* NB: this had better match element_alloc() */
     607        9804 :     elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
     608             : 
     609             :     /*
     610             :      * The idea here is to choose nelem_alloc at least 32, but round up so
     611             :      * that the allocation request will be a power of 2 or just less. This
     612             :      * makes little difference for hash tables in shared memory, but for hash
     613             :      * tables managed by palloc, the allocation request will be rounded up to
     614             :      * a power of 2 anyway.  If we fail to take this into account, we'll waste
     615             :      * as much as half the allocated space.
     616             :      */
     617        9804 :     allocSize = 32 * 4;         /* assume elementSize at least 8 */
     618             :     do
     619             :     {
     620       36787 :         allocSize <<= 1;
     621       36787 :         nelem_alloc = allocSize / elementSize;
     622       36787 :     } while (nelem_alloc < 32);
     623             : 
     624        9804 :     return nelem_alloc;
     625             : }
     626             : 
     627             : /*
     628             :  * Compute derived fields of hctl and build the initial directory/segment
     629             :  * arrays
     630             :  */
     631             : static bool
     632        9769 : init_htab(HTAB *hashp, long nelem)
     633             : {
     634        9769 :     HASHHDR    *hctl = hashp->hctl;
     635             :     HASHSEGMENT *segp;
     636             :     int         nbuckets;
     637             :     int         nsegs;
     638             :     int         i;
     639             : 
     640             :     /*
     641             :      * initialize mutexes if it's a partitioned table
     642             :      */
     643        9769 :     if (IS_PARTITIONED(hctl))
     644         825 :         for (i = 0; i < NUM_FREELISTS; i++)
     645         800 :             SpinLockInit(&(hctl->freeList[i].mutex));
     646             : 
     647             :     /*
     648             :      * Divide number of elements by the fill factor to determine a desired
     649             :      * number of buckets.  Allocate space for the next greater power of two
     650             :      * number of buckets
     651             :      */
     652        9769 :     nbuckets = next_pow2_int((nelem - 1) / hctl->ffactor + 1);
     653             : 
     654             :     /*
     655             :      * In a partitioned table, nbuckets must be at least equal to
     656             :      * num_partitions; were it less, keys with apparently different partition
     657             :      * numbers would map to the same bucket, breaking partition independence.
     658             :      * (Normally nbuckets will be much bigger; this is just a safety check.)
     659             :      */
     660       19538 :     while (nbuckets < hctl->num_partitions)
     661           0 :         nbuckets <<= 1;
     662             : 
     663        9769 :     hctl->max_bucket = hctl->low_mask = nbuckets - 1;
     664        9769 :     hctl->high_mask = (nbuckets << 1) - 1;
     665             : 
     666             :     /*
     667             :      * Figure number of directory segments needed, round up to a power of 2
     668             :      */
     669        9769 :     nsegs = (nbuckets - 1) / hctl->ssize + 1;
     670        9769 :     nsegs = next_pow2_int(nsegs);
     671             : 
     672             :     /*
     673             :      * Make sure directory is big enough. If pre-allocated directory is too
     674             :      * small, choke (caller screwed up).
     675             :      */
     676        9769 :     if (nsegs > hctl->dsize)
     677             :     {
     678           0 :         if (!(hashp->dir))
     679           0 :             hctl->dsize = nsegs;
     680             :         else
     681           0 :             return false;
     682             :     }
     683             : 
     684             :     /* Allocate a directory */
     685        9769 :     if (!(hashp->dir))
     686             :     {
     687        9734 :         CurrentDynaHashCxt = hashp->hcxt;
     688        9734 :         hashp->dir = (HASHSEGMENT *)
     689        9734 :             hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT));
     690        9734 :         if (!hashp->dir)
     691           0 :             return false;
     692             :     }
     693             : 
     694             :     /* Allocate initial segments */
     695       22265 :     for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
     696             :     {
     697       12496 :         *segp = seg_alloc(hashp);
     698       12496 :         if (*segp == NULL)
     699           0 :             return false;
     700             :     }
     701             : 
     702             :     /* Choose number of entries to allocate at a time */
     703        9769 :     hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize);
     704             : 
     705             : #if HASH_DEBUG
     706             :     fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n",
     707             :             "TABLE POINTER   ", hashp,
     708             :             "DIRECTORY SIZE  ", hctl->dsize,
     709             :             "SEGMENT SIZE    ", hctl->ssize,
     710             :             "SEGMENT SHIFT   ", hctl->sshift,
     711             :             "FILL FACTOR     ", hctl->ffactor,
     712             :             "MAX BUCKET      ", hctl->max_bucket,
     713             :             "HIGH MASK       ", hctl->high_mask,
     714             :             "LOW  MASK       ", hctl->low_mask,
     715             :             "NSEGS           ", hctl->nsegs);
     716             : #endif
     717        9769 :     return true;
     718             : }
     719             : 
     720             : /*
     721             :  * Estimate the space needed for a hashtable containing the given number
     722             :  * of entries of given size.
     723             :  * NOTE: this is used to estimate the footprint of hashtables in shared
     724             :  * memory; therefore it does not count HTAB which is in local memory.
     725             :  * NB: assumes that all hash structure parameters have default values!
     726             :  */
     727             : Size
     728          35 : hash_estimate_size(long num_entries, Size entrysize)
     729             : {
     730             :     Size        size;
     731             :     long        nBuckets,
     732             :                 nSegments,
     733             :                 nDirEntries,
     734             :                 nElementAllocs,
     735             :                 elementSize,
     736             :                 elementAllocCnt;
     737             : 
     738             :     /* estimate number of buckets wanted */
     739          35 :     nBuckets = next_pow2_long((num_entries - 1) / DEF_FFACTOR + 1);
     740             :     /* # of segments needed for nBuckets */
     741          35 :     nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
     742             :     /* directory entries */
     743          35 :     nDirEntries = DEF_DIRSIZE;
     744          70 :     while (nDirEntries < nSegments)
     745           0 :         nDirEntries <<= 1;        /* dir_alloc doubles dsize at each call */
     746             : 
     747             :     /* fixed control info */
     748          35 :     size = MAXALIGN(sizeof(HASHHDR));   /* but not HTAB, per above */
     749             :     /* directory */
     750          35 :     size = add_size(size, mul_size(nDirEntries, sizeof(HASHSEGMENT)));
     751             :     /* segments */
     752          35 :     size = add_size(size, mul_size(nSegments,
     753             :                                    MAXALIGN(DEF_SEGSIZE * sizeof(HASHBUCKET))));
     754             :     /* elements --- allocated in groups of choose_nelem_alloc() entries */
     755          35 :     elementAllocCnt = choose_nelem_alloc(entrysize);
     756          35 :     nElementAllocs = (num_entries - 1) / elementAllocCnt + 1;
     757          35 :     elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);
     758          35 :     size = add_size(size,
     759             :                     mul_size(nElementAllocs,
     760             :                              mul_size(elementAllocCnt, elementSize)));
     761             : 
     762          35 :     return size;
     763             : }
     764             : 
     765             : /*
     766             :  * Select an appropriate directory size for a hashtable with the given
     767             :  * maximum number of entries.
     768             :  * This is only needed for hashtables in shared memory, whose directories
     769             :  * cannot be expanded dynamically.
     770             :  * NB: assumes that all hash structure parameters have default values!
     771             :  *
     772             :  * XXX this had better agree with the behavior of init_htab()...
     773             :  */
     774             : long
     775          35 : hash_select_dirsize(long num_entries)
     776             : {
     777             :     long        nBuckets,
     778             :                 nSegments,
     779             :                 nDirEntries;
     780             : 
     781             :     /* estimate number of buckets wanted */
     782          35 :     nBuckets = next_pow2_long((num_entries - 1) / DEF_FFACTOR + 1);
     783             :     /* # of segments needed for nBuckets */
     784          35 :     nSegments = next_pow2_long((nBuckets - 1) / DEF_SEGSIZE + 1);
     785             :     /* directory entries */
     786          35 :     nDirEntries = DEF_DIRSIZE;
     787          70 :     while (nDirEntries < nSegments)
     788           0 :         nDirEntries <<= 1;        /* dir_alloc doubles dsize at each call */
     789             : 
     790          35 :     return nDirEntries;
     791             : }
     792             : 
     793             : /*
     794             :  * Compute the required initial memory allocation for a shared-memory
     795             :  * hashtable with the given parameters.  We need space for the HASHHDR
     796             :  * and for the (non expansible) directory.
     797             :  */
     798             : Size
     799          35 : hash_get_shared_size(HASHCTL *info, int flags)
     800             : {
     801          35 :     Assert(flags & HASH_DIRSIZE);
     802          35 :     Assert(info->dsize == info->max_dsize);
     803          35 :     return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
     804             : }
     805             : 
     806             : 
     807             : /********************** DESTROY ROUTINES ************************/
     808             : 
     809             : void
     810        4226 : hash_destroy(HTAB *hashp)
     811             : {
     812        4226 :     if (hashp != NULL)
     813             :     {
     814             :         /* allocation method must be one we know how to free, too */
     815        4226 :         Assert(hashp->alloc == DynaHashAlloc);
     816             :         /* so this hashtable must have its own context */
     817        4226 :         Assert(hashp->hcxt != NULL);
     818             : 
     819        4226 :         hash_stats("destroy", hashp);
     820             : 
     821             :         /*
     822             :          * Free everything by destroying the hash table's memory context.
     823             :          */
     824        4226 :         MemoryContextDelete(hashp->hcxt);
     825             :     }
     826        4226 : }
     827             : 
     828             : void
     829        4226 : hash_stats(const char *where, HTAB *hashp)
     830             : {
     831             : #if HASH_STATISTICS
     832             :     fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
     833             :             where, hashp->hctl->accesses, hashp->hctl->collisions);
     834             : 
     835             :     fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
     836             :             hash_get_num_entries(hashp), (long) hashp->hctl->keysize,
     837             :             hashp->hctl->max_bucket, hashp->hctl->nsegs);
     838             :     fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
     839             :             where, hash_accesses, hash_collisions);
     840             :     fprintf(stderr, "hash_stats: total expansions %ld\n",
     841             :             hash_expansions);
     842             : #endif
     843        4226 : }
     844             : 
     845             : /*******************************SEARCH ROUTINES *****************************/
     846             : 
     847             : 
     848             : /*
     849             :  * get_hash_value -- exported routine to calculate a key's hash value
     850             :  *
     851             :  * We export this because for partitioned tables, callers need to compute
     852             :  * the partition number (from the low-order bits of the hash value) before
     853             :  * searching.
     854             :  */
     855             : uint32
     856     4151782 : get_hash_value(HTAB *hashp, const void *keyPtr)
     857             : {
     858     4151782 :     return hashp->hash(keyPtr, hashp->keysize);
     859             : }
     860             : 
     861             : /* Convert a hash value to a bucket number */
     862             : static inline uint32
     863     8244430 : calc_bucket(HASHHDR *hctl, uint32 hash_val)
     864             : {
     865             :     uint32      bucket;
     866             : 
     867     8244430 :     bucket = hash_val & hctl->high_mask;
     868     8244430 :     if (bucket > hctl->max_bucket)
     869     3871448 :         bucket = bucket & hctl->low_mask;
     870             : 
     871     8244430 :     return bucket;
     872             : }
     873             : 
     874             : /*
     875             :  * hash_search -- look up key in table and perform action
     876             :  * hash_search_with_hash_value -- same, with key's hash value already computed
     877             :  *
     878             :  * action is one of:
     879             :  *      HASH_FIND: look up key in table
     880             :  *      HASH_ENTER: look up key in table, creating entry if not present
     881             :  *      HASH_ENTER_NULL: same, but return NULL if out of memory
     882             :  *      HASH_REMOVE: look up key in table, remove entry if present
     883             :  *
     884             :  * Return value is a pointer to the element found/entered/removed if any,
     885             :  * or NULL if no match was found.  (NB: in the case of the REMOVE action,
     886             :  * the result is a dangling pointer that shouldn't be dereferenced!)
     887             :  *
     888             :  * HASH_ENTER will normally ereport a generic "out of memory" error if
     889             :  * it is unable to create a new entry.  The HASH_ENTER_NULL operation is
     890             :  * the same except it will return NULL if out of memory.  Note that
     891             :  * HASH_ENTER_NULL cannot be used with the default palloc-based allocator,
     892             :  * since palloc internally ereports on out-of-memory.
     893             :  *
     894             :  * If foundPtr isn't NULL, then *foundPtr is set TRUE if we found an
     895             :  * existing entry in the table, FALSE otherwise.  This is needed in the
     896             :  * HASH_ENTER case, but is redundant with the return value otherwise.
     897             :  *
     898             :  * For hash_search_with_hash_value, the hashvalue parameter must have been
     899             :  * calculated with get_hash_value().
     900             :  */
     901             : void *
     902     4604875 : hash_search(HTAB *hashp,
     903             :             const void *keyPtr,
     904             :             HASHACTION action,
     905             :             bool *foundPtr)
     906             : {
     907     4604875 :     return hash_search_with_hash_value(hashp,
     908             :                                        keyPtr,
     909     4604875 :                                        hashp->hash(keyPtr, hashp->keysize),
     910             :                                        action,
     911             :                                        foundPtr);
     912             : }
     913             : 
     914             : void *
     915     8220006 : hash_search_with_hash_value(HTAB *hashp,
     916             :                             const void *keyPtr,
     917             :                             uint32 hashvalue,
     918             :                             HASHACTION action,
     919             :                             bool *foundPtr)
     920             : {
     921     8220006 :     HASHHDR    *hctl = hashp->hctl;
     922     8220006 :     int         freelist_idx = FREELIST_IDX(hctl, hashvalue);
     923             :     Size        keysize;
     924             :     uint32      bucket;
     925             :     long        segment_num;
     926             :     long        segment_ndx;
     927             :     HASHSEGMENT segp;
     928             :     HASHBUCKET  currBucket;
     929             :     HASHBUCKET *prevBucketPtr;
     930             :     HashCompareFunc match;
     931             : 
     932             : #if HASH_STATISTICS
     933             :     hash_accesses++;
     934             :     hctl->accesses++;
     935             : #endif
     936             : 
     937             :     /*
     938             :      * If inserting, check if it is time to split a bucket.
     939             :      *
     940             :      * NOTE: failure to expand table is not a fatal error, it just means we
     941             :      * have to run at higher fill factor than we wanted.  However, if we're
     942             :      * using the palloc allocator then it will throw error anyway on
     943             :      * out-of-memory, so we must do this before modifying the table.
     944             :      */
     945     8220006 :     if (action == HASH_ENTER || action == HASH_ENTER_NULL)
     946             :     {
     947             :         /*
     948             :          * Can't split if running in partitioned mode, nor if frozen, nor if
     949             :          * table is the subject of any active hash_seq_search scans.  Strange
     950             :          * order of these tests is to try to check cheaper conditions first.
     951             :          */
     952     3016225 :         if (!IS_PARTITIONED(hctl) && !hashp->frozen &&
     953     1426349 :             hctl->freeList[0].nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
     954       18967 :             !has_seq_scans(hashp))
     955       18967 :             (void) expand_table(hashp);
     956             :     }
     957             : 
     958             :     /*
     959             :      * Do the initial lookup
     960             :      */
     961     8220006 :     bucket = calc_bucket(hctl, hashvalue);
     962             : 
     963     8220006 :     segment_num = bucket >> hashp->sshift;
     964     8220006 :     segment_ndx = MOD(bucket, hashp->ssize);
     965             : 
     966     8220006 :     segp = hashp->dir[segment_num];
     967             : 
     968     8220006 :     if (segp == NULL)
     969           0 :         hash_corrupted(hashp);
     970             : 
     971     8220006 :     prevBucketPtr = &segp[segment_ndx];
     972     8220006 :     currBucket = *prevBucketPtr;
     973             : 
     974             :     /*
     975             :      * Follow collision chain looking for matching key
     976             :      */
     977     8220006 :     match = hashp->match;        /* save one fetch in inner loop */
     978     8220006 :     keysize = hashp->keysize;    /* ditto */
     979             : 
     980    17892371 :     while (currBucket != NULL)
     981             :     {
     982    14786425 :         if (currBucket->hashvalue == hashvalue &&
     983     6667094 :             match(ELEMENTKEY(currBucket), keyPtr, keysize) == 0)
     984     6666972 :             break;
     985     1452359 :         prevBucketPtr = &(currBucket->link);
     986     1452359 :         currBucket = *prevBucketPtr;
     987             : #if HASH_STATISTICS
     988             :         hash_collisions++;
     989             :         hctl->collisions++;
     990             : #endif
     991             :     }
     992             : 
     993     8220006 :     if (foundPtr)
     994     1575416 :         *foundPtr = (bool) (currBucket != NULL);
     995             : 
     996             :     /*
     997             :      * OK, now what?
     998             :      */
     999     8220006 :     switch (action)
    1000             :     {
    1001             :         case HASH_FIND:
    1002     5471252 :             if (currBucket != NULL)
    1003     5260780 :                 return (void *) ELEMENTKEY(currBucket);
    1004      210472 :             return NULL;
    1005             : 
    1006             :         case HASH_REMOVE:
    1007     1139911 :             if (currBucket != NULL)
    1008             :             {
    1009             :                 /* if partitioned, must lock to touch nentries and freeList */
    1010     1138088 :                 if (IS_PARTITIONED(hctl))
    1011      182868 :                     SpinLockAcquire(&(hctl->freeList[freelist_idx].mutex));
    1012             : 
    1013             :                 /* decrement the appropriate nentries counter. */
    1014     1138088 :                 Assert(hctl->freeList[freelist_idx].nentries > 0);
    1015     1138088 :                 hctl->freeList[freelist_idx].nentries--;
    1016             : 
    1017             :                 /* remove record from hash bucket's chain. */
    1018     1138088 :                 *prevBucketPtr = currBucket->link;
    1019             : 
    1020             :                 /* add the record to the appropriate freelist. */
    1021     1138088 :                 currBucket->link = hctl->freeList[freelist_idx].freeList;
    1022     1138088 :                 hctl->freeList[freelist_idx].freeList = currBucket;
    1023             : 
    1024     1138088 :                 if (IS_PARTITIONED(hctl))
    1025      182868 :                     SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
    1026             : 
    1027             :                 /*
    1028             :                  * better hope the caller is synchronizing access to this
    1029             :                  * element, because someone else is going to reuse it the next
    1030             :                  * time something is added to the table
    1031             :                  */
    1032     1138088 :                 return (void *) ELEMENTKEY(currBucket);
    1033             :             }
    1034        1823 :             return NULL;
    1035             : 
    1036             :         case HASH_ENTER_NULL:
    1037             :             /* ENTER_NULL does not work with palloc-based allocator */
    1038      184497 :             Assert(hashp->alloc != DynaHashAlloc);
    1039             :             /* FALL THRU */
    1040             : 
    1041             :         case HASH_ENTER:
    1042             :             /* Return existing element if found, else create one */
    1043     1608843 :             if (currBucket != NULL)
    1044      268104 :                 return (void *) ELEMENTKEY(currBucket);
    1045             : 
    1046             :             /* disallow inserts if frozen */
    1047     1340739 :             if (hashp->frozen)
    1048           0 :                 elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
    1049             :                      hashp->tabname);
    1050             : 
    1051     1340739 :             currBucket = get_hash_entry(hashp, freelist_idx);
    1052     1340739 :             if (currBucket == NULL)
    1053             :             {
    1054             :                 /* out of memory */
    1055           0 :                 if (action == HASH_ENTER_NULL)
    1056           0 :                     return NULL;
    1057             :                 /* report a generic message */
    1058           0 :                 if (hashp->isshared)
    1059           0 :                     ereport(ERROR,
    1060             :                             (errcode(ERRCODE_OUT_OF_MEMORY),
    1061             :                              errmsg("out of shared memory")));
    1062             :                 else
    1063           0 :                     ereport(ERROR,
    1064             :                             (errcode(ERRCODE_OUT_OF_MEMORY),
    1065             :                              errmsg("out of memory")));
    1066             :             }
    1067             : 
    1068             :             /* link into hashbucket chain */
    1069     1340739 :             *prevBucketPtr = currBucket;
    1070     1340739 :             currBucket->link = NULL;
    1071             : 
    1072             :             /* copy key into record */
    1073     1340739 :             currBucket->hashvalue = hashvalue;
    1074     1340739 :             hashp->keycopy(ELEMENTKEY(currBucket), keyPtr, keysize);
    1075             : 
    1076             :             /*
    1077             :              * Caller is expected to fill the data field on return.  DO NOT
    1078             :              * insert any code that could possibly throw error here, as doing
    1079             :              * so would leave the table entry incomplete and hence corrupt the
    1080             :              * caller's data structure.
    1081             :              */
    1082             : 
    1083     1340739 :             return (void *) ELEMENTKEY(currBucket);
    1084             :     }
    1085             : 
    1086           0 :     elog(ERROR, "unrecognized hash action code: %d", (int) action);
    1087             : 
    1088             :     return NULL;                /* keep compiler quiet */
    1089             : }
    1090             : 
    1091             : /*
    1092             :  * hash_update_hash_key -- change the hash key of an existing table entry
    1093             :  *
    1094             :  * This is equivalent to removing the entry, making a new entry, and copying
    1095             :  * over its data, except that the entry never goes to the table's freelist.
    1096             :  * Therefore this cannot suffer an out-of-memory failure, even if there are
    1097             :  * other processes operating in other partitions of the hashtable.
    1098             :  *
    1099             :  * Returns TRUE if successful, FALSE if the requested new hash key is already
    1100             :  * present.  Throws error if the specified entry pointer isn't actually a
    1101             :  * table member.
    1102             :  *
    1103             :  * NB: currently, there is no special case for old and new hash keys being
    1104             :  * identical, which means we'll report FALSE for that situation.  This is
    1105             :  * preferable for existing uses.
    1106             :  *
    1107             :  * NB: for a partitioned hashtable, caller must hold lock on both relevant
    1108             :  * partitions, if the new hash key would belong to a different partition.
    1109             :  */
    1110             : bool
    1111          17 : hash_update_hash_key(HTAB *hashp,
    1112             :                      void *existingEntry,
    1113             :                      const void *newKeyPtr)
    1114             : {
    1115          17 :     HASHELEMENT *existingElement = ELEMENT_FROM_KEY(existingEntry);
    1116          17 :     HASHHDR    *hctl = hashp->hctl;
    1117             :     uint32      newhashvalue;
    1118             :     Size        keysize;
    1119             :     uint32      bucket;
    1120             :     uint32      newbucket;
    1121             :     long        segment_num;
    1122             :     long        segment_ndx;
    1123             :     HASHSEGMENT segp;
    1124             :     HASHBUCKET  currBucket;
    1125             :     HASHBUCKET *prevBucketPtr;
    1126             :     HASHBUCKET *oldPrevPtr;
    1127             :     HashCompareFunc match;
    1128             : 
    1129             : #if HASH_STATISTICS
    1130             :     hash_accesses++;
    1131             :     hctl->accesses++;
    1132             : #endif
    1133             : 
    1134             :     /* disallow updates if frozen */
    1135          17 :     if (hashp->frozen)
    1136           0 :         elog(ERROR, "cannot update in frozen hashtable \"%s\"",
    1137             :              hashp->tabname);
    1138             : 
    1139             :     /*
    1140             :      * Lookup the existing element using its saved hash value.  We need to do
    1141             :      * this to be able to unlink it from its hash chain, but as a side benefit
    1142             :      * we can verify the validity of the passed existingEntry pointer.
    1143             :      */
    1144          17 :     bucket = calc_bucket(hctl, existingElement->hashvalue);
    1145             : 
    1146          17 :     segment_num = bucket >> hashp->sshift;
    1147          17 :     segment_ndx = MOD(bucket, hashp->ssize);
    1148             : 
    1149          17 :     segp = hashp->dir[segment_num];
    1150             : 
    1151          17 :     if (segp == NULL)
    1152           0 :         hash_corrupted(hashp);
    1153             : 
    1154          17 :     prevBucketPtr = &segp[segment_ndx];
    1155          17 :     currBucket = *prevBucketPtr;
    1156             : 
    1157          34 :     while (currBucket != NULL)
    1158             :     {
    1159          17 :         if (currBucket == existingElement)
    1160          17 :             break;
    1161           0 :         prevBucketPtr = &(currBucket->link);
    1162           0 :         currBucket = *prevBucketPtr;
    1163             :     }
    1164             : 
    1165          17 :     if (currBucket == NULL)
    1166           0 :         elog(ERROR, "hash_update_hash_key argument is not in hashtable \"%s\"",
    1167             :              hashp->tabname);
    1168             : 
    1169          17 :     oldPrevPtr = prevBucketPtr;
    1170             : 
    1171             :     /*
    1172             :      * Now perform the equivalent of a HASH_ENTER operation to locate the hash
    1173             :      * chain we want to put the entry into.
    1174             :      */
    1175          17 :     newhashvalue = hashp->hash(newKeyPtr, hashp->keysize);
    1176             : 
    1177          17 :     newbucket = calc_bucket(hctl, newhashvalue);
    1178             : 
    1179          17 :     segment_num = newbucket >> hashp->sshift;
    1180          17 :     segment_ndx = MOD(newbucket, hashp->ssize);
    1181             : 
    1182          17 :     segp = hashp->dir[segment_num];
    1183             : 
    1184          17 :     if (segp == NULL)
    1185           0 :         hash_corrupted(hashp);
    1186             : 
    1187          17 :     prevBucketPtr = &segp[segment_ndx];
    1188          17 :     currBucket = *prevBucketPtr;
    1189             : 
    1190             :     /*
    1191             :      * Follow collision chain looking for matching key
    1192             :      */
    1193          17 :     match = hashp->match;        /* save one fetch in inner loop */
    1194          17 :     keysize = hashp->keysize;    /* ditto */
    1195             : 
    1196          34 :     while (currBucket != NULL)
    1197             :     {
    1198           0 :         if (currBucket->hashvalue == newhashvalue &&
    1199           0 :             match(ELEMENTKEY(currBucket), newKeyPtr, keysize) == 0)
    1200           0 :             break;
    1201           0 :         prevBucketPtr = &(currBucket->link);
    1202           0 :         currBucket = *prevBucketPtr;
    1203             : #if HASH_STATISTICS
    1204             :         hash_collisions++;
    1205             :         hctl->collisions++;
    1206             : #endif
    1207             :     }
    1208             : 
    1209          17 :     if (currBucket != NULL)
    1210           0 :         return false;           /* collision with an existing entry */
    1211             : 
    1212          17 :     currBucket = existingElement;
    1213             : 
    1214             :     /*
    1215             :      * If old and new hash values belong to the same bucket, we need not
    1216             :      * change any chain links, and indeed should not since this simplistic
    1217             :      * update will corrupt the list if currBucket is the last element.  (We
    1218             :      * cannot fall out earlier, however, since we need to scan the bucket to
    1219             :      * check for duplicate keys.)
    1220             :      */
    1221          17 :     if (bucket != newbucket)
    1222             :     {
    1223             :         /* OK to remove record from old hash bucket's chain. */
    1224          17 :         *oldPrevPtr = currBucket->link;
    1225             : 
    1226             :         /* link into new hashbucket chain */
    1227          17 :         *prevBucketPtr = currBucket;
    1228          17 :         currBucket->link = NULL;
    1229             :     }
    1230             : 
    1231             :     /* copy new key into record */
    1232          17 :     currBucket->hashvalue = newhashvalue;
    1233          17 :     hashp->keycopy(ELEMENTKEY(currBucket), newKeyPtr, keysize);
    1234             : 
    1235             :     /* rest of record is untouched */
    1236             : 
    1237          17 :     return true;
    1238             : }
    1239             : 
    1240             : /*
    1241             :  * Allocate a new hashtable entry if possible; return NULL if out of memory.
    1242             :  * (Or, if the underlying space allocator throws error for out-of-memory,
    1243             :  * we won't return at all.)
    1244             :  */
    1245             : static HASHBUCKET
    1246     1340739 : get_hash_entry(HTAB *hashp, int freelist_idx)
    1247             : {
    1248     1340739 :     HASHHDR    *hctl = hashp->hctl;
    1249             :     HASHBUCKET  newElement;
    1250             : 
    1251             :     for (;;)
    1252             :     {
    1253             :         /* if partitioned, must lock to touch nentries and freeList */
    1254     1350235 :         if (IS_PARTITIONED(hctl))
    1255      192210 :             SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
    1256             : 
    1257             :         /* try to get an entry from the freelist */
    1258     1350235 :         newElement = hctl->freeList[freelist_idx].freeList;
    1259             : 
    1260     1350235 :         if (newElement != NULL)
    1261     1340739 :             break;
    1262             : 
    1263        9496 :         if (IS_PARTITIONED(hctl))
    1264           0 :             SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
    1265             : 
    1266             :         /*
    1267             :          * No free elements in this freelist.  In a partitioned table, there
    1268             :          * might be entries in other freelists, but to reduce contention we
    1269             :          * prefer to first try to get another chunk of buckets from the main
    1270             :          * shmem allocator.  If that fails, though, we *MUST* root through all
    1271             :          * the other freelists before giving up.  There are multiple callers
    1272             :          * that assume that they can allocate every element in the initially
    1273             :          * requested table size, or that deleting an element guarantees they
    1274             :          * can insert a new element, even if shared memory is entirely full.
    1275             :          * Failing because the needed element is in a different freelist is
    1276             :          * not acceptable.
    1277             :          */
    1278        9496 :         if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx))
    1279             :         {
    1280             :             int         borrow_from_idx;
    1281             : 
    1282           0 :             if (!IS_PARTITIONED(hctl))
    1283           0 :                 return NULL;    /* out of memory */
    1284             : 
    1285             :             /* try to borrow element from another freelist */
    1286           0 :             borrow_from_idx = freelist_idx;
    1287             :             for (;;)
    1288             :             {
    1289           0 :                 borrow_from_idx = (borrow_from_idx + 1) % NUM_FREELISTS;
    1290           0 :                 if (borrow_from_idx == freelist_idx)
    1291           0 :                     break;      /* examined all freelists, fail */
    1292             : 
    1293           0 :                 SpinLockAcquire(&(hctl->freeList[borrow_from_idx].mutex));
    1294           0 :                 newElement = hctl->freeList[borrow_from_idx].freeList;
    1295             : 
    1296           0 :                 if (newElement != NULL)
    1297             :                 {
    1298           0 :                     hctl->freeList[borrow_from_idx].freeList = newElement->link;
    1299           0 :                     SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
    1300             : 
    1301             :                     /* careful: count the new element in its proper freelist */
    1302           0 :                     SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
    1303           0 :                     hctl->freeList[freelist_idx].nentries++;
    1304           0 :                     SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
    1305             : 
    1306           0 :                     return newElement;
    1307             :                 }
    1308             : 
    1309           0 :                 SpinLockRelease(&(hctl->freeList[borrow_from_idx].mutex));
    1310           0 :             }
    1311             : 
    1312             :             /* no elements available to borrow either, so out of memory */
    1313           0 :             return NULL;
    1314             :         }
    1315        9496 :     }
    1316             : 
    1317             :     /* remove entry from freelist, bump nentries */
    1318     1340739 :     hctl->freeList[freelist_idx].freeList = newElement->link;
    1319     1340739 :     hctl->freeList[freelist_idx].nentries++;
    1320             : 
    1321     1340739 :     if (IS_PARTITIONED(hctl))
    1322      192210 :         SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
    1323             : 
    1324     1340739 :     return newElement;
    1325             : }
    1326             : 
    1327             : /*
    1328             :  * hash_get_num_entries -- get the number of entries in a hashtable
    1329             :  */
    1330             : long
    1331         217 : hash_get_num_entries(HTAB *hashp)
    1332             : {
    1333             :     int         i;
    1334         217 :     long        sum = hashp->hctl->freeList[0].nentries;
    1335             : 
    1336             :     /*
    1337             :      * We currently don't bother with acquiring the mutexes; it's only
    1338             :      * sensible to call this function if you've got lock on all partitions of
    1339             :      * the table.
    1340             :      */
    1341         217 :     if (IS_PARTITIONED(hashp->hctl))
    1342             :     {
    1343        3744 :         for (i = 1; i < NUM_FREELISTS; i++)
    1344        3627 :             sum += hashp->hctl->freeList[i].nentries;
    1345             :     }
    1346             : 
    1347         217 :     return sum;
    1348             : }
    1349             : 
    1350             : /*
    1351             :  * hash_seq_init/_search/_term
    1352             :  *          Sequentially search through hash table and return
    1353             :  *          all the elements one by one, return NULL when no more.
    1354             :  *
    1355             :  * hash_seq_term should be called if and only if the scan is abandoned before
    1356             :  * completion; if hash_seq_search returns NULL then it has already done the
    1357             :  * end-of-scan cleanup.
    1358             :  *
    1359             :  * NOTE: caller may delete the returned element before continuing the scan.
    1360             :  * However, deleting any other element while the scan is in progress is
    1361             :  * UNDEFINED (it might be the one that curIndex is pointing at!).  Also,
    1362             :  * if elements are added to the table while the scan is in progress, it is
    1363             :  * unspecified whether they will be visited by the scan or not.
    1364             :  *
    1365             :  * NOTE: it is possible to use hash_seq_init/hash_seq_search without any
    1366             :  * worry about hash_seq_term cleanup, if the hashtable is first locked against
    1367             :  * further insertions by calling hash_freeze.
    1368             :  *
    1369             :  * NOTE: to use this with a partitioned hashtable, caller had better hold
    1370             :  * at least shared lock on all partitions of the table throughout the scan!
    1371             :  * We can cope with insertions or deletions by our own backend, but *not*
    1372             :  * with concurrent insertions or deletions by another.
    1373             :  */
    1374             : void
    1375      158036 : hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
    1376             : {
    1377      158036 :     status->hashp = hashp;
    1378      158036 :     status->curBucket = 0;
    1379      158036 :     status->curEntry = NULL;
    1380      158036 :     if (!hashp->frozen)
    1381      158036 :         register_seq_scan(hashp);
    1382      158036 : }
    1383             : 
    1384             : void *
    1385     1749359 : hash_seq_search(HASH_SEQ_STATUS *status)
    1386             : {
    1387             :     HTAB       *hashp;
    1388             :     HASHHDR    *hctl;
    1389             :     uint32      max_bucket;
    1390             :     long        ssize;
    1391             :     long        segment_num;
    1392             :     long        segment_ndx;
    1393             :     HASHSEGMENT segp;
    1394             :     uint32      curBucket;
    1395             :     HASHELEMENT *curElem;
    1396             : 
    1397     1749359 :     if ((curElem = status->curEntry) != NULL)
    1398             :     {
    1399             :         /* Continuing scan of curBucket... */
    1400      434159 :         status->curEntry = curElem->link;
    1401      434159 :         if (status->curEntry == NULL)    /* end of this bucket */
    1402      311924 :             ++status->curBucket;
    1403      434159 :         return (void *) ELEMENTKEY(curElem);
    1404             :     }
    1405             : 
    1406             :     /*
    1407             :      * Search for next nonempty bucket starting at curBucket.
    1408             :      */
    1409     1315200 :     curBucket = status->curBucket;
    1410     1315200 :     hashp = status->hashp;
    1411     1315200 :     hctl = hashp->hctl;
    1412     1315200 :     ssize = hashp->ssize;
    1413     1315200 :     max_bucket = hctl->max_bucket;
    1414             : 
    1415     1315200 :     if (curBucket > max_bucket)
    1416             :     {
    1417        8933 :         hash_seq_term(status);
    1418        8933 :         return NULL;            /* search is done */
    1419             :     }
    1420             : 
    1421             :     /*
    1422             :      * first find the right segment in the table directory.
    1423             :      */
    1424     1306267 :     segment_num = curBucket >> hashp->sshift;
    1425     1306267 :     segment_ndx = MOD(curBucket, ssize);
    1426             : 
    1427     1306267 :     segp = hashp->dir[segment_num];
    1428             : 
    1429             :     /*
    1430             :      * Pick up the first item in this bucket's chain.  If chain is not empty
    1431             :      * we can begin searching it.  Otherwise we have to advance to find the
    1432             :      * next nonempty bucket.  We try to optimize that case since searching a
    1433             :      * near-empty hashtable has to iterate this loop a lot.
    1434             :      */
    1435    13262969 :     while ((curElem = segp[segment_ndx]) == NULL)
    1436             :     {
    1437             :         /* empty bucket, advance to next */
    1438    10799436 :         if (++curBucket > max_bucket)
    1439             :         {
    1440      149001 :             status->curBucket = curBucket;
    1441      149001 :             hash_seq_term(status);
    1442      149001 :             return NULL;        /* search is done */
    1443             :         }
    1444    10650435 :         if (++segment_ndx >= ssize)
    1445             :         {
    1446        6649 :             segment_num++;
    1447        6649 :             segment_ndx = 0;
    1448        6649 :             segp = hashp->dir[segment_num];
    1449             :         }
    1450             :     }
    1451             : 
    1452             :     /* Begin scan of curBucket... */
    1453     1157266 :     status->curEntry = curElem->link;
    1454     1157266 :     if (status->curEntry == NULL)    /* end of this bucket */
    1455      845342 :         ++curBucket;
    1456     1157266 :     status->curBucket = curBucket;
    1457     1157266 :     return (void *) ELEMENTKEY(curElem);
    1458             : }
    1459             : 
    1460             : void
    1461      158036 : hash_seq_term(HASH_SEQ_STATUS *status)
    1462             : {
    1463      158036 :     if (!status->hashp->frozen)
    1464      158036 :         deregister_seq_scan(status->hashp);
    1465      158036 : }
    1466             : 
    1467             : /*
    1468             :  * hash_freeze
    1469             :  *          Freeze a hashtable against future insertions (deletions are
    1470             :  *          still allowed)
    1471             :  *
    1472             :  * The reason for doing this is that by preventing any more bucket splits,
    1473             :  * we no longer need to worry about registering hash_seq_search scans,
    1474             :  * and thus caller need not be careful about ensuring hash_seq_term gets
    1475             :  * called at the right times.
    1476             :  *
    1477             :  * Multiple calls to hash_freeze() are allowed, but you can't freeze a table
    1478             :  * with active scans (since hash_seq_term would then do the wrong thing).
    1479             :  */
    1480             : void
    1481           0 : hash_freeze(HTAB *hashp)
    1482             : {
    1483           0 :     if (hashp->isshared)
    1484           0 :         elog(ERROR, "cannot freeze shared hashtable \"%s\"", hashp->tabname);
    1485           0 :     if (!hashp->frozen && has_seq_scans(hashp))
    1486           0 :         elog(ERROR, "cannot freeze hashtable \"%s\" because it has active scans",
    1487             :              hashp->tabname);
    1488           0 :     hashp->frozen = true;
    1489           0 : }
    1490             : 
    1491             : 
    1492             : /********************************* UTILITIES ************************/
    1493             : 
    1494             : /*
    1495             :  * Expand the table by adding one more hash bucket.
    1496             :  */
    1497             : static bool
    1498       18967 : expand_table(HTAB *hashp)
    1499             : {
    1500       18967 :     HASHHDR    *hctl = hashp->hctl;
    1501             :     HASHSEGMENT old_seg,
    1502             :                 new_seg;
    1503             :     long        old_bucket,
    1504             :                 new_bucket;
    1505             :     long        new_segnum,
    1506             :                 new_segndx;
    1507             :     long        old_segnum,
    1508             :                 old_segndx;
    1509             :     HASHBUCKET *oldlink,
    1510             :                *newlink;
    1511             :     HASHBUCKET  currElement,
    1512             :                 nextElement;
    1513             : 
    1514       18967 :     Assert(!IS_PARTITIONED(hctl));
    1515             : 
    1516             : #ifdef HASH_STATISTICS
    1517             :     hash_expansions++;
    1518             : #endif
    1519             : 
    1520       18967 :     new_bucket = hctl->max_bucket + 1;
    1521       18967 :     new_segnum = new_bucket >> hashp->sshift;
    1522       18967 :     new_segndx = MOD(new_bucket, hashp->ssize);
    1523             : 
    1524       18967 :     if (new_segnum >= hctl->nsegs)
    1525             :     {
    1526             :         /* Allocate new segment if necessary -- could fail if dir full */
    1527          89 :         if (new_segnum >= hctl->dsize)
    1528           0 :             if (!dir_realloc(hashp))
    1529           0 :                 return false;
    1530          89 :         if (!(hashp->dir[new_segnum] = seg_alloc(hashp)))
    1531           0 :             return false;
    1532          89 :         hctl->nsegs++;
    1533             :     }
    1534             : 
    1535             :     /* OK, we created a new bucket */
    1536       18967 :     hctl->max_bucket++;
    1537             : 
    1538             :     /*
    1539             :      * *Before* changing masks, find old bucket corresponding to same hash
    1540             :      * values; values in that bucket may need to be relocated to new bucket.
    1541             :      * Note that new_bucket is certainly larger than low_mask at this point,
    1542             :      * so we can skip the first step of the regular hash mask calc.
    1543             :      */
    1544       18967 :     old_bucket = (new_bucket & hctl->low_mask);
    1545             : 
    1546             :     /*
    1547             :      * If we crossed a power of 2, readjust masks.
    1548             :      */
    1549       18967 :     if ((uint32) new_bucket > hctl->high_mask)
    1550             :     {
    1551         207 :         hctl->low_mask = hctl->high_mask;
    1552         207 :         hctl->high_mask = (uint32) new_bucket | hctl->low_mask;
    1553             :     }
    1554             : 
    1555             :     /*
    1556             :      * Relocate records to the new bucket.  NOTE: because of the way the hash
    1557             :      * masking is done in calc_bucket, only one old bucket can need to be
    1558             :      * split at this point.  With a different way of reducing the hash value,
    1559             :      * that might not be true!
    1560             :      */
    1561       18967 :     old_segnum = old_bucket >> hashp->sshift;
    1562       18967 :     old_segndx = MOD(old_bucket, hashp->ssize);
    1563             : 
    1564       18967 :     old_seg = hashp->dir[old_segnum];
    1565       18967 :     new_seg = hashp->dir[new_segnum];
    1566             : 
    1567       18967 :     oldlink = &old_seg[old_segndx];
    1568       18967 :     newlink = &new_seg[new_segndx];
    1569             : 
    1570       62324 :     for (currElement = *oldlink;
    1571             :          currElement != NULL;
    1572       24390 :          currElement = nextElement)
    1573             :     {
    1574       24390 :         nextElement = currElement->link;
    1575       24390 :         if ((long) calc_bucket(hctl, currElement->hashvalue) == old_bucket)
    1576             :         {
    1577       12582 :             *oldlink = currElement;
    1578       12582 :             oldlink = &currElement->link;
    1579             :         }
    1580             :         else
    1581             :         {
    1582       11808 :             *newlink = currElement;
    1583       11808 :             newlink = &currElement->link;
    1584             :         }
    1585             :     }
    1586             :     /* don't forget to terminate the rebuilt hash chains... */
    1587       18967 :     *oldlink = NULL;
    1588       18967 :     *newlink = NULL;
    1589             : 
    1590       18967 :     return true;
    1591             : }
    1592             : 
    1593             : 
    1594             : static bool
    1595           0 : dir_realloc(HTAB *hashp)
    1596             : {
    1597             :     HASHSEGMENT *p;
    1598             :     HASHSEGMENT *old_p;
    1599             :     long        new_dsize;
    1600             :     long        old_dirsize;
    1601             :     long        new_dirsize;
    1602             : 
    1603           0 :     if (hashp->hctl->max_dsize != NO_MAX_DSIZE)
    1604           0 :         return false;
    1605             : 
    1606             :     /* Reallocate directory */
    1607           0 :     new_dsize = hashp->hctl->dsize << 1;
    1608           0 :     old_dirsize = hashp->hctl->dsize * sizeof(HASHSEGMENT);
    1609           0 :     new_dirsize = new_dsize * sizeof(HASHSEGMENT);
    1610             : 
    1611           0 :     old_p = hashp->dir;
    1612           0 :     CurrentDynaHashCxt = hashp->hcxt;
    1613           0 :     p = (HASHSEGMENT *) hashp->alloc((Size) new_dirsize);
    1614             : 
    1615           0 :     if (p != NULL)
    1616             :     {
    1617           0 :         memcpy(p, old_p, old_dirsize);
    1618           0 :         MemSet(((char *) p) + old_dirsize, 0, new_dirsize - old_dirsize);
    1619           0 :         hashp->dir = p;
    1620           0 :         hashp->hctl->dsize = new_dsize;
    1621             : 
    1622             :         /* XXX assume the allocator is palloc, so we know how to free */
    1623           0 :         Assert(hashp->alloc == DynaHashAlloc);
    1624           0 :         pfree(old_p);
    1625             : 
    1626           0 :         return true;
    1627             :     }
    1628             : 
    1629           0 :     return false;
    1630             : }
    1631             : 
    1632             : 
    1633             : static HASHSEGMENT
    1634       12585 : seg_alloc(HTAB *hashp)
    1635             : {
    1636             :     HASHSEGMENT segp;
    1637             : 
    1638       12585 :     CurrentDynaHashCxt = hashp->hcxt;
    1639       12585 :     segp = (HASHSEGMENT) hashp->alloc(sizeof(HASHBUCKET) * hashp->ssize);
    1640             : 
    1641       12585 :     if (!segp)
    1642           0 :         return NULL;
    1643             : 
    1644       12585 :     MemSet(segp, 0, sizeof(HASHBUCKET) * hashp->ssize);
    1645             : 
    1646       12585 :     return segp;
    1647             : }
    1648             : 
    1649             : /*
    1650             :  * allocate some new elements and link them into the indicated free list
    1651             :  */
    1652             : static bool
    1653       13564 : element_alloc(HTAB *hashp, int nelem, int freelist_idx)
    1654             : {
    1655       13564 :     HASHHDR    *hctl = hashp->hctl;
    1656             :     Size        elementSize;
    1657             :     HASHELEMENT *firstElement;
    1658             :     HASHELEMENT *tmpElement;
    1659             :     HASHELEMENT *prevElement;
    1660             :     int         i;
    1661             : 
    1662       13564 :     if (hashp->isfixed)
    1663           0 :         return false;
    1664             : 
    1665             :     /* Each element has a HASHELEMENT header plus user data. */
    1666       13564 :     elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
    1667             : 
    1668       13564 :     CurrentDynaHashCxt = hashp->hcxt;
    1669       13564 :     firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize);
    1670             : 
    1671       13564 :     if (!firstElement)
    1672           0 :         return false;
    1673             : 
    1674             :     /* prepare to link all the new entries into the freelist */
    1675       13564 :     prevElement = NULL;
    1676       13564 :     tmpElement = firstElement;
    1677      661202 :     for (i = 0; i < nelem; i++)
    1678             :     {
    1679      647638 :         tmpElement->link = prevElement;
    1680      647638 :         prevElement = tmpElement;
    1681      647638 :         tmpElement = (HASHELEMENT *) (((char *) tmpElement) + elementSize);
    1682             :     }
    1683             : 
    1684             :     /* if partitioned, must lock to touch freeList */
    1685       13564 :     if (IS_PARTITIONED(hctl))
    1686         800 :         SpinLockAcquire(&hctl->freeList[freelist_idx].mutex);
    1687             : 
    1688             :     /* freelist could be nonempty if two backends did this concurrently */
    1689       13564 :     firstElement->link = hctl->freeList[freelist_idx].freeList;
    1690       13564 :     hctl->freeList[freelist_idx].freeList = prevElement;
    1691             : 
    1692       13564 :     if (IS_PARTITIONED(hctl))
    1693         800 :         SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
    1694             : 
    1695       13564 :     return true;
    1696             : }
    1697             : 
    1698             : /* complain when we have detected a corrupted hashtable */
    1699             : static void
    1700           0 : hash_corrupted(HTAB *hashp)
    1701             : {
    1702             :     /*
    1703             :      * If the corruption is in a shared hashtable, we'd better force a
    1704             :      * systemwide restart.  Otherwise, just shut down this one backend.
    1705             :      */
    1706           0 :     if (hashp->isshared)
    1707           0 :         elog(PANIC, "hash table \"%s\" corrupted", hashp->tabname);
    1708             :     else
    1709           0 :         elog(FATAL, "hash table \"%s\" corrupted", hashp->tabname);
    1710             : }
    1711             : 
    1712             : /* calculate ceil(log base 2) of num */
    1713             : int
    1714       47728 : my_log2(long num)
    1715             : {
    1716             :     int         i;
    1717             :     long        limit;
    1718             : 
    1719             :     /* guard against too-large input, which would put us into infinite loop */
    1720       47728 :     if (num > LONG_MAX / 2)
    1721           0 :         num = LONG_MAX / 2;
    1722             : 
    1723       47728 :     for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
    1724             :         ;
    1725       47728 :     return i;
    1726             : }
    1727             : 
    1728             : /* calculate first power of 2 >= num, bounded to what will fit in a long */
    1729             : static long
    1730         140 : next_pow2_long(long num)
    1731             : {
    1732             :     /* my_log2's internal range check is sufficient */
    1733         140 :     return 1L << my_log2(num);
    1734             : }
    1735             : 
    1736             : /* calculate first power of 2 >= num, bounded to what will fit in an int */
    1737             : static int
    1738       19563 : next_pow2_int(long num)
    1739             : {
    1740       19563 :     if (num > INT_MAX / 2)
    1741           0 :         num = INT_MAX / 2;
    1742       19563 :     return 1 << my_log2(num);
    1743             : }
    1744             : 
    1745             : 
    1746             : /************************* SEQ SCAN TRACKING ************************/
    1747             : 
    1748             : /*
    1749             :  * We track active hash_seq_search scans here.  The need for this mechanism
    1750             :  * comes from the fact that a scan will get confused if a bucket split occurs
    1751             :  * while it's in progress: it might visit entries twice, or even miss some
    1752             :  * entirely (if it's partway through the same bucket that splits).  Hence
    1753             :  * we want to inhibit bucket splits if there are any active scans on the
    1754             :  * table being inserted into.  This is a fairly rare case in current usage,
    1755             :  * so just postponing the split until the next insertion seems sufficient.
    1756             :  *
    1757             :  * Given present usages of the function, only a few scans are likely to be
    1758             :  * open concurrently; so a finite-size stack of open scans seems sufficient,
    1759             :  * and we don't worry that linear search is too slow.  Note that we do
    1760             :  * allow multiple scans of the same hashtable to be open concurrently.
    1761             :  *
    1762             :  * This mechanism can support concurrent scan and insertion in a shared
    1763             :  * hashtable if it's the same backend doing both.  It would fail otherwise,
    1764             :  * but locking reasons seem to preclude any such scenario anyway, so we don't
    1765             :  * worry.
    1766             :  *
    1767             :  * This arrangement is reasonably robust if a transient hashtable is deleted
    1768             :  * without notifying us.  The absolute worst case is we might inhibit splits
    1769             :  * in another table created later at exactly the same address.  We will give
    1770             :  * a warning at transaction end for reference leaks, so any bugs leading to
    1771             :  * lack of notification should be easy to catch.
    1772             :  */
    1773             : 
    1774             : #define MAX_SEQ_SCANS 100
    1775             : 
    1776             : static HTAB *seq_scan_tables[MAX_SEQ_SCANS];    /* tables being scanned */
    1777             : static int  seq_scan_level[MAX_SEQ_SCANS];  /* subtransaction nest level */
    1778             : static int  num_seq_scans = 0;
    1779             : 
    1780             : 
    1781             : /* Register a table as having an active hash_seq_search scan */
    1782             : static void
    1783      158036 : register_seq_scan(HTAB *hashp)
    1784             : {
    1785      158036 :     if (num_seq_scans >= MAX_SEQ_SCANS)
    1786           0 :         elog(ERROR, "too many active hash_seq_search scans, cannot start one on \"%s\"",
    1787             :              hashp->tabname);
    1788      158036 :     seq_scan_tables[num_seq_scans] = hashp;
    1789      158036 :     seq_scan_level[num_seq_scans] = GetCurrentTransactionNestLevel();
    1790      158036 :     num_seq_scans++;
    1791      158036 : }
    1792             : 
    1793             : /* Deregister an active scan */
    1794             : static void
    1795      158036 : deregister_seq_scan(HTAB *hashp)
    1796             : {
    1797             :     int         i;
    1798             : 
    1799             :     /* Search backward since it's most likely at the stack top */
    1800      158036 :     for (i = num_seq_scans - 1; i >= 0; i--)
    1801             :     {
    1802      158036 :         if (seq_scan_tables[i] == hashp)
    1803             :         {
    1804      158036 :             seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
    1805      158036 :             seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
    1806      158036 :             num_seq_scans--;
    1807      316072 :             return;
    1808             :         }
    1809             :     }
    1810           0 :     elog(ERROR, "no hash_seq_search scan for hash table \"%s\"",
    1811             :          hashp->tabname);
    1812             : }
    1813             : 
    1814             : /* Check if a table has any active scan */
    1815             : static bool
    1816       18967 : has_seq_scans(HTAB *hashp)
    1817             : {
    1818             :     int         i;
    1819             : 
    1820       18967 :     for (i = 0; i < num_seq_scans; i++)
    1821             :     {
    1822           0 :         if (seq_scan_tables[i] == hashp)
    1823           0 :             return true;
    1824             :     }
    1825       18967 :     return false;
    1826             : }
    1827             : 
    1828             : /* Clean up any open scans at end of transaction */
    1829             : void
    1830       26218 : AtEOXact_HashTables(bool isCommit)
    1831             : {
    1832             :     /*
    1833             :      * During abort cleanup, open scans are expected; just silently clean 'em
    1834             :      * out.  An open scan at commit means someone forgot a hash_seq_term()
    1835             :      * call, so complain.
    1836             :      *
    1837             :      * Note: it's tempting to try to print the tabname here, but refrain for
    1838             :      * fear of touching deallocated memory.  This isn't a user-facing message
    1839             :      * anyway, so it needn't be pretty.
    1840             :      */
    1841       26218 :     if (isCommit)
    1842             :     {
    1843             :         int         i;
    1844             : 
    1845       22912 :         for (i = 0; i < num_seq_scans; i++)
    1846             :         {
    1847           0 :             elog(WARNING, "leaked hash_seq_search scan for hash table %p",
    1848             :                  seq_scan_tables[i]);
    1849             :         }
    1850             :     }
    1851       26218 :     num_seq_scans = 0;
    1852       26218 : }
    1853             : 
    1854             : /* Clean up any open scans at end of subtransaction */
    1855             : void
    1856         372 : AtEOSubXact_HashTables(bool isCommit, int nestDepth)
    1857             : {
    1858             :     int         i;
    1859             : 
    1860             :     /*
    1861             :      * Search backward to make cleanup easy.  Note we must check all entries,
    1862             :      * not only those at the end of the array, because deletion technique
    1863             :      * doesn't keep them in order.
    1864             :      */
    1865         372 :     for (i = num_seq_scans - 1; i >= 0; i--)
    1866             :     {
    1867           0 :         if (seq_scan_level[i] >= nestDepth)
    1868             :         {
    1869           0 :             if (isCommit)
    1870           0 :                 elog(WARNING, "leaked hash_seq_search scan for hash table %p",
    1871             :                      seq_scan_tables[i]);
    1872           0 :             seq_scan_tables[i] = seq_scan_tables[num_seq_scans - 1];
    1873           0 :             seq_scan_level[i] = seq_scan_level[num_seq_scans - 1];
    1874           0 :             num_seq_scans--;
    1875             :         }
    1876             :     }
    1877         372 : }

Generated by: LCOV version 1.11