LCOV - code coverage report
Current view: top level - src/include/lib - simplehash.h (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 206 246 83.7 %
Date: 2017-09-29 13:40:31 Functions: 31 35 88.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * simplehash.h
       3             :  *
       4             :  *    Hash table implementation which will be specialized to user-defined
       5             :  *    types, by including this file to generate the required code.  It's
       6             :  *    probably not worthwhile to do so for hash tables that aren't performance
       7             :  *    or space sensitive.
       8             :  *
       9             :  * Usage notes:
      10             :  *
      11             :  *    To generate a hash-table and associated functions for a use case several
      12             :  *    macros have to be #define'ed before this file is included.  Including
      13             :  *    the file #undef's all those, so a new hash table can be generated
      14             :  *    afterwards.
      15             :  *    The relevant parameters are:
      16             :  *    - SH_PREFIX - prefix for all symbol names generated. A prefix of 'foo'
      17             :  *      will result in hash table type 'foo_hash' and functions like
      18             :  *      'foo_insert'/'foo_lookup' and so forth.
      19             :  *    - SH_ELEMENT_TYPE - type of the contained elements
      20             :  *    - SH_KEY_TYPE - type of the hashtable's key
      21             :  *    - SH_DECLARE - if defined function prototypes and type declarations are
      22             :  *      generated
      23             :  *    - SH_DEFINE - if defined function definitions are generated
      24             :  *    - SH_SCOPE - in which scope (e.g. extern, static inline) do function
      25             :  *      declarations reside
      26             :  *    - SH_USE_NONDEFAULT_ALLOCATOR - if defined no element allocator functions
      27             :  *      are defined, so you can supply your own
      28             :  *    The following parameters are only relevant when SH_DEFINE is defined:
      29             :  *    - SH_KEY - name of the element in SH_ELEMENT_TYPE containing the hash key
      30             :  *    - SH_EQUAL(table, a, b) - compare two table keys
      31             :  *    - SH_HASH_KEY(table, key) - generate hash for the key
      32             :  *    - SH_STORE_HASH - if defined the hash is stored in the elements
      33             :  *    - SH_GET_HASH(tb, a) - return the field to store the hash in
      34             :  *
      35             :  *    For examples of usage look at simplehash.c (file local definition) and
      36             :  *    execnodes.h/execGrouping.c (exposed declaration, file local
      37             :  *    implementation).
      38             :  *
      39             :  * Hash table design:
      40             :  *
      41             :  *    The hash table design chosen is a variant of linear open-addressing. The
      42             :  *    reason for doing so is that linear addressing is CPU cache & pipeline
      43             :  *    friendly. The biggest disadvantage of simple linear addressing schemes
      44             :  *    are highly variable lookup times due to clustering, and deletions
      45             :  *    leaving a lot of tombstones around.  To address these issues a variant
      46             :  *    of "robin hood" hashing is employed.  Robin hood hashing optimizes
      47             :  *    chaining lengths by moving elements close to their optimal bucket
      48             :  *    ("rich" elements), out of the way if a to-be-inserted element is further
      49             :  *    away from its optimal position (i.e. it's "poor").  While that can make
      50             :  *    insertions slower, the average lookup performance is a lot better, and
      51             :  *    higher fill factors can be used in a still performant manner.  To avoid
      52             :  *    tombstones - which normally solve the issue that a deleted node's
      53             :  *    presence is relevant to determine whether a lookup needs to continue
      54             :  *    looking or is done - buckets following a deleted element are shifted
      55             :  *    backwards, unless they're empty or already at their optimal position.
      56             :  */
      57             : 
      58             : /* helpers */
      59             : #define SH_MAKE_PREFIX(a) CppConcat(a,_)
      60             : #define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
      61             : #define SH_MAKE_NAME_(a,b) CppConcat(a,b)
      62             : 
      63             : /* name macros for: */
      64             : 
      65             : /* type declarations */
      66             : #define SH_TYPE SH_MAKE_NAME(hash)
      67             : #define SH_STATUS SH_MAKE_NAME(status)
      68             : #define SH_STATUS_EMPTY SH_MAKE_NAME(EMPTY)
      69             : #define SH_STATUS_IN_USE SH_MAKE_NAME(IN_USE)
      70             : #define SH_ITERATOR SH_MAKE_NAME(iterator)
      71             : 
      72             : /* function declarations */
      73             : #define SH_CREATE SH_MAKE_NAME(create)
      74             : #define SH_DESTROY SH_MAKE_NAME(destroy)
      75             : #define SH_INSERT SH_MAKE_NAME(insert)
      76             : #define SH_DELETE SH_MAKE_NAME(delete)
      77             : #define SH_LOOKUP SH_MAKE_NAME(lookup)
      78             : #define SH_GROW SH_MAKE_NAME(grow)
      79             : #define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
      80             : #define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
      81             : #define SH_ITERATE SH_MAKE_NAME(iterate)
      82             : #define SH_ALLOCATE SH_MAKE_NAME(allocate)
      83             : #define SH_FREE SH_MAKE_NAME(free)
      84             : #define SH_STAT SH_MAKE_NAME(stat)
      85             : 
      86             : /* internal helper functions (no externally visible prototypes) */
      87             : #define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters)
      88             : #define SH_NEXT SH_MAKE_NAME(next)
      89             : #define SH_PREV SH_MAKE_NAME(prev)
      90             : #define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
      91             : #define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
      92             : #define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
      93             : 
      94             : /* generate forward declarations necessary to use the hash table */
      95             : #ifdef SH_DECLARE
      96             : 
      97             : /* type definitions */
      98             : typedef struct SH_TYPE
      99             : {
     100             :     /*
     101             :      * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash
     102             :      * tables.  Note that the maximum number of elements is lower
     103             :      * (SH_MAX_FILLFACTOR)
     104             :      */
     105             :     uint64      size;
     106             : 
     107             :     /* how many elements have valid contents */
     108             :     uint32      members;
     109             : 
     110             :     /* mask for bucket and size calculations, based on size */
     111             :     uint32      sizemask;
     112             : 
     113             :     /* boundary after which to grow hashtable */
     114             :     uint32      grow_threshold;
     115             : 
     116             :     /* hash buckets */
     117             :     SH_ELEMENT_TYPE *data;
     118             : 
     119             :     /* memory context to use for allocations */
     120             :     MemoryContext ctx;
     121             : 
     122             :     /* user defined data, useful for callbacks */
     123             :     void       *private_data;
     124             : }           SH_TYPE;
     125             : 
     126             : typedef enum SH_STATUS
     127             : {
     128             :     SH_STATUS_EMPTY = 0x00,
     129             :     SH_STATUS_IN_USE = 0x01
     130             : } SH_STATUS;
     131             : 
     132             : typedef struct SH_ITERATOR
     133             : {
     134             :     uint32      cur;            /* current element */
     135             :     uint32      end;
     136             :     bool        done;           /* iterator exhausted? */
     137             : }           SH_ITERATOR;
     138             : 
     139             : /* externally visible function prototypes */
     140             : SH_SCOPE    SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
     141             :           void *private_data);
     142             : SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
     143             : SH_SCOPE void SH_GROW(SH_TYPE * tb, uint32 newsize);
     144             : SH_SCOPE    SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
     145             : SH_SCOPE    SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
     146             : SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
     147             : SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
     148             : SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
     149             : SH_SCOPE    SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
     150             : SH_SCOPE void SH_STAT(SH_TYPE * tb);
     151             : 
     152             : #endif                          /* SH_DECLARE */
     153             : 
     154             : 
     155             : /* generate implementation of the hash table */
     156             : #ifdef SH_DEFINE
     157             : 
     158             : #include "utils/memutils.h"
     159             : 
     160             : /* max data array size,we allow up to PG_UINT32_MAX buckets, including 0 */
     161             : #define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
     162             : 
     163             : /* normal fillfactor, unless already close to maximum */
     164             : #ifndef SH_FILLFACTOR
     165             : #define SH_FILLFACTOR (0.9)
     166             : #endif
     167             : /* increase fillfactor if we otherwise would error out */
     168             : #define SH_MAX_FILLFACTOR (0.98)
     169             : /* grow if actual and optimal location bigger than */
     170             : #ifndef SH_GROW_MAX_DIB
     171             : #define SH_GROW_MAX_DIB 25
     172             : #endif
     173             : /* grow if more than elements to move when inserting */
     174             : #ifndef SH_GROW_MAX_MOVE
     175             : #define SH_GROW_MAX_MOVE 150
     176             : #endif
     177             : 
     178             : #ifdef SH_STORE_HASH
     179             : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
     180             : #else
     181             : #define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
     182             : #endif
     183             : 
     184             : /* FIXME: can we move these to a central location? */
     185             : 
     186             : /* calculate ceil(log base 2) of num */
     187             : static inline uint64
     188        1136 : sh_log2(uint64 num)
     189             : {
     190             :     int         i;
     191             :     uint64      limit;
     192             : 
     193        1136 :     for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
     194             :         ;
     195        1136 :     return i;
     196             : }
     197             : 
     198             : /* calculate first power of 2 >= num */
     199             : static inline uint64
     200        1136 : sh_pow2(uint64 num)
     201             : {
     202        1136 :     return ((uint64) 1) << sh_log2(num);
     203             : }
     204             : 
     205             : /*
     206             :  * Compute sizing parameters for hashtable. Called when creating and growing
     207             :  * the hashtable.
     208             :  */
     209             : static inline void
     210        1030 : SH_COMPUTE_PARAMETERS(SH_TYPE * tb, uint32 newsize)
     211             : {
     212             :     uint64      size;
     213             : 
     214             :     /* supporting zero sized hashes would complicate matters */
     215        1030 :     size = Max(newsize, 2);
     216             : 
     217             :     /* round up size to the next power of 2, that's how bucketing works */
     218        1030 :     size = sh_pow2(size);
     219        1030 :     Assert(size <= SH_MAX_SIZE);
     220             : 
     221             :     /*
     222             :      * Verify that allocation of ->data is possible on this platform, without
     223             :      * overflowing Size.
     224             :      */
     225        1030 :     if ((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= MaxAllocHugeSize)
     226           0 :         elog(ERROR, "hash table too large");
     227             : 
     228             :     /* now set size */
     229        1030 :     tb->size = size;
     230             : 
     231        1030 :     if (tb->size == SH_MAX_SIZE)
     232           0 :         tb->sizemask = 0;
     233             :     else
     234        1030 :         tb->sizemask = tb->size - 1;
     235             : 
     236             :     /*
     237             :      * Compute the next threshold at which we need to grow the hash table
     238             :      * again.
     239             :      */
     240        1030 :     if (tb->size == SH_MAX_SIZE)
     241           0 :         tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
     242             :     else
     243        1030 :         tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
     244        1030 : }
     245             : 
     246             : /* return the optimal bucket for the hash */
     247             : static inline uint32
     248     1190176 : SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
     249             : {
     250     1190176 :     return hash & tb->sizemask;
     251             : }
     252             : 
     253             : /* return next bucket after the current, handling wraparound */
     254             : static inline uint32
     255      604028 : SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
     256             : {
     257      604028 :     curelem = (curelem + 1) & tb->sizemask;
     258             : 
     259      604028 :     Assert(curelem != startelem);
     260             : 
     261      604028 :     return curelem;
     262             : }
     263             : 
     264             : /* return bucket before the current, handling wraparound */
     265             : static inline uint32
     266       36238 : SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
     267             : {
     268       36238 :     curelem = (curelem - 1) & tb->sizemask;
     269             : 
     270       36238 :     Assert(curelem != startelem);
     271             : 
     272       36238 :     return curelem;
     273             : }
     274             : 
     275             : /* return distance between bucket and its optimal position */
     276             : static inline uint32
     277      366557 : SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
     278             : {
     279      366557 :     if (optimal <= bucket)
     280      361990 :         return bucket - optimal;
     281             :     else
     282        4567 :         return (tb->size + bucket) - optimal;
     283             : }
     284             : 
     285             : static inline uint32
     286      377535 : SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
     287             : {
     288             : #ifdef SH_STORE_HASH
     289      121530 :     return SH_GET_HASH(tb, entry);
     290             : #else
     291      256005 :     return SH_HASH_KEY(tb, entry->SH_KEY);
     292             : #endif
     293             : }
     294             : 
     295             : /* default memory allocator function */
     296             : static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
     297             : static inline void SH_FREE(SH_TYPE * type, void *pointer);
     298             : 
     299             : #ifndef SH_USE_NONDEFAULT_ALLOCATOR
     300             : 
     301             : /* default memory allocator function */
     302             : static inline void *
     303         604 : SH_ALLOCATE(SH_TYPE * type, Size size)
     304             : {
     305         604 :     return MemoryContextAllocExtended(type->ctx, size,
     306             :                                       MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO);
     307             : }
     308             : 
     309             : /* default memory free function */
     310             : static inline void
     311          84 : SH_FREE(SH_TYPE * type, void *pointer)
     312             : {
     313          84 :     pfree(pointer);
     314          84 : }
     315             : 
     316             : #endif
     317             : 
     318             : /*
     319             :  * Create a hash table with enough space for `nelements` distinct members.
     320             :  * Memory for the hash table is allocated from the passed-in context.  If
     321             :  * desired, the array of elements can be allocated using a passed-in allocator;
     322             :  * this could be useful in order to place the array of elements in a shared
     323             :  * memory, or in a context that will outlive the rest of the hash table.
     324             :  * Memory other than for the array of elements will still be allocated from
     325             :  * the passed-in context.
     326             :  */
     327             : SH_SCOPE    SH_TYPE *
     328         924 : SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
     329             : {
     330             :     SH_TYPE    *tb;
     331             :     uint64      size;
     332             : 
     333         924 :     tb = MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
     334         924 :     tb->ctx = ctx;
     335         924 :     tb->private_data = private_data;
     336             : 
     337             :     /* increase nelements by fillfactor, want to store nelements elements */
     338         924 :     size = Min((double) SH_MAX_SIZE, ((double) nelements) / SH_FILLFACTOR);
     339             : 
     340         924 :     SH_COMPUTE_PARAMETERS(tb, size);
     341             : 
     342         924 :     tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
     343             : 
     344         924 :     return tb;
     345             : }
     346             : 
     347             : /* destroy a previously created hash table */
     348             : SH_SCOPE void
     349         404 : SH_DESTROY(SH_TYPE * tb)
     350             : {
     351         404 :     SH_FREE(tb, tb->data);
     352         404 :     pfree(tb);
     353         404 : }
     354             : 
     355             : /*
     356             :  * Grow a hash table to at least `newsize` buckets.
     357             :  *
     358             :  * Usually this will automatically be called by insertions/deletions, when
     359             :  * necessary. But resizing to the exact input size can be advantageous
     360             :  * performance-wise, when known at some point.
     361             :  */
     362             : SH_SCOPE void
     363         106 : SH_GROW(SH_TYPE * tb, uint32 newsize)
     364             : {
     365         106 :     uint64      oldsize = tb->size;
     366         106 :     SH_ELEMENT_TYPE *olddata = tb->data;
     367             :     SH_ELEMENT_TYPE *newdata;
     368             :     uint32      i;
     369         106 :     uint32      startelem = 0;
     370             :     uint32      copyelem;
     371             : 
     372         106 :     Assert(oldsize == sh_pow2(oldsize));
     373         106 :     Assert(oldsize != SH_MAX_SIZE);
     374         106 :     Assert(oldsize < newsize);
     375             : 
     376             :     /* compute parameters for new table */
     377         106 :     SH_COMPUTE_PARAMETERS(tb, newsize);
     378             : 
     379         106 :     tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
     380             : 
     381         106 :     newdata = tb->data;
     382             : 
     383             :     /*
     384             :      * Copy entries from the old data to newdata. We theoretically could use
     385             :      * SH_INSERT here, to avoid code duplication, but that's more general than
     386             :      * we need. We neither want tb->members increased, nor do we need to do
     387             :      * deal with deleted elements, nor do we need to compare keys. So a
     388             :      * special-cased implementation is lot faster. As resizing can be time
     389             :      * consuming and frequent, that's worthwhile to optimize.
     390             :      *
     391             :      * To be able to simply move entries over, we have to start not at the
     392             :      * first bucket (i.e olddata[0]), but find the first bucket that's either
     393             :      * empty, or is occupied by an entry at its optimal position. Such a
     394             :      * bucket has to exist in any table with a load factor under 1, as not all
     395             :      * buckets are occupied, i.e. there always has to be an empty bucket.  By
     396             :      * starting at such a bucket we can move the entries to the larger table,
     397             :      * without having to deal with conflicts.
     398             :      */
     399             : 
     400             :     /* search for the first element in the hash that's not wrapped around */
     401         931 :     for (i = 0; i < oldsize; i++)
     402             :     {
     403         931 :         SH_ELEMENT_TYPE *oldentry = &olddata[i];
     404             :         uint32      hash;
     405             :         uint32      optimal;
     406             : 
     407         931 :         if (oldentry->status != SH_STATUS_IN_USE)
     408             :         {
     409          79 :             startelem = i;
     410          79 :             break;
     411             :         }
     412             : 
     413         852 :         hash = SH_ENTRY_HASH(tb, oldentry);
     414         852 :         optimal = SH_INITIAL_BUCKET(tb, hash);
     415             : 
     416         852 :         if (optimal == i)
     417             :         {
     418          27 :             startelem = i;
     419          27 :             break;
     420             :         }
     421             :     }
     422             : 
     423             :     /* and copy all elements in the old table */
     424         106 :     copyelem = startelem;
     425       11346 :     for (i = 0; i < oldsize; i++)
     426             :     {
     427       11240 :         SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
     428             : 
     429       11240 :         if (oldentry->status == SH_STATUS_IN_USE)
     430             :         {
     431             :             uint32      hash;
     432             :             uint32      startelem;
     433             :             uint32      curelem;
     434             :             SH_ELEMENT_TYPE *newentry;
     435             : 
     436       10024 :             hash = SH_ENTRY_HASH(tb, oldentry);
     437       10024 :             startelem = SH_INITIAL_BUCKET(tb, hash);
     438       10024 :             curelem = startelem;
     439             : 
     440             :             /* find empty element to put data into */
     441             :             while (true)
     442             :             {
     443       13840 :                 newentry = &newdata[curelem];
     444             : 
     445       13840 :                 if (newentry->status == SH_STATUS_EMPTY)
     446             :                 {
     447       10024 :                     break;
     448             :                 }
     449             : 
     450        3816 :                 curelem = SH_NEXT(tb, curelem, startelem);
     451        3816 :             }
     452             : 
     453             :             /* copy entry to new slot */
     454       10024 :             memcpy(newentry, oldentry, sizeof(SH_ELEMENT_TYPE));
     455             :         }
     456             : 
     457             :         /* can't use SH_NEXT here, would use new size */
     458       11240 :         copyelem++;
     459       11240 :         if (copyelem >= oldsize)
     460             :         {
     461         106 :             copyelem = 0;
     462             :         }
     463             :     }
     464             : 
     465         106 :     SH_FREE(tb, olddata);
     466         106 : }
     467             : 
     468             : /*
     469             :  * Insert the key key into the hash-table, set *found to true if the key
     470             :  * already exists, false otherwise. Returns the hash-table entry in either
     471             :  * case.
     472             :  */
     473             : SH_SCOPE    SH_ELEMENT_TYPE *
     474      702368 : SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
     475             : {
     476      702368 :     uint32      hash = SH_HASH_KEY(tb, key);
     477             :     uint32      startelem;
     478             :     uint32      curelem;
     479             :     SH_ELEMENT_TYPE *data;
     480             :     uint32      insertdist;
     481             : 
     482             : restart:
     483      702369 :     insertdist = 0;
     484             : 
     485             :     /*
     486             :      * We do the grow check even if the key is actually present, to avoid
     487             :      * doing the check inside the loop. This also lets us avoid having to
     488             :      * re-find our position in the hashtable after resizing.
     489             :      *
     490             :      * Note that this also reached when resizing the table due to
     491             :      * SH_GROW_MAX_DIB / SH_GROW_MAX_MOVE.
     492             :      */
     493      702369 :     if (unlikely(tb->members >= tb->grow_threshold))
     494             :     {
     495         106 :         if (tb->size == SH_MAX_SIZE)
     496             :         {
     497           0 :             elog(ERROR, "hash table size exceeded");
     498             :         }
     499             : 
     500             :         /*
     501             :          * When optimizing, it can be very useful to print these out.
     502             :          */
     503             :         /* SH_STAT(tb); */
     504         106 :         SH_GROW(tb, tb->size * 2);
     505             :         /* SH_STAT(tb); */
     506             :     }
     507             : 
     508             :     /* perform insert, start bucket search at optimal location */
     509      702369 :     data = tb->data;
     510      702369 :     startelem = SH_INITIAL_BUCKET(tb, hash);
     511      702369 :     curelem = startelem;
     512             :     while (true)
     513             :     {
     514             :         uint32      curdist;
     515             :         uint32      curhash;
     516             :         uint32      curoptimal;
     517     1062276 :         SH_ELEMENT_TYPE *entry = &data[curelem];
     518             : 
     519             :         /* any empty bucket can directly be used */
     520     1062276 :         if (entry->status == SH_STATUS_EMPTY)
     521             :         {
     522       36555 :             tb->members++;
     523       36555 :             entry->SH_KEY = key;
     524             : #ifdef SH_STORE_HASH
     525       27582 :             SH_GET_HASH(tb, entry) = hash;
     526             : #endif
     527       36555 :             entry->status = SH_STATUS_IN_USE;
     528       36555 :             *found = false;
     529       36555 :             return entry;
     530             :         }
     531             : 
     532             :         /*
     533             :          * If the bucket is not empty, we either found a match (in which case
     534             :          * we're done), or we have to decide whether to skip over or move the
     535             :          * colliding entry. When the colliding element's distance to its
     536             :          * optimal position is smaller than the to-be-inserted entry's, we
     537             :          * shift the colliding entry (and its followers) forward by one.
     538             :          */
     539             : 
     540     1025721 :         if (SH_COMPARE_KEYS(tb, hash, key, entry))
     541             :         {
     542      659164 :             Assert(entry->status == SH_STATUS_IN_USE);
     543      659164 :             *found = true;
     544      659164 :             return entry;
     545             :         }
     546             : 
     547      366557 :         curhash = SH_ENTRY_HASH(tb, entry);
     548      366557 :         curoptimal = SH_INITIAL_BUCKET(tb, curhash);
     549      366557 :         curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
     550             : 
     551      366557 :         if (insertdist > curdist)
     552             :         {
     553        6650 :             SH_ELEMENT_TYPE *lastentry = entry;
     554        6650 :             uint32      emptyelem = curelem;
     555             :             uint32      moveelem;
     556        6650 :             int32       emptydist = 0;
     557             : 
     558             :             /* find next empty bucket */
     559             :             while (true)
     560             :             {
     561             :                 SH_ELEMENT_TYPE *emptyentry;
     562             : 
     563       36389 :                 emptyelem = SH_NEXT(tb, emptyelem, startelem);
     564       36389 :                 emptyentry = &data[emptyelem];
     565             : 
     566       36389 :                 if (emptyentry->status == SH_STATUS_EMPTY)
     567             :                 {
     568        6649 :                     lastentry = emptyentry;
     569        6649 :                     break;
     570             :                 }
     571             : 
     572             :                 /*
     573             :                  * To avoid negative consequences from overly imbalanced
     574             :                  * hashtables, grow the hashtable if collisions would require
     575             :                  * us to move a lot of entries.  The most likely cause of such
     576             :                  * imbalance is filling a (currently) small table, from a
     577             :                  * currently big one, in hash-table order.
     578             :                  */
     579       29740 :                 if (++emptydist > SH_GROW_MAX_MOVE)
     580             :                 {
     581           1 :                     tb->grow_threshold = 0;
     582           1 :                     goto restart;
     583             :                 }
     584       29739 :             }
     585             : 
     586             :             /* shift forward, starting at last occupied element */
     587             : 
     588             :             /*
     589             :              * TODO: This could be optimized to be one memcpy in may cases,
     590             :              * excepting wrapping around at the end of ->data. Hasn't shown up
     591             :              * in profiles so far though.
     592             :              */
     593        6649 :             moveelem = emptyelem;
     594       49536 :             while (moveelem != curelem)
     595             :             {
     596             :                 SH_ELEMENT_TYPE *moveentry;
     597             : 
     598       36238 :                 moveelem = SH_PREV(tb, moveelem, startelem);
     599       36238 :                 moveentry = &data[moveelem];
     600             : 
     601       36238 :                 memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
     602       36238 :                 lastentry = moveentry;
     603             :             }
     604             : 
     605             :             /* and fill the now empty spot */
     606        6649 :             tb->members++;
     607             : 
     608        6649 :             entry->SH_KEY = key;
     609             : #ifdef SH_STORE_HASH
     610        3839 :             SH_GET_HASH(tb, entry) = hash;
     611             : #endif
     612        6649 :             entry->status = SH_STATUS_IN_USE;
     613        6649 :             *found = false;
     614        6649 :             return entry;
     615             :         }
     616             : 
     617      359907 :         curelem = SH_NEXT(tb, curelem, startelem);
     618      359907 :         insertdist++;
     619             : 
     620             :         /*
     621             :          * To avoid negative consequences from overly imbalanced hashtables,
     622             :          * grow the hashtable if collisions lead to large runs. The most
     623             :          * likely cause of such imbalance is filling a (currently) small
     624             :          * table, from a currently big one, in hash-table order.
     625             :          */
     626      359907 :         if (insertdist > SH_GROW_MAX_DIB)
     627             :         {
     628           0 :             tb->grow_threshold = 0;
     629           0 :             goto restart;
     630             :         }
     631      359907 :     }
     632             : }
     633             : 
     634             : /*
     635             :  * Lookup up entry in hash table.  Returns NULL if key not present.
     636             :  */
     637             : SH_SCOPE    SH_ELEMENT_TYPE *
     638       89038 : SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
     639             : {
     640       89038 :     uint32      hash = SH_HASH_KEY(tb, key);
     641       89038 :     const uint32 startelem = SH_INITIAL_BUCKET(tb, hash);
     642       89038 :     uint32      curelem = startelem;
     643             : 
     644             :     while (true)
     645             :     {
     646      287571 :         SH_ELEMENT_TYPE *entry = &tb->data[curelem];
     647             : 
     648      287571 :         if (entry->status == SH_STATUS_EMPTY)
     649             :         {
     650       65071 :             return NULL;
     651             :         }
     652             : 
     653      222500 :         Assert(entry->status == SH_STATUS_IN_USE);
     654             : 
     655      222500 :         if (SH_COMPARE_KEYS(tb, hash, key, entry))
     656       23967 :             return entry;
     657             : 
     658             :         /*
     659             :          * TODO: we could stop search based on distance. If the current
     660             :          * buckets's distance-from-optimal is smaller than what we've skipped
     661             :          * already, the entry doesn't exist. Probably only do so if
     662             :          * SH_STORE_HASH is defined, to avoid re-computing hashes?
     663             :          */
     664             : 
     665      198533 :         curelem = SH_NEXT(tb, curelem, startelem);
     666      198533 :     }
     667             : }
     668             : 
     669             : /*
     670             :  * Delete entry from hash table.  Returns whether to-be-deleted key was
     671             :  * present.
     672             :  */
     673             : SH_SCOPE bool
     674       21234 : SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
     675             : {
     676       21234 :     uint32      hash = SH_HASH_KEY(tb, key);
     677       21234 :     uint32      startelem = SH_INITIAL_BUCKET(tb, hash);
     678       21234 :     uint32      curelem = startelem;
     679             : 
     680             :     while (true)
     681             :     {
     682       23526 :         SH_ELEMENT_TYPE *entry = &tb->data[curelem];
     683             : 
     684       23526 :         if (entry->status == SH_STATUS_EMPTY)
     685       18227 :             return false;
     686             : 
     687       10598 :         if (entry->status == SH_STATUS_IN_USE &&
     688        5299 :             SH_COMPARE_KEYS(tb, hash, key, entry))
     689             :         {
     690        3007 :             SH_ELEMENT_TYPE *lastentry = entry;
     691             : 
     692        3007 :             tb->members--;
     693             : 
     694             :             /*
     695             :              * Backward shift following elements till either an empty element
     696             :              * or an element at its optimal position is encountered.
     697             :              *
     698             :              * While that sounds expensive, the average chain length is short,
     699             :              * and deletions would otherwise require tombstones.
     700             :              */
     701             :             while (true)
     702             :             {
     703             :                 SH_ELEMENT_TYPE *curentry;
     704             :                 uint32      curhash;
     705             :                 uint32      curoptimal;
     706             : 
     707        3091 :                 curelem = SH_NEXT(tb, curelem, startelem);
     708        3091 :                 curentry = &tb->data[curelem];
     709             : 
     710        3091 :                 if (curentry->status != SH_STATUS_IN_USE)
     711             :                 {
     712        2989 :                     lastentry->status = SH_STATUS_EMPTY;
     713        2989 :                     break;
     714             :                 }
     715             : 
     716         102 :                 curhash = SH_ENTRY_HASH(tb, curentry);
     717         102 :                 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
     718             : 
     719             :                 /* current is at optimal position, done */
     720         102 :                 if (curoptimal == curelem)
     721             :                 {
     722          18 :                     lastentry->status = SH_STATUS_EMPTY;
     723          18 :                     break;
     724             :                 }
     725             : 
     726             :                 /* shift */
     727          84 :                 memcpy(lastentry, curentry, sizeof(SH_ELEMENT_TYPE));
     728             : 
     729          84 :                 lastentry = curentry;
     730          84 :             }
     731             : 
     732        3007 :             return true;
     733             :         }
     734             : 
     735             :         /* TODO: return false; if distance too big */
     736             : 
     737        2292 :         curelem = SH_NEXT(tb, curelem, startelem);
     738        2292 :     }
     739             : }
     740             : 
     741             : /*
     742             :  * Initialize iterator.
     743             :  */
     744             : SH_SCOPE void
     745         885 : SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
     746             : {
     747             :     int         i;
     748         885 :     uint64      startelem = PG_UINT64_MAX;
     749             : 
     750             :     /*
     751             :      * Search for the first empty element. As deletions during iterations are
     752             :      * supported, we want to start/end at an element that cannot be affected
     753             :      * by elements being shifted.
     754             :      */
     755        1487 :     for (i = 0; i < tb->size; i++)
     756             :     {
     757        1487 :         SH_ELEMENT_TYPE *entry = &tb->data[i];
     758             : 
     759        1487 :         if (entry->status != SH_STATUS_IN_USE)
     760             :         {
     761         885 :             startelem = i;
     762         885 :             break;
     763             :         }
     764             :     }
     765             : 
     766         885 :     Assert(startelem < SH_MAX_SIZE);
     767             : 
     768             :     /*
     769             :      * Iterate backwards, that allows the current element to be deleted, even
     770             :      * if there are backward shifts
     771             :      */
     772         885 :     iter->cur = startelem;
     773         885 :     iter->end = iter->cur;
     774         885 :     iter->done = false;
     775         885 : }
     776             : 
     777             : /*
     778             :  * Initialize iterator to a specific bucket. That's really only useful for
     779             :  * cases where callers are partially iterating over the hashspace, and that
     780             :  * iteration deletes and inserts elements based on visited entries. Doing that
     781             :  * repeatedly could lead to an unbalanced keyspace when always starting at the
     782             :  * same position.
     783             :  */
     784             : SH_SCOPE void
     785           4 : SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
     786             : {
     787             :     /*
     788             :      * Iterate backwards, that allows the current element to be deleted, even
     789             :      * if there are backward shifts.
     790             :      */
     791           4 :     iter->cur = at & tb->sizemask;    /* ensure at is within a valid range */
     792           4 :     iter->end = iter->cur;
     793           4 :     iter->done = false;
     794           4 : }
     795             : 
     796             : /*
     797             :  * Iterate over all entries in the hash-table. Return the next occupied entry,
     798             :  * or NULL if done.
     799             :  *
     800             :  * During iteration the current entry in the hash table may be deleted,
     801             :  * without leading to elements being skipped or returned twice.  Additionally
     802             :  * the rest of the table may be modified (i.e. there can be insertions or
     803             :  * deletions), but if so, there's neither a guarantee that all nodes are
     804             :  * visited at least once, nor a guarantee that a node is visited at most once.
     805             :  */
     806             : SH_SCOPE    SH_ELEMENT_TYPE *
     807       32245 : SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
     808             : {
     809      289874 :     while (!iter->done)
     810             :     {
     811             :         SH_ELEMENT_TYPE *elem;
     812             : 
     813      256759 :         elem = &tb->data[iter->cur];
     814             : 
     815             :         /* next element in backward direction */
     816      256759 :         iter->cur = (iter->cur - 1) & tb->sizemask;
     817             : 
     818      256759 :         if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
     819         870 :             iter->done = true;
     820      256759 :         if (elem->status == SH_STATUS_IN_USE)
     821             :         {
     822       31375 :             return elem;
     823             :         }
     824             :     }
     825             : 
     826         870 :     return NULL;
     827             : }
     828             : 
     829             : /*
     830             :  * Report some statistics about the state of the hashtable. For
     831             :  * debugging/profiling purposes only.
     832             :  */
     833             : SH_SCOPE void
     834           0 : SH_STAT(SH_TYPE * tb)
     835             : {
     836           0 :     uint32      max_chain_length = 0;
     837           0 :     uint32      total_chain_length = 0;
     838             :     double      avg_chain_length;
     839             :     double      fillfactor;
     840             :     uint32      i;
     841             : 
     842           0 :     uint32     *collisions = palloc0(tb->size * sizeof(uint32));
     843           0 :     uint32      total_collisions = 0;
     844           0 :     uint32      max_collisions = 0;
     845             :     double      avg_collisions;
     846             : 
     847           0 :     for (i = 0; i < tb->size; i++)
     848             :     {
     849             :         uint32      hash;
     850             :         uint32      optimal;
     851             :         uint32      dist;
     852             :         SH_ELEMENT_TYPE *elem;
     853             : 
     854           0 :         elem = &tb->data[i];
     855             : 
     856           0 :         if (elem->status != SH_STATUS_IN_USE)
     857           0 :             continue;
     858             : 
     859           0 :         hash = SH_ENTRY_HASH(tb, elem);
     860           0 :         optimal = SH_INITIAL_BUCKET(tb, hash);
     861           0 :         dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
     862             : 
     863           0 :         if (dist > max_chain_length)
     864           0 :             max_chain_length = dist;
     865           0 :         total_chain_length += dist;
     866             : 
     867           0 :         collisions[optimal]++;
     868             :     }
     869             : 
     870           0 :     for (i = 0; i < tb->size; i++)
     871             :     {
     872           0 :         uint32      curcoll = collisions[i];
     873             : 
     874           0 :         if (curcoll == 0)
     875           0 :             continue;
     876             : 
     877             :         /* single contained element is not a collision */
     878           0 :         curcoll--;
     879           0 :         total_collisions += curcoll;
     880           0 :         if (curcoll > max_collisions)
     881           0 :             max_collisions = curcoll;
     882             :     }
     883             : 
     884           0 :     if (tb->members > 0)
     885             :     {
     886           0 :         fillfactor = tb->members / ((double) tb->size);
     887           0 :         avg_chain_length = ((double) total_chain_length) / tb->members;
     888           0 :         avg_collisions = ((double) total_collisions) / tb->members;
     889             :     }
     890             :     else
     891             :     {
     892           0 :         fillfactor = 0;
     893           0 :         avg_chain_length = 0;
     894           0 :         avg_collisions = 0;
     895             :     }
     896             : 
     897           0 :     elog(LOG, "size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %i, avg_collisions: %f",
     898             :          tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
     899             :          total_collisions, max_collisions, avg_collisions);
     900           0 : }
     901             : 
     902             : #endif                          /* SH_DEFINE */
     903             : 
     904             : 
     905             : /* undefine external parameters, so next hash table can be defined */
     906             : #undef SH_PREFIX
     907             : #undef SH_KEY_TYPE
     908             : #undef SH_KEY
     909             : #undef SH_ELEMENT_TYPE
     910             : #undef SH_HASH_KEY
     911             : #undef SH_SCOPE
     912             : #undef SH_DECLARE
     913             : #undef SH_DEFINE
     914             : #undef SH_GET_HASH
     915             : #undef SH_STORE_HASH
     916             : #undef SH_USE_NONDEFAULT_ALLOCATOR
     917             : 
     918             : /* undefine locally declared macros */
     919             : #undef SH_MAKE_PREFIX
     920             : #undef SH_MAKE_NAME
     921             : #undef SH_MAKE_NAME_
     922             : #undef SH_FILLFACTOR
     923             : #undef SH_MAX_FILLFACTOR
     924             : #undef SH_GROW_MAX_DIB
     925             : #undef SH_GROW_MAX_MOVE
     926             : #undef SH_MAX_SIZE
     927             : 
     928             : /* types */
     929             : #undef SH_TYPE
     930             : #undef SH_STATUS
     931             : #undef SH_STATUS_EMPTY
     932             : #undef SH_STATUS_IN_USE
     933             : #undef SH_ITERATOR
     934             : 
     935             : /* external function names */
     936             : #undef SH_CREATE
     937             : #undef SH_DESTROY
     938             : #undef SH_INSERT
     939             : #undef SH_DELETE
     940             : #undef SH_LOOKUP
     941             : #undef SH_GROW
     942             : #undef SH_START_ITERATE
     943             : #undef SH_START_ITERATE_AT
     944             : #undef SH_ITERATE
     945             : #undef SH_ALLOCATE
     946             : #undef SH_FREE
     947             : #undef SH_STAT
     948             : 
     949             : /* internal function names */
     950             : #undef SH_COMPUTE_PARAMETERS
     951             : #undef SH_COMPARE_KEYS
     952             : #undef SH_INITIAL_BUCKET
     953             : #undef SH_NEXT
     954             : #undef SH_PREV
     955             : #undef SH_DISTANCE_FROM_OPTIMAL
     956             : #undef SH_ENTRY_HASH

Generated by: LCOV version 1.11