Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * hashfunc.c
4 : * Support functions for hash access method.
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/hash/hashfunc.c
12 : *
13 : * NOTES
14 : * These functions are stored in pg_amproc. For each operator class
15 : * defined for hash indexes, they compute the hash value of the argument.
16 : *
17 : * Additional hash functions appear in /utils/adt/ files for various
18 : * specialized datatypes.
19 : *
20 : * It is expected that every bit of a hash function's 32-bit result is
21 : * as random as every other; failure to ensure this is likely to lead
22 : * to poor performance of hash joins, for example. In most cases a hash
23 : * function should use hash_any() or its variant hash_uint32().
24 : *-------------------------------------------------------------------------
25 : */
26 :
27 : #include "postgres.h"
28 :
29 : #include "access/hash.h"
30 : #include "utils/builtins.h"
31 :
32 : /*
33 : * Datatype-specific hash functions.
34 : *
35 : * These support both hash indexes and hash joins.
36 : *
37 : * NOTE: some of these are also used by catcache operations, without
38 : * any direct connection to hash indexes. Also, the common hash_any
39 : * routine is also used by dynahash tables.
40 : */
41 :
42 : /* Note: this is used for both "char" and boolean datatypes */
43 : Datum
44 174597 : hashchar(PG_FUNCTION_ARGS)
45 : {
46 174597 : return hash_uint32((int32) PG_GETARG_CHAR(0));
47 : }
48 :
49 : Datum
50 11 : hashcharextended(PG_FUNCTION_ARGS)
51 : {
52 11 : return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
53 : }
54 :
55 : Datum
56 338044 : hashint2(PG_FUNCTION_ARGS)
57 : {
58 338044 : return hash_uint32((int32) PG_GETARG_INT16(0));
59 : }
60 :
61 : Datum
62 8 : hashint2extended(PG_FUNCTION_ARGS)
63 : {
64 8 : return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
65 : }
66 :
67 : Datum
68 698220 : hashint4(PG_FUNCTION_ARGS)
69 : {
70 698220 : return hash_uint32(PG_GETARG_INT32(0));
71 : }
72 :
73 : Datum
74 70 : hashint4extended(PG_FUNCTION_ARGS)
75 : {
76 70 : return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
77 : }
78 :
79 : Datum
80 1023 : hashint8(PG_FUNCTION_ARGS)
81 : {
82 : /*
83 : * The idea here is to produce a hash value compatible with the values
84 : * produced by hashint4 and hashint2 for logically equal inputs; this is
85 : * necessary to support cross-type hash joins across these input types.
86 : * Since all three types are signed, we can xor the high half of the int8
87 : * value if the sign is positive, or the complement of the high half when
88 : * the sign is negative.
89 : */
90 1023 : int64 val = PG_GETARG_INT64(0);
91 1023 : uint32 lohalf = (uint32) val;
92 1023 : uint32 hihalf = (uint32) (val >> 32);
93 :
94 1023 : lohalf ^= (val >= 0) ? hihalf : ~hihalf;
95 :
96 1023 : return hash_uint32(lohalf);
97 : }
98 :
99 : Datum
100 62 : hashint8extended(PG_FUNCTION_ARGS)
101 : {
102 : /* Same approach as hashint8 */
103 62 : int64 val = PG_GETARG_INT64(0);
104 62 : uint32 lohalf = (uint32) val;
105 62 : uint32 hihalf = (uint32) (val >> 32);
106 :
107 62 : lohalf ^= (val >= 0) ? hihalf : ~hihalf;
108 :
109 62 : return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
110 : }
111 :
112 : Datum
113 3340571 : hashoid(PG_FUNCTION_ARGS)
114 : {
115 3340571 : return hash_uint32((uint32) PG_GETARG_OID(0));
116 : }
117 :
118 : Datum
119 12 : hashoidextended(PG_FUNCTION_ARGS)
120 : {
121 12 : return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
122 : }
123 :
124 : Datum
125 13 : hashenum(PG_FUNCTION_ARGS)
126 : {
127 13 : return hash_uint32((uint32) PG_GETARG_OID(0));
128 : }
129 :
130 : Datum
131 6 : hashenumextended(PG_FUNCTION_ARGS)
132 : {
133 6 : return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
134 : }
135 :
136 : Datum
137 13 : hashfloat4(PG_FUNCTION_ARGS)
138 : {
139 13 : float4 key = PG_GETARG_FLOAT4(0);
140 : float8 key8;
141 :
142 : /*
143 : * On IEEE-float machines, minus zero and zero have different bit patterns
144 : * but should compare as equal. We must ensure that they have the same
145 : * hash value, which is most reliably done this way:
146 : */
147 13 : if (key == (float4) 0)
148 2 : PG_RETURN_UINT32(0);
149 :
150 : /*
151 : * To support cross-type hashing of float8 and float4, we want to return
152 : * the same hash value hashfloat8 would produce for an equal float8 value.
153 : * So, widen the value to float8 and hash that. (We must do this rather
154 : * than have hashfloat8 try to narrow its value to float4; that could fail
155 : * on overflow.)
156 : */
157 11 : key8 = key;
158 :
159 11 : return hash_any((unsigned char *) &key8, sizeof(key8));
160 : }
161 :
162 : Datum
163 12 : hashfloat4extended(PG_FUNCTION_ARGS)
164 : {
165 12 : float4 key = PG_GETARG_FLOAT4(0);
166 12 : uint64 seed = PG_GETARG_INT64(1);
167 : float8 key8;
168 :
169 : /* Same approach as hashfloat4 */
170 12 : if (key == (float4) 0)
171 2 : PG_RETURN_UINT64(seed);
172 10 : key8 = key;
173 :
174 10 : return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
175 : }
176 :
177 : Datum
178 10112 : hashfloat8(PG_FUNCTION_ARGS)
179 : {
180 10112 : float8 key = PG_GETARG_FLOAT8(0);
181 :
182 : /*
183 : * On IEEE-float machines, minus zero and zero have different bit patterns
184 : * but should compare as equal. We must ensure that they have the same
185 : * hash value, which is most reliably done this way:
186 : */
187 10112 : if (key == (float8) 0)
188 28 : PG_RETURN_UINT32(0);
189 :
190 10084 : return hash_any((unsigned char *) &key, sizeof(key));
191 : }
192 :
193 : Datum
194 12 : hashfloat8extended(PG_FUNCTION_ARGS)
195 : {
196 12 : float8 key = PG_GETARG_FLOAT8(0);
197 12 : uint64 seed = PG_GETARG_INT64(1);
198 :
199 : /* Same approach as hashfloat8 */
200 12 : if (key == (float8) 0)
201 2 : PG_RETURN_UINT64(seed);
202 :
203 10 : return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
204 : }
205 :
206 : Datum
207 18291 : hashoidvector(PG_FUNCTION_ARGS)
208 : {
209 18291 : oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
210 :
211 18291 : return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
212 : }
213 :
214 : Datum
215 10 : hashoidvectorextended(PG_FUNCTION_ARGS)
216 : {
217 10 : oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
218 :
219 20 : return hash_any_extended((unsigned char *) key->values,
220 10 : key->dim1 * sizeof(Oid),
221 10 : PG_GETARG_INT64(1));
222 : }
223 :
224 : Datum
225 317806 : hashname(PG_FUNCTION_ARGS)
226 : {
227 317806 : char *key = NameStr(*PG_GETARG_NAME(0));
228 :
229 317806 : return hash_any((unsigned char *) key, strlen(key));
230 : }
231 :
232 : Datum
233 10 : hashnameextended(PG_FUNCTION_ARGS)
234 : {
235 10 : char *key = NameStr(*PG_GETARG_NAME(0));
236 :
237 10 : return hash_any_extended((unsigned char *) key, strlen(key),
238 10 : PG_GETARG_INT64(1));
239 : }
240 :
241 : Datum
242 17421 : hashtext(PG_FUNCTION_ARGS)
243 : {
244 17421 : text *key = PG_GETARG_TEXT_PP(0);
245 : Datum result;
246 :
247 : /*
248 : * Note: this is currently identical in behavior to hashvarlena, but keep
249 : * it as a separate function in case we someday want to do something
250 : * different in non-C locales. (See also hashbpchar, if so.)
251 : */
252 52263 : result = hash_any((unsigned char *) VARDATA_ANY(key),
253 52263 : VARSIZE_ANY_EXHDR(key));
254 :
255 : /* Avoid leaking memory for toasted inputs */
256 17421 : PG_FREE_IF_COPY(key, 0);
257 :
258 17421 : return result;
259 : }
260 :
261 : Datum
262 10 : hashtextextended(PG_FUNCTION_ARGS)
263 : {
264 10 : text *key = PG_GETARG_TEXT_PP(0);
265 : Datum result;
266 :
267 : /* Same approach as hashtext */
268 40 : result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
269 30 : VARSIZE_ANY_EXHDR(key),
270 10 : PG_GETARG_INT64(1));
271 :
272 10 : PG_FREE_IF_COPY(key, 0);
273 :
274 10 : return result;
275 : }
276 :
277 : /*
278 : * hashvarlena() can be used for any varlena datatype in which there are
279 : * no non-significant bits, ie, distinct bitpatterns never compare as equal.
280 : */
281 : Datum
282 656 : hashvarlena(PG_FUNCTION_ARGS)
283 : {
284 656 : struct varlena *key = PG_GETARG_VARLENA_PP(0);
285 : Datum result;
286 :
287 1968 : result = hash_any((unsigned char *) VARDATA_ANY(key),
288 1968 : VARSIZE_ANY_EXHDR(key));
289 :
290 : /* Avoid leaking memory for toasted inputs */
291 656 : PG_FREE_IF_COPY(key, 0);
292 :
293 656 : return result;
294 : }
295 :
296 : Datum
297 0 : hashvarlenaextended(PG_FUNCTION_ARGS)
298 : {
299 0 : struct varlena *key = PG_GETARG_VARLENA_PP(0);
300 : Datum result;
301 :
302 0 : result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
303 0 : VARSIZE_ANY_EXHDR(key),
304 0 : PG_GETARG_INT64(1));
305 :
306 0 : PG_FREE_IF_COPY(key, 0);
307 :
308 0 : return result;
309 : }
310 :
311 : /*
312 : * This hash function was written by Bob Jenkins
313 : * (bob_jenkins@burtleburtle.net), and superficially adapted
314 : * for PostgreSQL by Neil Conway. For more information on this
315 : * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
316 : * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
317 : *
318 : * In the current code, we have adopted Bob's 2006 update of his hash
319 : * function to fetch the data a word at a time when it is suitably aligned.
320 : * This makes for a useful speedup, at the cost of having to maintain
321 : * four code paths (aligned vs unaligned, and little-endian vs big-endian).
322 : * It also uses two separate mixing functions mix() and final(), instead
323 : * of a slower multi-purpose function.
324 : */
325 :
326 : /* Get a bit mask of the bits set in non-uint32 aligned addresses */
327 : #define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
328 :
329 : /* Rotate a uint32 value left by k bits - note multiple evaluation! */
330 : #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
331 :
332 : /*----------
333 : * mix -- mix 3 32-bit values reversibly.
334 : *
335 : * This is reversible, so any information in (a,b,c) before mix() is
336 : * still in (a,b,c) after mix().
337 : *
338 : * If four pairs of (a,b,c) inputs are run through mix(), or through
339 : * mix() in reverse, there are at least 32 bits of the output that
340 : * are sometimes the same for one pair and different for another pair.
341 : * This was tested for:
342 : * * pairs that differed by one bit, by two bits, in any combination
343 : * of top bits of (a,b,c), or in any combination of bottom bits of
344 : * (a,b,c).
345 : * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
346 : * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
347 : * is commonly produced by subtraction) look like a single 1-bit
348 : * difference.
349 : * * the base values were pseudorandom, all zero but one bit set, or
350 : * all zero plus a counter that starts at zero.
351 : *
352 : * This does not achieve avalanche. There are input bits of (a,b,c)
353 : * that fail to affect some output bits of (a,b,c), especially of a. The
354 : * most thoroughly mixed value is c, but it doesn't really even achieve
355 : * avalanche in c.
356 : *
357 : * This allows some parallelism. Read-after-writes are good at doubling
358 : * the number of bits affected, so the goal of mixing pulls in the opposite
359 : * direction from the goal of parallelism. I did what I could. Rotates
360 : * seem to cost as much as shifts on every machine I could lay my hands on,
361 : * and rotates are much kinder to the top and bottom bits, so I used rotates.
362 : *----------
363 : */
364 : #define mix(a,b,c) \
365 : { \
366 : a -= c; a ^= rot(c, 4); c += b; \
367 : b -= a; b ^= rot(a, 6); a += c; \
368 : c -= b; c ^= rot(b, 8); b += a; \
369 : a -= c; a ^= rot(c,16); c += b; \
370 : b -= a; b ^= rot(a,19); a += c; \
371 : c -= b; c ^= rot(b, 4); b += a; \
372 : }
373 :
374 : /*----------
375 : * final -- final mixing of 3 32-bit values (a,b,c) into c
376 : *
377 : * Pairs of (a,b,c) values differing in only a few bits will usually
378 : * produce values of c that look totally different. This was tested for
379 : * * pairs that differed by one bit, by two bits, in any combination
380 : * of top bits of (a,b,c), or in any combination of bottom bits of
381 : * (a,b,c).
382 : * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
383 : * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
384 : * is commonly produced by subtraction) look like a single 1-bit
385 : * difference.
386 : * * the base values were pseudorandom, all zero but one bit set, or
387 : * all zero plus a counter that starts at zero.
388 : *
389 : * The use of separate functions for mix() and final() allow for a
390 : * substantial performance increase since final() does not need to
391 : * do well in reverse, but is does need to affect all output bits.
392 : * mix(), on the other hand, does not need to affect all output
393 : * bits (affecting 32 bits is enough). The original hash function had
394 : * a single mixing operation that had to satisfy both sets of requirements
395 : * and was slower as a result.
396 : *----------
397 : */
398 : #define final(a,b,c) \
399 : { \
400 : c ^= b; c -= rot(b,14); \
401 : a ^= c; a -= rot(c,11); \
402 : b ^= a; b -= rot(a,25); \
403 : c ^= b; c -= rot(b,16); \
404 : a ^= c; a -= rot(c, 4); \
405 : b ^= a; b -= rot(a,14); \
406 : c ^= b; c -= rot(b,24); \
407 : }
408 :
409 : /*
410 : * hash_any() -- hash a variable-length key into a 32-bit value
411 : * k : the key (the unaligned variable-length array of bytes)
412 : * len : the length of the key, counting by bytes
413 : *
414 : * Returns a uint32 value. Every bit of the key affects every bit of
415 : * the return value. Every 1-bit and 2-bit delta achieves avalanche.
416 : * About 6*len+35 instructions. The best hash table sizes are powers
417 : * of 2. There is no need to do mod a prime (mod is sooo slow!).
418 : * If you need less than 32 bits, use a bitmask.
419 : *
420 : * This procedure must never throw elog(ERROR); the ResourceOwner code
421 : * relies on this not to fail.
422 : *
423 : * Note: we could easily change this function to return a 64-bit hash value
424 : * by using the final values of both b and c. b is perhaps a little less
425 : * well mixed than c, however.
426 : */
427 : Datum
428 7492268 : hash_any(register const unsigned char *k, register int keylen)
429 : {
430 : register uint32 a,
431 : b,
432 : c,
433 : len;
434 :
435 : /* Set up the internal state */
436 7492268 : len = keylen;
437 7492268 : a = b = c = 0x9e3779b9 + len + 3923095;
438 :
439 : /* If the source pointer is word-aligned, we use word-wide fetches */
440 7492268 : if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
441 : {
442 : /* Code path for aligned source data */
443 7435278 : register const uint32 *ka = (const uint32 *) k;
444 :
445 : /* handle most of the key */
446 22390861 : while (len >= 12)
447 : {
448 7520305 : a += ka[0];
449 7520305 : b += ka[1];
450 7520305 : c += ka[2];
451 7520305 : mix(a, b, c);
452 7520305 : ka += 3;
453 7520305 : len -= 12;
454 : }
455 :
456 : /* handle the last 11 bytes */
457 7435278 : k = (const unsigned char *) ka;
458 : #ifdef WORDS_BIGENDIAN
459 : switch (len)
460 : {
461 : case 11:
462 : c += ((uint32) k[10] << 8);
463 : /* fall through */
464 : case 10:
465 : c += ((uint32) k[9] << 16);
466 : /* fall through */
467 : case 9:
468 : c += ((uint32) k[8] << 24);
469 : /* the lowest byte of c is reserved for the length */
470 : /* fall through */
471 : case 8:
472 : b += ka[1];
473 : a += ka[0];
474 : break;
475 : case 7:
476 : b += ((uint32) k[6] << 8);
477 : /* fall through */
478 : case 6:
479 : b += ((uint32) k[5] << 16);
480 : /* fall through */
481 : case 5:
482 : b += ((uint32) k[4] << 24);
483 : /* fall through */
484 : case 4:
485 : a += ka[0];
486 : break;
487 : case 3:
488 : a += ((uint32) k[2] << 8);
489 : /* fall through */
490 : case 2:
491 : a += ((uint32) k[1] << 16);
492 : /* fall through */
493 : case 1:
494 : a += ((uint32) k[0] << 24);
495 : /* case 0: nothing left to add */
496 : }
497 : #else /* !WORDS_BIGENDIAN */
498 7435278 : switch (len)
499 : {
500 : case 11:
501 12960 : c += ((uint32) k[10] << 24);
502 : /* fall through */
503 : case 10:
504 46677 : c += ((uint32) k[9] << 16);
505 : /* fall through */
506 : case 9:
507 68721 : c += ((uint32) k[8] << 8);
508 : /* the lowest byte of c is reserved for the length */
509 : /* fall through */
510 : case 8:
511 5986166 : b += ka[1];
512 5986166 : a += ka[0];
513 5986166 : break;
514 : case 7:
515 23378 : b += ((uint32) k[6] << 16);
516 : /* fall through */
517 : case 6:
518 73531 : b += ((uint32) k[5] << 8);
519 : /* fall through */
520 : case 5:
521 87928 : b += k[4];
522 : /* fall through */
523 : case 4:
524 1179651 : a += ka[0];
525 1179651 : break;
526 : case 3:
527 23005 : a += ((uint32) k[2] << 16);
528 : /* fall through */
529 : case 2:
530 68915 : a += ((uint32) k[1] << 8);
531 : /* fall through */
532 : case 1:
533 99267 : a += k[0];
534 : /* case 0: nothing left to add */
535 : }
536 : #endif /* WORDS_BIGENDIAN */
537 : }
538 : else
539 : {
540 : /* Code path for non-aligned source data */
541 :
542 : /* handle most of the key */
543 116360 : while (len >= 12)
544 : {
545 : #ifdef WORDS_BIGENDIAN
546 : a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
547 : b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
548 : c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
549 : #else /* !WORDS_BIGENDIAN */
550 2380 : a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
551 2380 : b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
552 2380 : c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
553 : #endif /* WORDS_BIGENDIAN */
554 2380 : mix(a, b, c);
555 2380 : k += 12;
556 2380 : len -= 12;
557 : }
558 :
559 : /* handle the last 11 bytes */
560 : #ifdef WORDS_BIGENDIAN
561 : switch (len) /* all the case statements fall through */
562 : {
563 : case 11:
564 : c += ((uint32) k[10] << 8);
565 : case 10:
566 : c += ((uint32) k[9] << 16);
567 : case 9:
568 : c += ((uint32) k[8] << 24);
569 : /* the lowest byte of c is reserved for the length */
570 : case 8:
571 : b += k[7];
572 : case 7:
573 : b += ((uint32) k[6] << 8);
574 : case 6:
575 : b += ((uint32) k[5] << 16);
576 : case 5:
577 : b += ((uint32) k[4] << 24);
578 : case 4:
579 : a += k[3];
580 : case 3:
581 : a += ((uint32) k[2] << 8);
582 : case 2:
583 : a += ((uint32) k[1] << 16);
584 : case 1:
585 : a += ((uint32) k[0] << 24);
586 : /* case 0: nothing left to add */
587 : }
588 : #else /* !WORDS_BIGENDIAN */
589 56990 : switch (len) /* all the case statements fall through */
590 : {
591 : case 11:
592 134 : c += ((uint32) k[10] << 24);
593 : case 10:
594 11772 : c += ((uint32) k[9] << 16);
595 : case 9:
596 17112 : c += ((uint32) k[8] << 8);
597 : /* the lowest byte of c is reserved for the length */
598 : case 8:
599 19777 : b += ((uint32) k[7] << 24);
600 : case 7:
601 21278 : b += ((uint32) k[6] << 16);
602 : case 6:
603 23330 : b += ((uint32) k[5] << 8);
604 : case 5:
605 26083 : b += k[4];
606 : case 4:
607 31952 : a += ((uint32) k[3] << 24);
608 : case 3:
609 34843 : a += ((uint32) k[2] << 16);
610 : case 2:
611 56148 : a += ((uint32) k[1] << 8);
612 : case 1:
613 56977 : a += k[0];
614 : /* case 0: nothing left to add */
615 : }
616 : #endif /* WORDS_BIGENDIAN */
617 : }
618 :
619 7492268 : final(a, b, c);
620 :
621 : /* report the result */
622 7492268 : return UInt32GetDatum(c);
623 : }
624 :
625 : /*
626 : * hash_any_extended() -- hash into a 64-bit value, using an optional seed
627 : * k : the key (the unaligned variable-length array of bytes)
628 : * len : the length of the key, counting by bytes
629 : * seed : a 64-bit seed (0 means no seed)
630 : *
631 : * Returns a uint64 value. Otherwise similar to hash_any.
632 : */
633 : Datum
634 142 : hash_any_extended(register const unsigned char *k, register int keylen,
635 : uint64 seed)
636 : {
637 : register uint32 a,
638 : b,
639 : c,
640 : len;
641 :
642 : /* Set up the internal state */
643 142 : len = keylen;
644 142 : a = b = c = 0x9e3779b9 + len + 3923095;
645 :
646 : /* If the seed is non-zero, use it to perturb the internal state. */
647 142 : if (seed != 0)
648 : {
649 : /*
650 : * In essence, the seed is treated as part of the data being hashed,
651 : * but for simplicity, we pretend that it's padded with four bytes of
652 : * zeroes so that the seed constitutes a 12-byte chunk.
653 : */
654 71 : a += (uint32) (seed >> 32);
655 71 : b += (uint32) seed;
656 71 : mix(a, b, c);
657 : }
658 :
659 : /* If the source pointer is word-aligned, we use word-wide fetches */
660 142 : if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
661 : {
662 : /* Code path for aligned source data */
663 116 : register const uint32 *ka = (const uint32 *) k;
664 :
665 : /* handle most of the key */
666 254 : while (len >= 12)
667 : {
668 22 : a += ka[0];
669 22 : b += ka[1];
670 22 : c += ka[2];
671 22 : mix(a, b, c);
672 22 : ka += 3;
673 22 : len -= 12;
674 : }
675 :
676 : /* handle the last 11 bytes */
677 116 : k = (const unsigned char *) ka;
678 : #ifdef WORDS_BIGENDIAN
679 : switch (len)
680 : {
681 : case 11:
682 : c += ((uint32) k[10] << 8);
683 : /* fall through */
684 : case 10:
685 : c += ((uint32) k[9] << 16);
686 : /* fall through */
687 : case 9:
688 : c += ((uint32) k[8] << 24);
689 : /* the lowest byte of c is reserved for the length */
690 : /* fall through */
691 : case 8:
692 : b += ka[1];
693 : a += ka[0];
694 : break;
695 : case 7:
696 : b += ((uint32) k[6] << 8);
697 : /* fall through */
698 : case 6:
699 : b += ((uint32) k[5] << 16);
700 : /* fall through */
701 : case 5:
702 : b += ((uint32) k[4] << 24);
703 : /* fall through */
704 : case 4:
705 : a += ka[0];
706 : break;
707 : case 3:
708 : a += ((uint32) k[2] << 8);
709 : /* fall through */
710 : case 2:
711 : a += ((uint32) k[1] << 16);
712 : /* fall through */
713 : case 1:
714 : a += ((uint32) k[0] << 24);
715 : /* case 0: nothing left to add */
716 : }
717 : #else /* !WORDS_BIGENDIAN */
718 116 : switch (len)
719 : {
720 : case 11:
721 8 : c += ((uint32) k[10] << 24);
722 : /* fall through */
723 : case 10:
724 14 : c += ((uint32) k[9] << 16);
725 : /* fall through */
726 : case 9:
727 26 : c += ((uint32) k[8] << 8);
728 : /* the lowest byte of c is reserved for the length */
729 : /* fall through */
730 : case 8:
731 64 : b += ka[1];
732 64 : a += ka[0];
733 64 : break;
734 : case 7:
735 0 : b += ((uint32) k[6] << 16);
736 : /* fall through */
737 : case 6:
738 20 : b += ((uint32) k[5] << 8);
739 : /* fall through */
740 : case 5:
741 20 : b += k[4];
742 : /* fall through */
743 : case 4:
744 34 : a += ka[0];
745 34 : break;
746 : case 3:
747 4 : a += ((uint32) k[2] << 16);
748 : /* fall through */
749 : case 2:
750 4 : a += ((uint32) k[1] << 8);
751 : /* fall through */
752 : case 1:
753 14 : a += k[0];
754 : /* case 0: nothing left to add */
755 : }
756 : #endif /* WORDS_BIGENDIAN */
757 : }
758 : else
759 : {
760 : /* Code path for non-aligned source data */
761 :
762 : /* handle most of the key */
763 54 : while (len >= 12)
764 : {
765 : #ifdef WORDS_BIGENDIAN
766 : a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
767 : b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
768 : c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
769 : #else /* !WORDS_BIGENDIAN */
770 2 : a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
771 2 : b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
772 2 : c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
773 : #endif /* WORDS_BIGENDIAN */
774 2 : mix(a, b, c);
775 2 : k += 12;
776 2 : len -= 12;
777 : }
778 :
779 : /* handle the last 11 bytes */
780 : #ifdef WORDS_BIGENDIAN
781 : switch (len) /* all the case statements fall through */
782 : {
783 : case 11:
784 : c += ((uint32) k[10] << 8);
785 : case 10:
786 : c += ((uint32) k[9] << 16);
787 : case 9:
788 : c += ((uint32) k[8] << 24);
789 : /* the lowest byte of c is reserved for the length */
790 : case 8:
791 : b += k[7];
792 : case 7:
793 : b += ((uint32) k[6] << 8);
794 : case 6:
795 : b += ((uint32) k[5] << 16);
796 : case 5:
797 : b += ((uint32) k[4] << 24);
798 : case 4:
799 : a += k[3];
800 : case 3:
801 : a += ((uint32) k[2] << 8);
802 : case 2:
803 : a += ((uint32) k[1] << 16);
804 : case 1:
805 : a += ((uint32) k[0] << 24);
806 : /* case 0: nothing left to add */
807 : }
808 : #else /* !WORDS_BIGENDIAN */
809 26 : switch (len) /* all the case statements fall through */
810 : {
811 : case 11:
812 0 : c += ((uint32) k[10] << 24);
813 : case 10:
814 2 : c += ((uint32) k[9] << 16);
815 : case 9:
816 2 : c += ((uint32) k[8] << 8);
817 : /* the lowest byte of c is reserved for the length */
818 : case 8:
819 10 : b += ((uint32) k[7] << 24);
820 : case 7:
821 12 : b += ((uint32) k[6] << 16);
822 : case 6:
823 12 : b += ((uint32) k[5] << 8);
824 : case 5:
825 14 : b += k[4];
826 : case 4:
827 16 : a += ((uint32) k[3] << 24);
828 : case 3:
829 18 : a += ((uint32) k[2] << 16);
830 : case 2:
831 20 : a += ((uint32) k[1] << 8);
832 : case 1:
833 26 : a += k[0];
834 : /* case 0: nothing left to add */
835 : }
836 : #endif /* WORDS_BIGENDIAN */
837 : }
838 :
839 142 : final(a, b, c);
840 :
841 : /* report the result */
842 142 : PG_RETURN_UINT64(((uint64) b << 32) | c);
843 : }
844 :
845 : /*
846 : * hash_uint32() -- hash a 32-bit value to a 32-bit value
847 : *
848 : * This has the same result as
849 : * hash_any(&k, sizeof(uint32))
850 : * but is faster and doesn't force the caller to store k into memory.
851 : */
852 : Datum
853 6112300 : hash_uint32(uint32 k)
854 : {
855 : register uint32 a,
856 : b,
857 : c;
858 :
859 6112300 : a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
860 6112300 : a += k;
861 :
862 6112300 : final(a, b, c);
863 :
864 : /* report the result */
865 6112300 : return UInt32GetDatum(c);
866 : }
867 :
868 : /*
869 : * hash_uint32_extended() -- hash a 32-bit value to a 64-bit value, with a seed
870 : *
871 : * Like hash_uint32, this is a convenience function.
872 : */
873 : Datum
874 190 : hash_uint32_extended(uint32 k, uint64 seed)
875 : {
876 : register uint32 a,
877 : b,
878 : c;
879 :
880 190 : a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
881 :
882 190 : if (seed != 0)
883 : {
884 96 : a += (uint32) (seed >> 32);
885 96 : b += (uint32) seed;
886 96 : mix(a, b, c);
887 : }
888 :
889 190 : a += k;
890 :
891 190 : final(a, b, c);
892 :
893 : /* report the result */
894 190 : PG_RETURN_UINT64(((uint64) b << 32) | c);
895 : }
|