Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * uuid.c
4 : * Functions for the built-in type "uuid".
5 : *
6 : * Copyright (c) 2007-2017, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/adt/uuid.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 :
14 : #include "postgres.h"
15 :
16 : #include "access/hash.h"
17 : #include "lib/hyperloglog.h"
18 : #include "libpq/pqformat.h"
19 : #include "port/pg_bswap.h"
20 : #include "utils/builtins.h"
21 : #include "utils/guc.h"
22 : #include "utils/sortsupport.h"
23 : #include "utils/uuid.h"
24 :
25 : /* sortsupport for uuid */
26 : typedef struct
27 : {
28 : int64 input_count; /* number of non-null values seen */
29 : bool estimating; /* true if estimating cardinality */
30 :
31 : hyperLogLogState abbr_card; /* cardinality estimator */
32 : } uuid_sortsupport_state;
33 :
34 : static void string_to_uuid(const char *source, pg_uuid_t *uuid);
35 : static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
36 : static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
37 : static int uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
38 : static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
39 : static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
40 :
41 : Datum
42 155 : uuid_in(PG_FUNCTION_ARGS)
43 : {
44 155 : char *uuid_str = PG_GETARG_CSTRING(0);
45 : pg_uuid_t *uuid;
46 :
47 155 : uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
48 155 : string_to_uuid(uuid_str, uuid);
49 149 : PG_RETURN_UUID_P(uuid);
50 : }
51 :
52 : Datum
53 25 : uuid_out(PG_FUNCTION_ARGS)
54 : {
55 25 : pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
56 : static const char hex_chars[] = "0123456789abcdef";
57 : StringInfoData buf;
58 : int i;
59 :
60 25 : initStringInfo(&buf);
61 425 : for (i = 0; i < UUID_LEN; i++)
62 : {
63 : int hi;
64 : int lo;
65 :
66 : /*
67 : * We print uuid values as a string of 8, 4, 4, 4, and then 12
68 : * hexadecimal characters, with each group is separated by a hyphen
69 : * ("-"). Therefore, add the hyphens at the appropriate places here.
70 : */
71 400 : if (i == 4 || i == 6 || i == 8 || i == 10)
72 100 : appendStringInfoChar(&buf, '-');
73 :
74 400 : hi = uuid->data[i] >> 4;
75 400 : lo = uuid->data[i] & 0x0F;
76 :
77 400 : appendStringInfoChar(&buf, hex_chars[hi]);
78 400 : appendStringInfoChar(&buf, hex_chars[lo]);
79 : }
80 :
81 25 : PG_RETURN_CSTRING(buf.data);
82 : }
83 :
84 : /*
85 : * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
86 : * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
87 : * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
88 : * digits, is the only one used for output.)
89 : */
90 : static void
91 155 : string_to_uuid(const char *source, pg_uuid_t *uuid)
92 : {
93 155 : const char *src = source;
94 155 : bool braces = false;
95 : int i;
96 :
97 155 : if (src[0] == '{')
98 : {
99 4 : src++;
100 4 : braces = true;
101 : }
102 :
103 2596 : for (i = 0; i < UUID_LEN; i++)
104 : {
105 : char str_buf[3];
106 :
107 2445 : if (src[0] == '\0' || src[1] == '\0')
108 : goto syntax_error;
109 2445 : memcpy(str_buf, src, 2);
110 4888 : if (!isxdigit((unsigned char) str_buf[0]) ||
111 2443 : !isxdigit((unsigned char) str_buf[1]))
112 : goto syntax_error;
113 :
114 2441 : str_buf[2] = '\0';
115 2441 : uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
116 2441 : src += 2;
117 2441 : if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
118 599 : src++;
119 : }
120 :
121 151 : if (braces)
122 : {
123 3 : if (*src != '}')
124 1 : goto syntax_error;
125 2 : src++;
126 : }
127 :
128 150 : if (*src != '\0')
129 1 : goto syntax_error;
130 :
131 298 : return;
132 :
133 : syntax_error:
134 6 : ereport(ERROR,
135 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
136 : errmsg("invalid input syntax for type %s: \"%s\"",
137 : "uuid", source)));
138 : }
139 :
140 : Datum
141 0 : uuid_recv(PG_FUNCTION_ARGS)
142 : {
143 0 : StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0);
144 : pg_uuid_t *uuid;
145 :
146 0 : uuid = (pg_uuid_t *) palloc(UUID_LEN);
147 0 : memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
148 0 : PG_RETURN_POINTER(uuid);
149 : }
150 :
151 : Datum
152 0 : uuid_send(PG_FUNCTION_ARGS)
153 : {
154 0 : pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
155 : StringInfoData buffer;
156 :
157 0 : pq_begintypsend(&buffer);
158 0 : pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
159 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
160 : }
161 :
162 : /* internal uuid compare function */
163 : static int
164 1905 : uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
165 : {
166 1905 : return memcmp(arg1->data, arg2->data, UUID_LEN);
167 : }
168 :
169 : Datum
170 513 : uuid_lt(PG_FUNCTION_ARGS)
171 : {
172 513 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
173 513 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
174 :
175 513 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
176 : }
177 :
178 : Datum
179 403 : uuid_le(PG_FUNCTION_ARGS)
180 : {
181 403 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
182 403 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
183 :
184 403 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
185 : }
186 :
187 : Datum
188 110 : uuid_eq(PG_FUNCTION_ARGS)
189 : {
190 110 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
191 110 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
192 :
193 110 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
194 : }
195 :
196 : Datum
197 354 : uuid_ge(PG_FUNCTION_ARGS)
198 : {
199 354 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
200 354 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
201 :
202 354 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
203 : }
204 :
205 : Datum
206 513 : uuid_gt(PG_FUNCTION_ARGS)
207 : {
208 513 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
209 513 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
210 :
211 513 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
212 : }
213 :
214 : Datum
215 3 : uuid_ne(PG_FUNCTION_ARGS)
216 : {
217 3 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
218 3 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
219 :
220 3 : PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
221 : }
222 :
223 : /* handler for btree index operator */
224 : Datum
225 9 : uuid_cmp(PG_FUNCTION_ARGS)
226 : {
227 9 : pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
228 9 : pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
229 :
230 9 : PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
231 : }
232 :
233 : /*
234 : * Sort support strategy routine
235 : */
236 : Datum
237 5 : uuid_sortsupport(PG_FUNCTION_ARGS)
238 : {
239 5 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
240 :
241 5 : ssup->comparator = uuid_fast_cmp;
242 5 : ssup->ssup_extra = NULL;
243 :
244 5 : if (ssup->abbreviate)
245 : {
246 : uuid_sortsupport_state *uss;
247 : MemoryContext oldcontext;
248 :
249 5 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
250 :
251 5 : uss = palloc(sizeof(uuid_sortsupport_state));
252 5 : uss->input_count = 0;
253 5 : uss->estimating = true;
254 5 : initHyperLogLog(&uss->abbr_card, 10);
255 :
256 5 : ssup->ssup_extra = uss;
257 :
258 5 : ssup->comparator = uuid_cmp_abbrev;
259 5 : ssup->abbrev_converter = uuid_abbrev_convert;
260 5 : ssup->abbrev_abort = uuid_abbrev_abort;
261 5 : ssup->abbrev_full_comparator = uuid_fast_cmp;
262 :
263 5 : MemoryContextSwitchTo(oldcontext);
264 : }
265 :
266 5 : PG_RETURN_VOID();
267 : }
268 :
269 : /*
270 : * SortSupport comparison func
271 : */
272 : static int
273 0 : uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
274 : {
275 0 : pg_uuid_t *arg1 = DatumGetUUIDP(x);
276 0 : pg_uuid_t *arg2 = DatumGetUUIDP(y);
277 :
278 0 : return uuid_internal_cmp(arg1, arg2);
279 : }
280 :
281 : /*
282 : * Abbreviated key comparison func
283 : */
284 : static int
285 9 : uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
286 : {
287 9 : if (x > y)
288 0 : return 1;
289 9 : else if (x == y)
290 0 : return 0;
291 : else
292 9 : return -1;
293 : }
294 :
295 : /*
296 : * Callback for estimating effectiveness of abbreviated key optimization.
297 : *
298 : * We pay no attention to the cardinality of the non-abbreviated data, because
299 : * there is no equality fast-path within authoritative uuid comparator.
300 : */
301 : static bool
302 0 : uuid_abbrev_abort(int memtupcount, SortSupport ssup)
303 : {
304 0 : uuid_sortsupport_state *uss = ssup->ssup_extra;
305 : double abbr_card;
306 :
307 0 : if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
308 0 : return false;
309 :
310 0 : abbr_card = estimateHyperLogLog(&uss->abbr_card);
311 :
312 : /*
313 : * If we have >100k distinct values, then even if we were sorting many
314 : * billion rows we'd likely still break even, and the penalty of undoing
315 : * that many rows of abbrevs would probably not be worth it. Stop even
316 : * counting at that point.
317 : */
318 0 : if (abbr_card > 100000.0)
319 : {
320 : #ifdef TRACE_SORT
321 0 : if (trace_sort)
322 0 : elog(LOG,
323 : "uuid_abbrev: estimation ends at cardinality %f"
324 : " after " INT64_FORMAT " values (%d rows)",
325 : abbr_card, uss->input_count, memtupcount);
326 : #endif
327 0 : uss->estimating = false;
328 0 : return false;
329 : }
330 :
331 : /*
332 : * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
333 : * fudge factor allows us to abort earlier on genuinely pathological data
334 : * where we've had exactly one abbreviated value in the first 2k
335 : * (non-null) rows.
336 : */
337 0 : if (abbr_card < uss->input_count / 2000.0 + 0.5)
338 : {
339 : #ifdef TRACE_SORT
340 0 : if (trace_sort)
341 0 : elog(LOG,
342 : "uuid_abbrev: aborting abbreviation at cardinality %f"
343 : " below threshold %f after " INT64_FORMAT " values (%d rows)",
344 : abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
345 : memtupcount);
346 : #endif
347 0 : return true;
348 : }
349 :
350 : #ifdef TRACE_SORT
351 0 : if (trace_sort)
352 0 : elog(LOG,
353 : "uuid_abbrev: cardinality %f after " INT64_FORMAT
354 : " values (%d rows)", abbr_card, uss->input_count, memtupcount);
355 : #endif
356 :
357 0 : return false;
358 : }
359 :
360 : /*
361 : * Conversion routine for sortsupport. Converts original uuid representation
362 : * to abbreviated key representation. Our encoding strategy is simple -- pack
363 : * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
364 : * machines, the bytes are stored in reverse order), and treat it as an
365 : * unsigned integer.
366 : */
367 : static Datum
368 12 : uuid_abbrev_convert(Datum original, SortSupport ssup)
369 : {
370 12 : uuid_sortsupport_state *uss = ssup->ssup_extra;
371 12 : pg_uuid_t *authoritative = DatumGetUUIDP(original);
372 : Datum res;
373 :
374 12 : memcpy(&res, authoritative->data, sizeof(Datum));
375 12 : uss->input_count += 1;
376 :
377 12 : if (uss->estimating)
378 : {
379 : uint32 tmp;
380 :
381 : #if SIZEOF_DATUM == 8
382 : tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
383 : #else /* SIZEOF_DATUM != 8 */
384 12 : tmp = (uint32) res;
385 : #endif
386 :
387 12 : addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
388 : }
389 :
390 : /*
391 : * Byteswap on little-endian machines.
392 : *
393 : * This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
394 : * comparator) works correctly on all platforms. If we didn't do this,
395 : * the comparator would have to call memcmp() with a pair of pointers to
396 : * the first byte of each abbreviated key, which is slower.
397 : */
398 12 : res = DatumBigEndianToNative(res);
399 :
400 12 : return res;
401 : }
402 :
403 : /* hash index support */
404 : Datum
405 29 : uuid_hash(PG_FUNCTION_ARGS)
406 : {
407 29 : pg_uuid_t *key = PG_GETARG_UUID_P(0);
408 :
409 29 : return hash_any(key->data, UUID_LEN);
410 : }
411 :
412 : Datum
413 10 : uuid_hash_extended(PG_FUNCTION_ARGS)
414 : {
415 10 : pg_uuid_t *key = PG_GETARG_UUID_P(0);
416 :
417 10 : return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
418 : }
|