Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * jsonb_gin.c
4 : * GIN support functions for jsonb
5 : *
6 : * Copyright (c) 2014-2017, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/jsonb_gin.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/gin.h"
17 : #include "access/hash.h"
18 : #include "access/stratnum.h"
19 : #include "catalog/pg_collation.h"
20 : #include "catalog/pg_type.h"
21 : #include "utils/builtins.h"
22 : #include "utils/jsonb.h"
23 : #include "utils/varlena.h"
24 :
25 : typedef struct PathHashStack
26 : {
27 : uint32 hash;
28 : struct PathHashStack *parent;
29 : } PathHashStack;
30 :
31 : static Datum make_text_key(char flag, const char *str, int len);
32 : static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key);
33 :
34 : /*
35 : *
36 : * jsonb_ops GIN opclass support functions
37 : *
38 : */
39 :
40 : Datum
41 121795 : gin_compare_jsonb(PG_FUNCTION_ARGS)
42 : {
43 121795 : text *arg1 = PG_GETARG_TEXT_PP(0);
44 121795 : text *arg2 = PG_GETARG_TEXT_PP(1);
45 : int32 result;
46 : char *a1p,
47 : *a2p;
48 : int len1,
49 : len2;
50 :
51 121795 : a1p = VARDATA_ANY(arg1);
52 121795 : a2p = VARDATA_ANY(arg2);
53 :
54 121795 : len1 = VARSIZE_ANY_EXHDR(arg1);
55 121795 : len2 = VARSIZE_ANY_EXHDR(arg2);
56 :
57 : /* Compare text as bttextcmp does, but always using C collation */
58 121795 : result = varstr_cmp(a1p, len1, a2p, len2, C_COLLATION_OID);
59 :
60 121795 : PG_FREE_IF_COPY(arg1, 0);
61 121795 : PG_FREE_IF_COPY(arg2, 1);
62 :
63 121795 : PG_RETURN_INT32(result);
64 : }
65 :
66 : Datum
67 1035 : gin_extract_jsonb(PG_FUNCTION_ARGS)
68 : {
69 1035 : Jsonb *jb = (Jsonb *) PG_GETARG_JSONB(0);
70 1035 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
71 1035 : int total = 2 * JB_ROOT_COUNT(jb);
72 : JsonbIterator *it;
73 : JsonbValue v;
74 : JsonbIteratorToken r;
75 1035 : int i = 0;
76 : Datum *entries;
77 :
78 : /* If the root level is empty, we certainly have no keys */
79 1035 : if (total == 0)
80 : {
81 120 : *nentries = 0;
82 120 : PG_RETURN_POINTER(NULL);
83 : }
84 :
85 : /* Otherwise, use 2 * root count as initial estimate of result size */
86 915 : entries = (Datum *) palloc(sizeof(Datum) * total);
87 :
88 915 : it = JsonbIteratorInit(&jb->root);
89 :
90 13320 : while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
91 : {
92 : /* Since we recurse into the object, we might need more space */
93 11490 : if (i >= total)
94 : {
95 910 : total *= 2;
96 910 : entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
97 : }
98 :
99 11490 : switch (r)
100 : {
101 : case WJB_KEY:
102 4810 : entries[i++] = make_scalar_key(&v, true);
103 4810 : break;
104 : case WJB_ELEM:
105 : /* Pretend string array elements are keys, see jsonb.h */
106 28 : entries[i++] = make_scalar_key(&v, (v.type == jbvString));
107 28 : break;
108 : case WJB_VALUE:
109 4798 : entries[i++] = make_scalar_key(&v, false);
110 4798 : break;
111 : default:
112 : /* we can ignore structural items */
113 1854 : break;
114 : }
115 : }
116 :
117 915 : *nentries = i;
118 :
119 915 : PG_RETURN_POINTER(entries);
120 : }
121 :
122 : Datum
123 30 : gin_extract_jsonb_query(PG_FUNCTION_ARGS)
124 : {
125 30 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
126 30 : StrategyNumber strategy = PG_GETARG_UINT16(2);
127 30 : int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
128 : Datum *entries;
129 :
130 30 : if (strategy == JsonbContainsStrategyNumber)
131 : {
132 : /* Query is a jsonb, so just apply gin_extract_jsonb... */
133 18 : entries = (Datum *)
134 18 : DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb,
135 : PG_GETARG_DATUM(0),
136 : PointerGetDatum(nentries)));
137 : /* ...although "contains {}" requires a full index scan */
138 18 : if (*nentries == 0)
139 2 : *searchMode = GIN_SEARCH_MODE_ALL;
140 : }
141 12 : else if (strategy == JsonbExistsStrategyNumber)
142 : {
143 : /* Query is a text string, which we treat as a key */
144 8 : text *query = PG_GETARG_TEXT_PP(0);
145 :
146 8 : *nentries = 1;
147 8 : entries = (Datum *) palloc(sizeof(Datum));
148 32 : entries[0] = make_text_key(JGINFLAG_KEY,
149 8 : VARDATA_ANY(query),
150 24 : VARSIZE_ANY_EXHDR(query));
151 : }
152 4 : else if (strategy == JsonbExistsAnyStrategyNumber ||
153 : strategy == JsonbExistsAllStrategyNumber)
154 4 : {
155 : /* Query is a text array; each element is treated as a key */
156 4 : ArrayType *query = PG_GETARG_ARRAYTYPE_P(0);
157 : Datum *key_datums;
158 : bool *key_nulls;
159 : int key_count;
160 : int i,
161 : j;
162 :
163 4 : deconstruct_array(query,
164 : TEXTOID, -1, false, 'i',
165 : &key_datums, &key_nulls, &key_count);
166 :
167 4 : entries = (Datum *) palloc(sizeof(Datum) * key_count);
168 :
169 12 : for (i = 0, j = 0; i < key_count; i++)
170 : {
171 : /* Nulls in the array are ignored */
172 8 : if (key_nulls[i])
173 0 : continue;
174 24 : entries[j++] = make_text_key(JGINFLAG_KEY,
175 8 : VARDATA(key_datums[i]),
176 8 : VARSIZE(key_datums[i]) - VARHDRSZ);
177 : }
178 :
179 4 : *nentries = j;
180 : /* ExistsAll with no keys should match everything */
181 4 : if (j == 0 && strategy == JsonbExistsAllStrategyNumber)
182 0 : *searchMode = GIN_SEARCH_MODE_ALL;
183 : }
184 : else
185 : {
186 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
187 : entries = NULL; /* keep compiler quiet */
188 : }
189 :
190 30 : PG_RETURN_POINTER(entries);
191 : }
192 :
193 : Datum
194 0 : gin_consistent_jsonb(PG_FUNCTION_ARGS)
195 : {
196 0 : bool *check = (bool *) PG_GETARG_POINTER(0);
197 0 : StrategyNumber strategy = PG_GETARG_UINT16(1);
198 :
199 : /* Jsonb *query = PG_GETARG_JSONB(2); */
200 0 : int32 nkeys = PG_GETARG_INT32(3);
201 :
202 : /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
203 0 : bool *recheck = (bool *) PG_GETARG_POINTER(5);
204 0 : bool res = true;
205 : int32 i;
206 :
207 0 : if (strategy == JsonbContainsStrategyNumber)
208 : {
209 : /*
210 : * We must always recheck, since we can't tell from the index whether
211 : * the positions of the matched items match the structure of the query
212 : * object. (Even if we could, we'd also have to worry about hashed
213 : * keys and the index's failure to distinguish keys from string array
214 : * elements.) However, the tuple certainly doesn't match unless it
215 : * contains all the query keys.
216 : */
217 0 : *recheck = true;
218 0 : for (i = 0; i < nkeys; i++)
219 : {
220 0 : if (!check[i])
221 : {
222 0 : res = false;
223 0 : break;
224 : }
225 : }
226 : }
227 0 : else if (strategy == JsonbExistsStrategyNumber)
228 : {
229 : /*
230 : * Although the key is certainly present in the index, we must recheck
231 : * because (1) the key might be hashed, and (2) the index match might
232 : * be for a key that's not at top level of the JSON object. For (1),
233 : * we could look at the query key to see if it's hashed and not
234 : * recheck if not, but the index lacks enough info to tell about (2).
235 : */
236 0 : *recheck = true;
237 0 : res = true;
238 : }
239 0 : else if (strategy == JsonbExistsAnyStrategyNumber)
240 : {
241 : /* As for plain exists, we must recheck */
242 0 : *recheck = true;
243 0 : res = true;
244 : }
245 0 : else if (strategy == JsonbExistsAllStrategyNumber)
246 : {
247 : /* As for plain exists, we must recheck */
248 0 : *recheck = true;
249 : /* ... but unless all the keys are present, we can say "false" */
250 0 : for (i = 0; i < nkeys; i++)
251 : {
252 0 : if (!check[i])
253 : {
254 0 : res = false;
255 0 : break;
256 : }
257 : }
258 : }
259 : else
260 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
261 :
262 0 : PG_RETURN_BOOL(res);
263 : }
264 :
265 : Datum
266 1983 : gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
267 : {
268 1983 : GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
269 1983 : StrategyNumber strategy = PG_GETARG_UINT16(1);
270 :
271 : /* Jsonb *query = PG_GETARG_JSONB(2); */
272 1983 : int32 nkeys = PG_GETARG_INT32(3);
273 :
274 : /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
275 1983 : GinTernaryValue res = GIN_MAYBE;
276 : int32 i;
277 :
278 : /*
279 : * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
280 : * corresponds to always forcing recheck in the regular consistent
281 : * function, for the reasons listed there.
282 : */
283 3426 : if (strategy == JsonbContainsStrategyNumber ||
284 : strategy == JsonbExistsAllStrategyNumber)
285 : {
286 : /* All extracted keys must be present */
287 1690 : for (i = 0; i < nkeys; i++)
288 : {
289 589 : if (check[i] == GIN_FALSE)
290 : {
291 342 : res = GIN_FALSE;
292 342 : break;
293 : }
294 : }
295 : }
296 1080 : else if (strategy == JsonbExistsStrategyNumber ||
297 : strategy == JsonbExistsAnyStrategyNumber)
298 : {
299 : /* At least one extracted key must be present */
300 540 : res = GIN_FALSE;
301 683 : for (i = 0; i < nkeys; i++)
302 : {
303 827 : if (check[i] == GIN_TRUE ||
304 144 : check[i] == GIN_MAYBE)
305 : {
306 540 : res = GIN_MAYBE;
307 540 : break;
308 : }
309 : }
310 : }
311 : else
312 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
313 :
314 1983 : PG_RETURN_GIN_TERNARY_VALUE(res);
315 : }
316 :
317 : /*
318 : *
319 : * jsonb_path_ops GIN opclass support functions
320 : *
321 : * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON
322 : * value; but the JSON key(s) leading to each value are also included in its
323 : * hash computation. This means we can only support containment queries,
324 : * but the index can distinguish, for example, {"foo": 42} from {"bar": 42}
325 : * since different hashes will be generated.
326 : *
327 : */
328 :
329 : Datum
330 1036 : gin_extract_jsonb_path(PG_FUNCTION_ARGS)
331 : {
332 1036 : Jsonb *jb = PG_GETARG_JSONB(0);
333 1036 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
334 1036 : int total = 2 * JB_ROOT_COUNT(jb);
335 : JsonbIterator *it;
336 : JsonbValue v;
337 : JsonbIteratorToken r;
338 : PathHashStack tail;
339 : PathHashStack *stack;
340 1036 : int i = 0;
341 : Datum *entries;
342 :
343 : /* If the root level is empty, we certainly have no keys */
344 1036 : if (total == 0)
345 : {
346 120 : *nentries = 0;
347 120 : PG_RETURN_POINTER(NULL);
348 : }
349 :
350 : /* Otherwise, use 2 * root count as initial estimate of result size */
351 916 : entries = (Datum *) palloc(sizeof(Datum) * total);
352 :
353 : /* We keep a stack of partial hashes corresponding to parent key levels */
354 916 : tail.parent = NULL;
355 916 : tail.hash = 0;
356 916 : stack = &tail;
357 :
358 916 : it = JsonbIteratorInit(&jb->root);
359 :
360 13367 : while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
361 : {
362 : PathHashStack *parent;
363 :
364 : /* Since we recurse into the object, we might need more space */
365 11535 : if (i >= total)
366 : {
367 7 : total *= 2;
368 7 : entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
369 : }
370 :
371 11535 : switch (r)
372 : {
373 : case WJB_BEGIN_ARRAY:
374 : case WJB_BEGIN_OBJECT:
375 : /* Push a stack level for this object */
376 943 : parent = stack;
377 943 : stack = (PathHashStack *) palloc(sizeof(PathHashStack));
378 :
379 : /*
380 : * We pass forward hashes from outer nesting levels so that
381 : * the hashes for nested values will include outer keys as
382 : * well as their own keys.
383 : *
384 : * Nesting an array within another array will not alter
385 : * innermost scalar element hash values, but that seems
386 : * inconsequential.
387 : */
388 943 : stack->hash = parent->hash;
389 943 : stack->parent = parent;
390 943 : break;
391 : case WJB_KEY:
392 : /* mix this key into the current outer hash */
393 4819 : JsonbHashScalarValue(&v, &stack->hash);
394 : /* hash is now ready to incorporate the value */
395 4819 : break;
396 : case WJB_ELEM:
397 : case WJB_VALUE:
398 : /* mix the element or value's hash into the prepared hash */
399 4830 : JsonbHashScalarValue(&v, &stack->hash);
400 : /* and emit an index entry */
401 4830 : entries[i++] = UInt32GetDatum(stack->hash);
402 : /* reset hash for next key, value, or sub-object */
403 4830 : stack->hash = stack->parent->hash;
404 4830 : break;
405 : case WJB_END_ARRAY:
406 : case WJB_END_OBJECT:
407 : /* Pop the stack */
408 943 : parent = stack->parent;
409 943 : pfree(stack);
410 943 : stack = parent;
411 : /* reset hash for next key, value, or sub-object */
412 943 : if (stack->parent)
413 27 : stack->hash = stack->parent->hash;
414 : else
415 916 : stack->hash = 0;
416 943 : break;
417 : default:
418 0 : elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);
419 : }
420 : }
421 :
422 916 : *nentries = i;
423 :
424 916 : PG_RETURN_POINTER(entries);
425 : }
426 :
427 : Datum
428 21 : gin_extract_jsonb_query_path(PG_FUNCTION_ARGS)
429 : {
430 21 : int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
431 21 : StrategyNumber strategy = PG_GETARG_UINT16(2);
432 21 : int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
433 : Datum *entries;
434 :
435 21 : if (strategy != JsonbContainsStrategyNumber)
436 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
437 :
438 : /* Query is a jsonb, so just apply gin_extract_jsonb_path ... */
439 21 : entries = (Datum *)
440 21 : DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path,
441 : PG_GETARG_DATUM(0),
442 : PointerGetDatum(nentries)));
443 :
444 : /* ... although "contains {}" requires a full index scan */
445 21 : if (*nentries == 0)
446 2 : *searchMode = GIN_SEARCH_MODE_ALL;
447 :
448 21 : PG_RETURN_POINTER(entries);
449 : }
450 :
451 : Datum
452 0 : gin_consistent_jsonb_path(PG_FUNCTION_ARGS)
453 : {
454 0 : bool *check = (bool *) PG_GETARG_POINTER(0);
455 0 : StrategyNumber strategy = PG_GETARG_UINT16(1);
456 :
457 : /* Jsonb *query = PG_GETARG_JSONB(2); */
458 0 : int32 nkeys = PG_GETARG_INT32(3);
459 :
460 : /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
461 0 : bool *recheck = (bool *) PG_GETARG_POINTER(5);
462 0 : bool res = true;
463 : int32 i;
464 :
465 0 : if (strategy != JsonbContainsStrategyNumber)
466 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
467 :
468 : /*
469 : * jsonb_path_ops is necessarily lossy, not only because of hash
470 : * collisions but also because it doesn't preserve complete information
471 : * about the structure of the JSON object. Besides, there are some
472 : * special rules around the containment of raw scalars in arrays that are
473 : * not handled here. So we must always recheck a match. However, if not
474 : * all of the keys are present, the tuple certainly doesn't match.
475 : */
476 0 : *recheck = true;
477 0 : for (i = 0; i < nkeys; i++)
478 : {
479 0 : if (!check[i])
480 : {
481 0 : res = false;
482 0 : break;
483 : }
484 : }
485 :
486 0 : PG_RETURN_BOOL(res);
487 : }
488 :
489 : Datum
490 1052 : gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS)
491 : {
492 1052 : GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
493 1052 : StrategyNumber strategy = PG_GETARG_UINT16(1);
494 :
495 : /* Jsonb *query = PG_GETARG_JSONB(2); */
496 1052 : int32 nkeys = PG_GETARG_INT32(3);
497 :
498 : /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
499 1052 : GinTernaryValue res = GIN_MAYBE;
500 : int32 i;
501 :
502 1052 : if (strategy != JsonbContainsStrategyNumber)
503 0 : elog(ERROR, "unrecognized strategy number: %d", strategy);
504 :
505 : /*
506 : * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
507 : * corresponds to always forcing recheck in the regular consistent
508 : * function, for the reasons listed there.
509 : */
510 1093 : for (i = 0; i < nkeys; i++)
511 : {
512 55 : if (check[i] == GIN_FALSE)
513 : {
514 14 : res = GIN_FALSE;
515 14 : break;
516 : }
517 : }
518 :
519 1052 : PG_RETURN_GIN_TERNARY_VALUE(res);
520 : }
521 :
522 : /*
523 : * Construct a jsonb_ops GIN key from a flag byte and a textual representation
524 : * (which need not be null-terminated). This function is responsible
525 : * for hashing overlength text representations; it will add the
526 : * JGINFLAG_HASHED bit to the flag value if it does that.
527 : */
528 : static Datum
529 9652 : make_text_key(char flag, const char *str, int len)
530 : {
531 : text *item;
532 : char hashbuf[10];
533 :
534 9652 : if (len > JGIN_MAXLENGTH)
535 : {
536 : uint32 hashval;
537 :
538 0 : hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len));
539 0 : snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval);
540 0 : str = hashbuf;
541 0 : len = 8;
542 0 : flag |= JGINFLAG_HASHED;
543 : }
544 :
545 : /*
546 : * Now build the text Datum. For simplicity we build a 4-byte-header
547 : * varlena text Datum here, but we expect it will get converted to short
548 : * header format when stored in the index.
549 : */
550 9652 : item = (text *) palloc(VARHDRSZ + len + 1);
551 9652 : SET_VARSIZE(item, VARHDRSZ + len + 1);
552 :
553 9652 : *VARDATA(item) = flag;
554 :
555 9652 : memcpy(VARDATA(item) + 1, str, len);
556 :
557 9652 : return PointerGetDatum(item);
558 : }
559 :
560 : /*
561 : * Create a textual representation of a JsonbValue that will serve as a GIN
562 : * key in a jsonb_ops index. is_key is true if the JsonbValue is a key,
563 : * or if it is a string array element (since we pretend those are keys,
564 : * see jsonb.h).
565 : */
566 : static Datum
567 9636 : make_scalar_key(const JsonbValue *scalarVal, bool is_key)
568 : {
569 : Datum item;
570 : char *cstr;
571 :
572 9636 : switch (scalarVal->type)
573 : {
574 : case jbvNull:
575 3 : Assert(!is_key);
576 3 : item = make_text_key(JGINFLAG_NULL, "", 0);
577 3 : break;
578 : case jbvBool:
579 924 : Assert(!is_key);
580 924 : item = make_text_key(JGINFLAG_BOOL,
581 924 : scalarVal->val.boolean ? "t" : "f", 1);
582 924 : break;
583 : case jbvNumeric:
584 1653 : Assert(!is_key);
585 :
586 : /*
587 : * A normalized textual representation, free of trailing zeroes,
588 : * is required so that numerically equal values will produce equal
589 : * strings.
590 : *
591 : * It isn't ideal that numerics are stored in a relatively bulky
592 : * textual format. However, it's a notationally convenient way of
593 : * storing a "union" type in the GIN B-Tree, and indexing Jsonb
594 : * strings takes precedence.
595 : */
596 1653 : cstr = numeric_normalize(scalarVal->val.numeric);
597 1653 : item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr));
598 1653 : pfree(cstr);
599 1653 : break;
600 : case jbvString:
601 14112 : item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR,
602 7056 : scalarVal->val.string.val,
603 : scalarVal->val.string.len);
604 7056 : break;
605 : default:
606 0 : elog(ERROR, "unrecognized jsonb scalar type: %d", scalarVal->type);
607 : item = 0; /* keep compiler quiet */
608 : break;
609 : }
610 :
611 9636 : return item;
612 : }
|