Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * rangetypes_spgist.c
4 : * implementation of quad tree over ranges mapped to 2d-points for SP-GiST.
5 : *
6 : * Quad tree is a data structure similar to a binary tree, but is adapted to
7 : * 2d data. Each inner node of a quad tree contains a point (centroid) which
8 : * divides the 2d-space into 4 quadrants. Each quadrant is associated with a
9 : * child node.
10 : *
11 : * Ranges are mapped to 2d-points so that the lower bound is one dimension,
12 : * and the upper bound is another. By convention, we visualize the lower bound
13 : * to be the horizontal axis, and upper bound the vertical axis.
14 : *
15 : * One quirk with this mapping is the handling of empty ranges. An empty range
16 : * doesn't have lower and upper bounds, so it cannot be mapped to 2d space in
17 : * a straightforward way. To cope with that, the root node can have a 5th
18 : * quadrant, which is reserved for empty ranges. Furthermore, there can be
19 : * inner nodes in the tree with no centroid. They contain only two child nodes,
20 : * one for empty ranges and another for non-empty ones. Such a node can appear
21 : * as the root node, or in the tree under the 5th child of the root node (in
22 : * which case it will only contain empty nodes).
23 : *
24 : * The SP-GiST picksplit function uses medians along both axes as the centroid.
25 : * This implementation only uses the comparison function of the range element
26 : * datatype, therefore it works for any range type.
27 : *
28 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
29 : * Portions Copyright (c) 1994, Regents of the University of California
30 : *
31 : * IDENTIFICATION
32 : * src/backend/utils/adt/rangetypes_spgist.c
33 : *
34 : *-------------------------------------------------------------------------
35 : */
36 :
37 : #include "postgres.h"
38 :
39 : #include "access/spgist.h"
40 : #include "access/stratnum.h"
41 : #include "catalog/pg_type.h"
42 : #include "utils/builtins.h"
43 : #include "utils/datum.h"
44 : #include "utils/rangetypes.h"
45 :
46 : static int16 getQuadrant(TypeCacheEntry *typcache, RangeType *centroid,
47 : RangeType *tst);
48 : static int bound_cmp(const void *a, const void *b, void *arg);
49 :
50 : static int adjacent_inner_consistent(TypeCacheEntry *typcache,
51 : RangeBound *arg, RangeBound *centroid,
52 : RangeBound *prev);
53 : static int adjacent_cmp_bounds(TypeCacheEntry *typcache, RangeBound *arg,
54 : RangeBound *centroid);
55 :
56 : /*
57 : * SP-GiST 'config' interface function.
58 : */
59 : Datum
60 5 : spg_range_quad_config(PG_FUNCTION_ARGS)
61 : {
62 : /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
63 5 : spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
64 :
65 5 : cfg->prefixType = ANYRANGEOID;
66 5 : cfg->labelType = VOIDOID; /* we don't need node labels */
67 5 : cfg->canReturnData = true;
68 5 : cfg->longValuesOK = false;
69 5 : PG_RETURN_VOID();
70 : }
71 :
72 : /*----------
73 : * Determine which quadrant a 2d-mapped range falls into, relative to the
74 : * centroid.
75 : *
76 : * Quadrants are numbered like this:
77 : *
78 : * 4 | 1
79 : * ----+----
80 : * 3 | 2
81 : *
82 : * Where the lower bound of range is the horizontal axis and upper bound the
83 : * vertical axis.
84 : *
85 : * Ranges on one of the axes are taken to lie in the quadrant with higher value
86 : * along perpendicular axis. That is, a value on the horizontal axis is taken
87 : * to belong to quadrant 1 or 4, and a value on the vertical axis is taken to
88 : * belong to quadrant 1 or 2. A range equal to centroid is taken to lie in
89 : * quadrant 1.
90 : *
91 : * Empty ranges are taken to lie in the special quadrant 5.
92 : *----------
93 : */
94 : static int16
95 126414 : getQuadrant(TypeCacheEntry *typcache, RangeType *centroid, RangeType *tst)
96 : {
97 : RangeBound centroidLower,
98 : centroidUpper;
99 : bool centroidEmpty;
100 : RangeBound lower,
101 : upper;
102 : bool empty;
103 :
104 126414 : range_deserialize(typcache, centroid, ¢roidLower, ¢roidUpper,
105 : ¢roidEmpty);
106 126414 : range_deserialize(typcache, tst, &lower, &upper, &empty);
107 :
108 126414 : if (empty)
109 2000 : return 5;
110 :
111 124414 : if (range_cmp_bounds(typcache, &lower, ¢roidLower) >= 0)
112 : {
113 113366 : if (range_cmp_bounds(typcache, &upper, ¢roidUpper) >= 0)
114 113245 : return 1;
115 : else
116 121 : return 2;
117 : }
118 : else
119 : {
120 11048 : if (range_cmp_bounds(typcache, &upper, ¢roidUpper) >= 0)
121 2434 : return 4;
122 : else
123 8614 : return 3;
124 : }
125 : }
126 :
127 : /*
128 : * Choose SP-GiST function: choose path for addition of new range.
129 : */
130 : Datum
131 119139 : spg_range_quad_choose(PG_FUNCTION_ARGS)
132 : {
133 119139 : spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
134 119139 : spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
135 119139 : RangeType *inRange = DatumGetRangeType(in->datum),
136 : *centroid;
137 : int16 quadrant;
138 : TypeCacheEntry *typcache;
139 :
140 119139 : if (in->allTheSame)
141 : {
142 2168 : out->resultType = spgMatchNode;
143 : /* nodeN will be set by core */
144 2168 : out->result.matchNode.levelAdd = 0;
145 2168 : out->result.matchNode.restDatum = RangeTypeGetDatum(inRange);
146 2168 : PG_RETURN_VOID();
147 : }
148 :
149 116971 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(inRange));
150 :
151 : /*
152 : * A node with no centroid divides ranges purely on whether they're empty
153 : * or not. All empty ranges go to child node 0, all non-empty ranges go to
154 : * node 1.
155 : */
156 116971 : if (!in->hasPrefix)
157 : {
158 0 : out->resultType = spgMatchNode;
159 0 : if (RangeIsEmpty(inRange))
160 0 : out->result.matchNode.nodeN = 0;
161 : else
162 0 : out->result.matchNode.nodeN = 1;
163 0 : out->result.matchNode.levelAdd = 1;
164 0 : out->result.matchNode.restDatum = RangeTypeGetDatum(inRange);
165 0 : PG_RETURN_VOID();
166 : }
167 :
168 116971 : centroid = DatumGetRangeType(in->prefixDatum);
169 116971 : quadrant = getQuadrant(typcache, centroid, inRange);
170 :
171 116971 : Assert(quadrant <= in->nNodes);
172 :
173 : /* Select node matching to quadrant number */
174 116971 : out->resultType = spgMatchNode;
175 116971 : out->result.matchNode.nodeN = quadrant - 1;
176 116971 : out->result.matchNode.levelAdd = 1;
177 116971 : out->result.matchNode.restDatum = RangeTypeGetDatum(inRange);
178 :
179 116971 : PG_RETURN_VOID();
180 : }
181 :
182 : /*
183 : * Bound comparison for sorting.
184 : */
185 : static int
186 115497 : bound_cmp(const void *a, const void *b, void *arg)
187 : {
188 115497 : RangeBound *ba = (RangeBound *) a;
189 115497 : RangeBound *bb = (RangeBound *) b;
190 115497 : TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
191 :
192 115497 : return range_cmp_bounds(typcache, ba, bb);
193 : }
194 :
195 : /*
196 : * Picksplit SP-GiST function: split ranges into nodes. Select "centroid"
197 : * range and distribute ranges according to quadrants.
198 : */
199 : Datum
200 80 : spg_range_quad_picksplit(PG_FUNCTION_ARGS)
201 : {
202 80 : spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
203 80 : spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
204 : int i;
205 : int j;
206 : int nonEmptyCount;
207 : RangeType *centroid;
208 : bool empty;
209 : TypeCacheEntry *typcache;
210 :
211 : /* Use the median values of lower and upper bounds as the centroid range */
212 : RangeBound *lowerBounds,
213 : *upperBounds;
214 :
215 80 : typcache = range_get_typcache(fcinfo,
216 80 : RangeTypeGetOid(DatumGetRangeType(in->datums[0])));
217 :
218 : /* Allocate memory for bounds */
219 80 : lowerBounds = palloc(sizeof(RangeBound) * in->nTuples);
220 80 : upperBounds = palloc(sizeof(RangeBound) * in->nTuples);
221 80 : j = 0;
222 :
223 : /* Deserialize bounds of ranges, count non-empty ranges */
224 10611 : for (i = 0; i < in->nTuples; i++)
225 : {
226 31593 : range_deserialize(typcache, DatumGetRangeType(in->datums[i]),
227 21062 : &lowerBounds[j], &upperBounds[j], &empty);
228 10531 : if (!empty)
229 9439 : j++;
230 : }
231 80 : nonEmptyCount = j;
232 :
233 : /*
234 : * All the ranges are empty. The best we can do is to construct an inner
235 : * node with no centroid, and put all ranges into node 0. If non-empty
236 : * ranges are added later, they will be routed to node 1.
237 : */
238 80 : if (nonEmptyCount == 0)
239 : {
240 12 : out->nNodes = 2;
241 12 : out->hasPrefix = false;
242 : /* Prefix is empty */
243 12 : out->prefixDatum = PointerGetDatum(NULL);
244 12 : out->nodeLabels = NULL;
245 :
246 12 : out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples);
247 12 : out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples);
248 :
249 : /* Place all ranges into node 0 */
250 1104 : for (i = 0; i < in->nTuples; i++)
251 : {
252 1092 : RangeType *range = DatumGetRangeType(in->datums[i]);
253 :
254 1092 : out->leafTupleDatums[i] = RangeTypeGetDatum(range);
255 1092 : out->mapTuplesToNodes[i] = 0;
256 : }
257 12 : PG_RETURN_VOID();
258 : }
259 :
260 : /* Sort range bounds in order to find medians */
261 68 : qsort_arg(lowerBounds, nonEmptyCount, sizeof(RangeBound),
262 : bound_cmp, typcache);
263 68 : qsort_arg(upperBounds, nonEmptyCount, sizeof(RangeBound),
264 : bound_cmp, typcache);
265 :
266 : /* Construct "centroid" range from medians of lower and upper bounds */
267 136 : centroid = range_serialize(typcache, &lowerBounds[nonEmptyCount / 2],
268 136 : &upperBounds[nonEmptyCount / 2], false);
269 68 : out->hasPrefix = true;
270 68 : out->prefixDatum = RangeTypeGetDatum(centroid);
271 :
272 : /* Create node for empty ranges only if it is a root node */
273 68 : out->nNodes = (in->level == 0) ? 5 : 4;
274 68 : out->nodeLabels = NULL; /* we don't need node labels */
275 :
276 68 : out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples);
277 68 : out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples);
278 :
279 : /*
280 : * Assign ranges to corresponding nodes according to quadrants relative to
281 : * "centroid" range.
282 : */
283 9507 : for (i = 0; i < in->nTuples; i++)
284 : {
285 9439 : RangeType *range = DatumGetRangeType(in->datums[i]);
286 9439 : int16 quadrant = getQuadrant(typcache, centroid, range);
287 :
288 9439 : out->leafTupleDatums[i] = RangeTypeGetDatum(range);
289 9439 : out->mapTuplesToNodes[i] = quadrant - 1;
290 : }
291 :
292 68 : PG_RETURN_VOID();
293 : }
294 :
295 : /*
296 : * SP-GiST consistent function for inner nodes: check which nodes are
297 : * consistent with given set of queries.
298 : */
299 : Datum
300 298 : spg_range_quad_inner_consistent(PG_FUNCTION_ARGS)
301 : {
302 298 : spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
303 298 : spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
304 : int which;
305 : int i;
306 : MemoryContext oldCtx;
307 :
308 : /*
309 : * For adjacent search we need also previous centroid (if any) to improve
310 : * the precision of the consistent check. In this case needPrevious flag
311 : * is set and centroid is passed into traversalValue.
312 : */
313 298 : bool needPrevious = false;
314 :
315 298 : if (in->allTheSame)
316 : {
317 : /* Report that all nodes should be visited */
318 24 : out->nNodes = in->nNodes;
319 24 : out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
320 216 : for (i = 0; i < in->nNodes; i++)
321 192 : out->nodeNumbers[i] = i;
322 24 : PG_RETURN_VOID();
323 : }
324 :
325 274 : if (!in->hasPrefix)
326 : {
327 : /*
328 : * No centroid on this inner node. Such a node has two child nodes,
329 : * the first for empty ranges, and the second for non-empty ones.
330 : */
331 0 : Assert(in->nNodes == 2);
332 :
333 : /*
334 : * Nth bit of which variable means that (N - 1)th node should be
335 : * visited. Initially all bits are set. Bits of nodes which should be
336 : * skipped will be unset.
337 : */
338 0 : which = (1 << 1) | (1 << 2);
339 0 : for (i = 0; i < in->nkeys; i++)
340 : {
341 0 : StrategyNumber strategy = in->scankeys[i].sk_strategy;
342 : bool empty;
343 :
344 : /*
345 : * The only strategy when second argument of operator is not range
346 : * is RANGESTRAT_CONTAINS_ELEM.
347 : */
348 0 : if (strategy != RANGESTRAT_CONTAINS_ELEM)
349 0 : empty = RangeIsEmpty(
350 : DatumGetRangeType(in->scankeys[i].sk_argument));
351 : else
352 0 : empty = false;
353 :
354 0 : switch (strategy)
355 : {
356 : case RANGESTRAT_BEFORE:
357 : case RANGESTRAT_OVERLEFT:
358 : case RANGESTRAT_OVERLAPS:
359 : case RANGESTRAT_OVERRIGHT:
360 : case RANGESTRAT_AFTER:
361 : case RANGESTRAT_ADJACENT:
362 : /* These strategies return false if any argument is empty */
363 0 : if (empty)
364 0 : which = 0;
365 : else
366 0 : which &= (1 << 2);
367 0 : break;
368 :
369 : case RANGESTRAT_CONTAINS:
370 :
371 : /*
372 : * All ranges contain an empty range. Only non-empty
373 : * ranges can contain a non-empty range.
374 : */
375 0 : if (!empty)
376 0 : which &= (1 << 2);
377 0 : break;
378 :
379 : case RANGESTRAT_CONTAINED_BY:
380 :
381 : /*
382 : * Only an empty range is contained by an empty range.
383 : * Both empty and non-empty ranges can be contained by a
384 : * non-empty range.
385 : */
386 0 : if (empty)
387 0 : which &= (1 << 1);
388 0 : break;
389 :
390 : case RANGESTRAT_CONTAINS_ELEM:
391 0 : which &= (1 << 2);
392 0 : break;
393 :
394 : case RANGESTRAT_EQ:
395 0 : if (empty)
396 0 : which &= (1 << 1);
397 : else
398 0 : which &= (1 << 2);
399 0 : break;
400 :
401 : default:
402 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
403 : break;
404 : }
405 0 : if (which == 0)
406 0 : break; /* no need to consider remaining conditions */
407 : }
408 : }
409 : else
410 : {
411 : RangeBound centroidLower,
412 : centroidUpper;
413 : bool centroidEmpty;
414 : TypeCacheEntry *typcache;
415 : RangeType *centroid;
416 :
417 : /* This node has a centroid. Fetch it. */
418 274 : centroid = DatumGetRangeType(in->prefixDatum);
419 274 : typcache = range_get_typcache(fcinfo,
420 274 : RangeTypeGetOid(DatumGetRangeType(centroid)));
421 274 : range_deserialize(typcache, centroid, ¢roidLower, ¢roidUpper,
422 : ¢roidEmpty);
423 :
424 274 : Assert(in->nNodes == 4 || in->nNodes == 5);
425 :
426 : /*
427 : * Nth bit of which variable means that (N - 1)th node (Nth quadrant)
428 : * should be visited. Initially all bits are set. Bits of nodes which
429 : * can be skipped will be unset.
430 : */
431 274 : which = (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4) | (1 << 5);
432 :
433 1096 : for (i = 0; i < in->nkeys; i++)
434 : {
435 : StrategyNumber strategy;
436 : RangeBound lower,
437 : upper;
438 : bool empty;
439 274 : RangeType *range = NULL;
440 :
441 274 : RangeType *prevCentroid = NULL;
442 : RangeBound prevLower,
443 : prevUpper;
444 : bool prevEmpty;
445 :
446 : /* Restrictions on range bounds according to scan strategy */
447 274 : RangeBound *minLower = NULL,
448 274 : *maxLower = NULL,
449 274 : *minUpper = NULL,
450 274 : *maxUpper = NULL;
451 :
452 : /* Are the restrictions on range bounds inclusive? */
453 274 : bool inclusive = true;
454 274 : bool strictEmpty = true;
455 : int cmp,
456 : which1,
457 : which2;
458 :
459 274 : strategy = in->scankeys[i].sk_strategy;
460 :
461 : /*
462 : * RANGESTRAT_CONTAINS_ELEM is just like RANGESTRAT_CONTAINS, but
463 : * the argument is a single element. Expand the single element to
464 : * a range containing only the element, and treat it like
465 : * RANGESTRAT_CONTAINS.
466 : */
467 274 : if (strategy == RANGESTRAT_CONTAINS_ELEM)
468 : {
469 8 : lower.inclusive = true;
470 8 : lower.infinite = false;
471 8 : lower.lower = true;
472 8 : lower.val = in->scankeys[i].sk_argument;
473 :
474 8 : upper.inclusive = true;
475 8 : upper.infinite = false;
476 8 : upper.lower = false;
477 8 : upper.val = in->scankeys[i].sk_argument;
478 :
479 8 : empty = false;
480 :
481 8 : strategy = RANGESTRAT_CONTAINS;
482 : }
483 : else
484 : {
485 266 : range = DatumGetRangeType(in->scankeys[i].sk_argument);
486 266 : range_deserialize(typcache, range, &lower, &upper, &empty);
487 : }
488 :
489 : /*
490 : * Most strategies are handled by forming a bounding box from the
491 : * search key, defined by a minLower, maxLower, minUpper,
492 : * maxUpper. Some modify 'which' directly, to specify exactly
493 : * which quadrants need to be visited.
494 : *
495 : * For most strategies, nothing matches an empty search key, and
496 : * an empty range never matches a non-empty key. If a strategy
497 : * does not behave like that wrt. empty ranges, set strictEmpty to
498 : * false.
499 : */
500 274 : switch (strategy)
501 : {
502 : case RANGESTRAT_BEFORE:
503 :
504 : /*
505 : * Range A is before range B if upper bound of A is lower
506 : * than lower bound of B.
507 : */
508 4 : maxUpper = &lower;
509 4 : inclusive = false;
510 4 : break;
511 :
512 : case RANGESTRAT_OVERLEFT:
513 :
514 : /*
515 : * Range A is overleft to range B if upper bound of A is
516 : * less or equal to upper bound of B.
517 : */
518 22 : maxUpper = &upper;
519 22 : break;
520 :
521 : case RANGESTRAT_OVERLAPS:
522 :
523 : /*
524 : * Non-empty ranges overlap, if lower bound of each range
525 : * is lower or equal to upper bound of the other range.
526 : */
527 8 : maxLower = &upper;
528 8 : minUpper = &lower;
529 8 : break;
530 :
531 : case RANGESTRAT_OVERRIGHT:
532 :
533 : /*
534 : * Range A is overright to range B if lower bound of A is
535 : * greater or equal to lower bound of B.
536 : */
537 66 : minLower = &lower;
538 66 : break;
539 :
540 : case RANGESTRAT_AFTER:
541 :
542 : /*
543 : * Range A is after range B if lower bound of A is greater
544 : * than upper bound of B.
545 : */
546 60 : minLower = &upper;
547 60 : inclusive = false;
548 60 : break;
549 :
550 : case RANGESTRAT_ADJACENT:
551 22 : if (empty)
552 0 : break; /* Skip to strictEmpty check. */
553 :
554 : /*
555 : * Previously selected quadrant could exclude possibility
556 : * for lower or upper bounds to be adjacent. Deserialize
557 : * previous centroid range if present for checking this.
558 : */
559 22 : if (in->traversalValue != (Datum) 0)
560 : {
561 19 : prevCentroid = DatumGetRangeType(in->traversalValue);
562 19 : range_deserialize(typcache, prevCentroid,
563 : &prevLower, &prevUpper, &prevEmpty);
564 : }
565 :
566 : /*
567 : * For a range's upper bound to be adjacent to the
568 : * argument's lower bound, it will be found along the line
569 : * adjacent to (and just below) Y=lower. Therefore, if the
570 : * argument's lower bound is less than the centroid's
571 : * upper bound, the line falls in quadrants 2 and 3; if
572 : * greater, the line falls in quadrants 1 and 4. (see
573 : * adjacent_cmp_bounds for description of edge cases).
574 : */
575 22 : cmp = adjacent_inner_consistent(typcache, &lower,
576 : ¢roidUpper,
577 : prevCentroid ? &prevUpper : NULL);
578 22 : if (cmp > 0)
579 2 : which1 = (1 << 1) | (1 << 4);
580 20 : else if (cmp < 0)
581 5 : which1 = (1 << 2) | (1 << 3);
582 : else
583 15 : which1 = 0;
584 :
585 : /*
586 : * Also search for ranges's adjacent to argument's upper
587 : * bound. They will be found along the line adjacent to
588 : * (and just right of) X=upper, which falls in quadrants 3
589 : * and 4, or 1 and 2.
590 : */
591 22 : cmp = adjacent_inner_consistent(typcache, &upper,
592 : ¢roidLower,
593 : prevCentroid ? &prevLower : NULL);
594 22 : if (cmp > 0)
595 13 : which2 = (1 << 1) | (1 << 2);
596 9 : else if (cmp < 0)
597 7 : which2 = (1 << 3) | (1 << 4);
598 : else
599 2 : which2 = 0;
600 :
601 : /* We must chase down ranges adjacent to either bound. */
602 22 : which &= which1 | which2;
603 :
604 22 : needPrevious = true;
605 22 : break;
606 :
607 : case RANGESTRAT_CONTAINS:
608 :
609 : /*
610 : * Non-empty range A contains non-empty range B if lower
611 : * bound of A is lower or equal to lower bound of range B
612 : * and upper bound of range A is greater or equal to upper
613 : * bound of range A.
614 : *
615 : * All non-empty ranges contain an empty range.
616 : */
617 84 : strictEmpty = false;
618 84 : if (!empty)
619 : {
620 16 : which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
621 16 : maxLower = &lower;
622 16 : minUpper = &upper;
623 : }
624 84 : break;
625 :
626 : case RANGESTRAT_CONTAINED_BY:
627 : /* The opposite of contains. */
628 4 : strictEmpty = false;
629 4 : if (empty)
630 : {
631 : /* An empty range is only contained by an empty range */
632 0 : which &= (1 << 5);
633 : }
634 : else
635 : {
636 4 : minLower = &lower;
637 4 : maxUpper = &upper;
638 : }
639 4 : break;
640 :
641 : case RANGESTRAT_EQ:
642 :
643 : /*
644 : * Equal range can be only in the same quadrant where
645 : * argument would be placed to.
646 : */
647 4 : strictEmpty = false;
648 4 : which &= (1 << getQuadrant(typcache, centroid, range));
649 4 : break;
650 :
651 : default:
652 0 : elog(ERROR, "unrecognized range strategy: %d", strategy);
653 : break;
654 : }
655 :
656 274 : if (strictEmpty)
657 : {
658 182 : if (empty)
659 : {
660 : /* Scan key is empty, no branches are satisfying */
661 0 : which = 0;
662 0 : break;
663 : }
664 : else
665 : {
666 : /* Shouldn't visit tree branch with empty ranges */
667 182 : which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
668 : }
669 : }
670 :
671 : /*
672 : * Using the bounding box, see which quadrants we have to descend
673 : * into.
674 : */
675 274 : if (minLower)
676 : {
677 : /*
678 : * If the centroid's lower bound is less than or equal to the
679 : * minimum lower bound, anything in the 3rd and 4th quadrants
680 : * will have an even smaller lower bound, and thus can't
681 : * match.
682 : */
683 130 : if (range_cmp_bounds(typcache, ¢roidLower, minLower) <= 0)
684 16 : which &= (1 << 1) | (1 << 2) | (1 << 5);
685 : }
686 274 : if (maxLower)
687 : {
688 : /*
689 : * If the centroid's lower bound is greater than the maximum
690 : * lower bound, anything in the 1st and 2nd quadrants will
691 : * also have a greater than or equal lower bound, and thus
692 : * can't match. If the centroid's lower bound is equal to the
693 : * maximum lower bound, we can still exclude the 1st and 2nd
694 : * quadrants if we're looking for a value strictly greater
695 : * than the maximum.
696 : */
697 : int cmp;
698 :
699 24 : cmp = range_cmp_bounds(typcache, ¢roidLower, maxLower);
700 24 : if (cmp > 0 || (!inclusive && cmp == 0))
701 18 : which &= (1 << 3) | (1 << 4) | (1 << 5);
702 : }
703 274 : if (minUpper)
704 : {
705 : /*
706 : * If the centroid's upper bound is less than or equal to the
707 : * minimum upper bound, anything in the 2nd and 3rd quadrants
708 : * will have an even smaller upper bound, and thus can't
709 : * match.
710 : */
711 24 : if (range_cmp_bounds(typcache, ¢roidUpper, minUpper) <= 0)
712 0 : which &= (1 << 1) | (1 << 4) | (1 << 5);
713 : }
714 274 : if (maxUpper)
715 : {
716 : /*
717 : * If the centroid's upper bound is greater than the maximum
718 : * upper bound, anything in the 1st and 4th quadrants will
719 : * also have a greater than or equal upper bound, and thus
720 : * can't match. If the centroid's upper bound is equal to the
721 : * maximum upper bound, we can still exclude the 1st and 4th
722 : * quadrants if we're looking for a value strictly greater
723 : * than the maximum.
724 : */
725 : int cmp;
726 :
727 30 : cmp = range_cmp_bounds(typcache, ¢roidUpper, maxUpper);
728 30 : if (cmp > 0 || (!inclusive && cmp == 0))
729 14 : which &= (1 << 2) | (1 << 3) | (1 << 5);
730 : }
731 :
732 274 : if (which == 0)
733 0 : break; /* no need to consider remaining conditions */
734 : }
735 : }
736 :
737 : /* We must descend into the quadrant(s) identified by 'which' */
738 274 : out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
739 274 : if (needPrevious)
740 22 : out->traversalValues = (void **) palloc(sizeof(void *) * in->nNodes);
741 274 : out->nNodes = 0;
742 :
743 : /*
744 : * Elements of traversalValues should be allocated in
745 : * traversalMemoryContext
746 : */
747 274 : oldCtx = MemoryContextSwitchTo(in->traversalMemoryContext);
748 :
749 1393 : for (i = 1; i <= in->nNodes; i++)
750 : {
751 1119 : if (which & (1 << i))
752 : {
753 : /* Save previous prefix if needed */
754 953 : if (needPrevious)
755 : {
756 : Datum previousCentroid;
757 :
758 : /*
759 : * We know, that in->prefixDatum in this place is varlena,
760 : * because it's range
761 : */
762 49 : previousCentroid = datumCopy(in->prefixDatum, false, -1);
763 49 : out->traversalValues[out->nNodes] = (void *) previousCentroid;
764 : }
765 953 : out->nodeNumbers[out->nNodes] = i - 1;
766 953 : out->nNodes++;
767 : }
768 : }
769 :
770 274 : MemoryContextSwitchTo(oldCtx);
771 :
772 274 : PG_RETURN_VOID();
773 : }
774 :
775 : /*
776 : * adjacent_cmp_bounds
777 : *
778 : * Given an argument and centroid bound, this function determines if any
779 : * bounds that are adjacent to the argument are smaller than, or greater than
780 : * or equal to centroid. For brevity, we call the arg < centroid "left", and
781 : * arg >= centroid case "right". This corresponds to how the quadrants are
782 : * arranged, if you imagine that "left" is equivalent to "down" and "right"
783 : * is equivalent to "up".
784 : *
785 : * For the "left" case, returns -1, and for the "right" case, returns 1.
786 : */
787 : static int
788 65 : adjacent_cmp_bounds(TypeCacheEntry *typcache, RangeBound *arg,
789 : RangeBound *centroid)
790 : {
791 : int cmp;
792 :
793 65 : Assert(arg->lower != centroid->lower);
794 :
795 65 : cmp = range_cmp_bounds(typcache, arg, centroid);
796 :
797 65 : if (centroid->lower)
798 : {
799 : /*------
800 : * The argument is an upper bound, we are searching for adjacent lower
801 : * bounds. A matching adjacent lower bound must be *larger* than the
802 : * argument, but only just.
803 : *
804 : * The following table illustrates the desired result with a fixed
805 : * argument bound, and different centroids. The CMP column shows
806 : * the value of 'cmp' variable, and ADJ shows whether the argument
807 : * and centroid are adjacent, per bounds_adjacent(). (N) means we
808 : * don't need to check for that case, because it's implied by CMP.
809 : * With the argument range [..., 500), the adjacent range we're
810 : * searching for is [500, ...):
811 : *
812 : * ARGUMENT CENTROID CMP ADJ
813 : * [..., 500) [498, ...) > (N) [500, ...) is to the right
814 : * [..., 500) [499, ...) = (N) [500, ...) is to the right
815 : * [..., 500) [500, ...) < Y [500, ...) is to the right
816 : * [..., 500) [501, ...) < N [500, ...) is to the left
817 : *
818 : * So, we must search left when the argument is smaller than, and not
819 : * adjacent, to the centroid. Otherwise search right.
820 : *------
821 : */
822 39 : if (cmp < 0 && !bounds_adjacent(typcache, *arg, *centroid))
823 12 : return -1;
824 : else
825 27 : return 1;
826 : }
827 : else
828 : {
829 : /*------
830 : * The argument is a lower bound, we are searching for adjacent upper
831 : * bounds. A matching adjacent upper bound must be *smaller* than the
832 : * argument, but only just.
833 : *
834 : * ARGUMENT CENTROID CMP ADJ
835 : * [500, ...) [..., 499) > (N) [..., 500) is to the right
836 : * [500, ...) [..., 500) > (Y) [..., 500) is to the right
837 : * [500, ...) [..., 501) = (N) [..., 500) is to the left
838 : * [500, ...) [..., 502) < (N) [..., 500) is to the left
839 : *
840 : * We must search left when the argument is smaller than or equal to
841 : * the centroid. Otherwise search right. We don't need to check
842 : * whether the argument is adjacent with the centroid, because it
843 : * doesn't matter.
844 : *------
845 : */
846 26 : if (cmp <= 0)
847 24 : return -1;
848 : else
849 2 : return 1;
850 : }
851 : }
852 :
853 : /*----------
854 : * adjacent_inner_consistent
855 : *
856 : * Like adjacent_cmp_bounds, but also takes into account the previous
857 : * level's centroid. We might've traversed left (or right) at the previous
858 : * node, in search for ranges adjacent to the other bound, even though we
859 : * already ruled out the possibility for any matches in that direction for
860 : * this bound. By comparing the argument with the previous centroid, and
861 : * the previous centroid with the current centroid, we can determine which
862 : * direction we should've moved in at previous level, and which direction we
863 : * actually moved.
864 : *
865 : * If there can be any matches to the left, returns -1. If to the right,
866 : * returns 1. If there can be no matches below this centroid, because we
867 : * already ruled them out at the previous level, returns 0.
868 : *
869 : * XXX: Comparing just the previous and current level isn't foolproof; we
870 : * might still search some branches unnecessarily. For example, imagine that
871 : * we are searching for value 15, and we traverse the following centroids
872 : * (only considering one bound for the moment):
873 : *
874 : * Level 1: 20
875 : * Level 2: 50
876 : * Level 3: 25
877 : *
878 : * At this point, previous centroid is 50, current centroid is 25, and the
879 : * target value is to the left. But because we already moved right from
880 : * centroid 20 to 50 in the first level, there cannot be any values < 20 in
881 : * the current branch. But we don't know that just by looking at the previous
882 : * and current centroid, so we traverse left, unnecessarily. The reason we are
883 : * down this branch is that we're searching for matches with the *other*
884 : * bound. If we kept track of which bound we are searching for explicitly,
885 : * instead of deducing that from the previous and current centroid, we could
886 : * avoid some unnecessary work.
887 : *----------
888 : */
889 : static int
890 44 : adjacent_inner_consistent(TypeCacheEntry *typcache, RangeBound *arg,
891 : RangeBound *centroid, RangeBound *prev)
892 : {
893 44 : if (prev)
894 : {
895 : int prevcmp;
896 : int cmp;
897 :
898 : /*
899 : * Which direction were we supposed to traverse at previous level,
900 : * left or right?
901 : */
902 38 : prevcmp = adjacent_cmp_bounds(typcache, arg, prev);
903 :
904 : /* and which direction did we actually go? */
905 38 : cmp = range_cmp_bounds(typcache, centroid, prev);
906 :
907 : /* if the two don't agree, there's nothing to see here */
908 38 : if ((prevcmp < 0 && cmp >= 0) || (prevcmp > 0 && cmp < 0))
909 17 : return 0;
910 : }
911 :
912 27 : return adjacent_cmp_bounds(typcache, arg, centroid);
913 : }
914 :
915 : /*
916 : * SP-GiST consistent function for leaf nodes: check leaf value against query
917 : * using corresponding function.
918 : */
919 : Datum
920 37850 : spg_range_quad_leaf_consistent(PG_FUNCTION_ARGS)
921 : {
922 37850 : spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
923 37850 : spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
924 37850 : RangeType *leafRange = DatumGetRangeType(in->leafDatum);
925 : TypeCacheEntry *typcache;
926 : bool res;
927 : int i;
928 :
929 : /* all tests are exact */
930 37850 : out->recheck = false;
931 :
932 : /* leafDatum is what it is... */
933 37850 : out->leafValue = in->leafDatum;
934 :
935 37850 : typcache = range_get_typcache(fcinfo, RangeTypeGetOid(leafRange));
936 :
937 : /* Perform the required comparison(s) */
938 37850 : res = true;
939 72321 : for (i = 0; i < in->nkeys; i++)
940 : {
941 37850 : Datum keyDatum = in->scankeys[i].sk_argument;
942 :
943 : /* Call the function corresponding to the scan strategy */
944 37850 : switch (in->scankeys[i].sk_strategy)
945 : {
946 : case RANGESTRAT_BEFORE:
947 476 : res = range_before_internal(typcache, leafRange,
948 476 : DatumGetRangeType(keyDatum));
949 476 : break;
950 : case RANGESTRAT_OVERLEFT:
951 3041 : res = range_overleft_internal(typcache, leafRange,
952 3041 : DatumGetRangeType(keyDatum));
953 3041 : break;
954 : case RANGESTRAT_OVERLAPS:
955 484 : res = range_overlaps_internal(typcache, leafRange,
956 484 : DatumGetRangeType(keyDatum));
957 484 : break;
958 : case RANGESTRAT_OVERRIGHT:
959 9916 : res = range_overright_internal(typcache, leafRange,
960 9916 : DatumGetRangeType(keyDatum));
961 9916 : break;
962 : case RANGESTRAT_AFTER:
963 7238 : res = range_after_internal(typcache, leafRange,
964 7238 : DatumGetRangeType(keyDatum));
965 7238 : break;
966 : case RANGESTRAT_ADJACENT:
967 883 : res = range_adjacent_internal(typcache, leafRange,
968 883 : DatumGetRangeType(keyDatum));
969 883 : break;
970 : case RANGESTRAT_CONTAINS:
971 12884 : res = range_contains_internal(typcache, leafRange,
972 12884 : DatumGetRangeType(keyDatum));
973 12884 : break;
974 : case RANGESTRAT_CONTAINED_BY:
975 2228 : res = range_contained_by_internal(typcache, leafRange,
976 2228 : DatumGetRangeType(keyDatum));
977 2228 : break;
978 : case RANGESTRAT_CONTAINS_ELEM:
979 484 : res = range_contains_elem_internal(typcache, leafRange,
980 : keyDatum);
981 484 : break;
982 : case RANGESTRAT_EQ:
983 216 : res = range_eq_internal(typcache, leafRange,
984 216 : DatumGetRangeType(keyDatum));
985 216 : break;
986 : default:
987 0 : elog(ERROR, "unrecognized range strategy: %d",
988 : in->scankeys[i].sk_strategy);
989 : break;
990 : }
991 :
992 : /*
993 : * If leaf datum doesn't match to a query key, no need to check
994 : * subsequent keys.
995 : */
996 37850 : if (!res)
997 3379 : break;
998 : }
999 :
1000 37850 : PG_RETURN_BOOL(res);
1001 : }
|