Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * network_gist.c
4 : * GiST support for network types.
5 : *
6 : * The key thing to understand about this code is the definition of the
7 : * "union" of a set of INET/CIDR values. It works like this:
8 : * 1. If the values are not all of the same IP address family, the "union"
9 : * is a dummy value with family number zero, minbits zero, commonbits zero,
10 : * address all zeroes. Otherwise:
11 : * 2. The union has the common IP address family number.
12 : * 3. The union's minbits value is the smallest netmask length ("ip_bits")
13 : * of all the input values.
14 : * 4. Let C be the number of leading address bits that are in common among
15 : * all the input values (C ranges from 0 to ip_maxbits for the family).
16 : * 5. The union's commonbits value is C.
17 : * 6. The union's address value is the same as the common prefix for its
18 : * first C bits, and is zeroes to the right of that. The physical width
19 : * of the address value is ip_maxbits for the address family.
20 : *
21 : * In a leaf index entry (representing a single key), commonbits is equal to
22 : * ip_maxbits for the address family, minbits is the same as the represented
23 : * value's ip_bits, and the address is equal to the represented address.
24 : * Although it may appear that we're wasting a byte by storing the union
25 : * format and not just the represented INET/CIDR value in leaf keys, the
26 : * extra byte is actually "free" because of alignment considerations.
27 : *
28 : * Note that this design tracks minbits and commonbits independently; in any
29 : * given union value, either might be smaller than the other. This does not
30 : * help us much when descending the tree, because of the way inet comparison
31 : * is defined: at non-leaf nodes we can't compare more than minbits bits
32 : * even if we know them. However, it greatly improves the quality of split
33 : * decisions. Preliminary testing suggests that searches are as much as
34 : * twice as fast as for a simpler design in which a single field doubles as
35 : * the common prefix length and the minimum ip_bits value.
36 : *
37 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
38 : * Portions Copyright (c) 1994, Regents of the University of California
39 : *
40 : *
41 : * IDENTIFICATION
42 : * src/backend/utils/adt/network_gist.c
43 : *
44 : *-------------------------------------------------------------------------
45 : */
46 : #include "postgres.h"
47 :
48 : #include <sys/socket.h>
49 :
50 : #include "access/gist.h"
51 : #include "access/stratnum.h"
52 : #include "utils/builtins.h"
53 : #include "utils/inet.h"
54 :
55 : /*
56 : * Operator strategy numbers used in the GiST inet_ops opclass
57 : */
58 : #define INETSTRAT_OVERLAPS RTOverlapStrategyNumber
59 : #define INETSTRAT_EQ RTEqualStrategyNumber
60 : #define INETSTRAT_NE RTNotEqualStrategyNumber
61 : #define INETSTRAT_LT RTLessStrategyNumber
62 : #define INETSTRAT_LE RTLessEqualStrategyNumber
63 : #define INETSTRAT_GT RTGreaterStrategyNumber
64 : #define INETSTRAT_GE RTGreaterEqualStrategyNumber
65 : #define INETSTRAT_SUB RTSubStrategyNumber
66 : #define INETSTRAT_SUBEQ RTSubEqualStrategyNumber
67 : #define INETSTRAT_SUP RTSuperStrategyNumber
68 : #define INETSTRAT_SUPEQ RTSuperEqualStrategyNumber
69 :
70 :
71 : /*
72 : * Representation of a GiST INET/CIDR index key. This is not identical to
73 : * INET/CIDR because we need to keep track of the length of the common address
74 : * prefix as well as the minimum netmask length. However, as long as it
75 : * follows varlena header rules, the core GiST code won't know the difference.
76 : * For simplicity we always use 1-byte-header varlena format.
77 : */
78 : typedef struct GistInetKey
79 : {
80 : uint8 va_header; /* varlena header --- don't touch directly */
81 : unsigned char family; /* PGSQL_AF_INET, PGSQL_AF_INET6, or zero */
82 : unsigned char minbits; /* minimum number of bits in netmask */
83 : unsigned char commonbits; /* number of common prefix bits in addresses */
84 : unsigned char ipaddr[16]; /* up to 128 bits of common address */
85 : } GistInetKey;
86 :
87 : #define DatumGetInetKeyP(X) ((GistInetKey *) DatumGetPointer(X))
88 : #define InetKeyPGetDatum(X) PointerGetDatum(X)
89 :
90 : /*
91 : * Access macros; not really exciting, but we use these for notational
92 : * consistency with access to INET/CIDR values. Note that family-zero values
93 : * are stored with 4 bytes of address, not 16.
94 : */
95 : #define gk_ip_family(gkptr) ((gkptr)->family)
96 : #define gk_ip_minbits(gkptr) ((gkptr)->minbits)
97 : #define gk_ip_commonbits(gkptr) ((gkptr)->commonbits)
98 : #define gk_ip_addr(gkptr) ((gkptr)->ipaddr)
99 : #define ip_family_maxbits(fam) ((fam) == PGSQL_AF_INET6 ? 128 : 32)
100 :
101 : /* These require that the family field has been set: */
102 : #define gk_ip_addrsize(gkptr) \
103 : (gk_ip_family(gkptr) == PGSQL_AF_INET6 ? 16 : 4)
104 : #define gk_ip_maxbits(gkptr) \
105 : ip_family_maxbits(gk_ip_family(gkptr))
106 : #define SET_GK_VARSIZE(dst) \
107 : SET_VARSIZE_SHORT(dst, offsetof(GistInetKey, ipaddr) + gk_ip_addrsize(dst))
108 :
109 :
110 : /*
111 : * The GiST query consistency check
112 : */
113 : Datum
114 204 : inet_gist_consistent(PG_FUNCTION_ARGS)
115 : {
116 204 : GISTENTRY *ent = (GISTENTRY *) PG_GETARG_POINTER(0);
117 204 : inet *query = PG_GETARG_INET_PP(1);
118 204 : StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
119 :
120 : /* Oid subtype = PG_GETARG_OID(3); */
121 204 : bool *recheck = (bool *) PG_GETARG_POINTER(4);
122 204 : GistInetKey *key = DatumGetInetKeyP(ent->key);
123 : int minbits,
124 : order;
125 :
126 : /* All operators served by this function are exact. */
127 204 : *recheck = false;
128 :
129 : /*
130 : * Check 0: different families
131 : *
132 : * If key represents multiple address families, its children could match
133 : * anything. This can only happen on an inner index page.
134 : */
135 204 : if (gk_ip_family(key) == 0)
136 : {
137 0 : Assert(!GIST_LEAF(ent));
138 0 : PG_RETURN_BOOL(true);
139 : }
140 :
141 : /*
142 : * Check 1: different families
143 : *
144 : * Matching families do not help any of the strategies.
145 : */
146 204 : if (gk_ip_family(key) != ip_family(query))
147 : {
148 36 : switch (strategy)
149 : {
150 : case INETSTRAT_LT:
151 : case INETSTRAT_LE:
152 6 : if (gk_ip_family(key) < ip_family(query))
153 0 : PG_RETURN_BOOL(true);
154 6 : break;
155 :
156 : case INETSTRAT_GE:
157 : case INETSTRAT_GT:
158 6 : if (gk_ip_family(key) > ip_family(query))
159 6 : PG_RETURN_BOOL(true);
160 0 : break;
161 :
162 : case INETSTRAT_NE:
163 3 : PG_RETURN_BOOL(true);
164 : }
165 : /* For all other cases, we can be sure there is no match */
166 27 : PG_RETURN_BOOL(false);
167 : }
168 :
169 : /*
170 : * Check 2: network bit count
171 : *
172 : * Network bit count (ip_bits) helps to check leaves for sub network and
173 : * sup network operators. At non-leaf nodes, we know every child value
174 : * has ip_bits >= gk_ip_minbits(key), so we can avoid descending in some
175 : * cases too.
176 : */
177 168 : switch (strategy)
178 : {
179 : case INETSTRAT_SUB:
180 28 : if (GIST_LEAF(ent) && gk_ip_minbits(key) <= ip_bits(query))
181 20 : PG_RETURN_BOOL(false);
182 8 : break;
183 :
184 : case INETSTRAT_SUBEQ:
185 14 : if (GIST_LEAF(ent) && gk_ip_minbits(key) < ip_bits(query))
186 6 : PG_RETURN_BOOL(false);
187 8 : break;
188 :
189 : case INETSTRAT_SUPEQ:
190 : case INETSTRAT_EQ:
191 28 : if (gk_ip_minbits(key) > ip_bits(query))
192 8 : PG_RETURN_BOOL(false);
193 20 : break;
194 :
195 : case INETSTRAT_SUP:
196 14 : if (gk_ip_minbits(key) >= ip_bits(query))
197 8 : PG_RETURN_BOOL(false);
198 6 : break;
199 : }
200 :
201 : /*
202 : * Check 3: common network bits
203 : *
204 : * Compare available common prefix bits to the query, but not beyond
205 : * either the query's netmask or the minimum netmask among the represented
206 : * values. If these bits don't match the query, we have our answer (and
207 : * may or may not need to descend, depending on the operator). If they do
208 : * match, and we are not at a leaf, we descend in all cases.
209 : *
210 : * Note this is the final check for operators that only consider the
211 : * network part of the address.
212 : */
213 126 : minbits = Min(gk_ip_commonbits(key), gk_ip_minbits(key));
214 126 : minbits = Min(minbits, ip_bits(query));
215 :
216 126 : order = bitncmp(gk_ip_addr(key), ip_addr(query), minbits);
217 :
218 126 : switch (strategy)
219 : {
220 : case INETSTRAT_SUB:
221 : case INETSTRAT_SUBEQ:
222 : case INETSTRAT_OVERLAPS:
223 : case INETSTRAT_SUPEQ:
224 : case INETSTRAT_SUP:
225 46 : PG_RETURN_BOOL(order == 0);
226 :
227 : case INETSTRAT_LT:
228 : case INETSTRAT_LE:
229 28 : if (order > 0)
230 0 : PG_RETURN_BOOL(false);
231 28 : if (order < 0 || !GIST_LEAF(ent))
232 16 : PG_RETURN_BOOL(true);
233 12 : break;
234 :
235 : case INETSTRAT_EQ:
236 10 : if (order != 0)
237 7 : PG_RETURN_BOOL(false);
238 3 : if (!GIST_LEAF(ent))
239 0 : PG_RETURN_BOOL(true);
240 3 : break;
241 :
242 : case INETSTRAT_GE:
243 : case INETSTRAT_GT:
244 28 : if (order < 0)
245 16 : PG_RETURN_BOOL(false);
246 12 : if (order > 0 || !GIST_LEAF(ent))
247 0 : PG_RETURN_BOOL(true);
248 12 : break;
249 :
250 : case INETSTRAT_NE:
251 14 : if (order != 0 || !GIST_LEAF(ent))
252 8 : PG_RETURN_BOOL(true);
253 6 : break;
254 : }
255 :
256 : /*
257 : * Remaining checks are only for leaves and basic comparison strategies.
258 : * See network_cmp_internal() in network.c for the implementation we need
259 : * to match. Note that in a leaf key, commonbits should equal the address
260 : * length, so we compared the whole network parts above.
261 : */
262 33 : Assert(GIST_LEAF(ent));
263 :
264 : /*
265 : * Check 4: network bit count
266 : *
267 : * Next step is to compare netmask widths.
268 : */
269 33 : switch (strategy)
270 : {
271 : case INETSTRAT_LT:
272 : case INETSTRAT_LE:
273 12 : if (gk_ip_minbits(key) < ip_bits(query))
274 0 : PG_RETURN_BOOL(true);
275 12 : if (gk_ip_minbits(key) > ip_bits(query))
276 6 : PG_RETURN_BOOL(false);
277 6 : break;
278 :
279 : case INETSTRAT_EQ:
280 3 : if (gk_ip_minbits(key) != ip_bits(query))
281 0 : PG_RETURN_BOOL(false);
282 3 : break;
283 :
284 : case INETSTRAT_GE:
285 : case INETSTRAT_GT:
286 12 : if (gk_ip_minbits(key) > ip_bits(query))
287 6 : PG_RETURN_BOOL(true);
288 6 : if (gk_ip_minbits(key) < ip_bits(query))
289 0 : PG_RETURN_BOOL(false);
290 6 : break;
291 :
292 : case INETSTRAT_NE:
293 6 : if (gk_ip_minbits(key) != ip_bits(query))
294 3 : PG_RETURN_BOOL(true);
295 3 : break;
296 : }
297 :
298 : /*
299 : * Check 5: whole address
300 : *
301 : * Netmask bit counts are the same, so check all the address bits.
302 : */
303 18 : order = bitncmp(gk_ip_addr(key), ip_addr(query), gk_ip_maxbits(key));
304 :
305 18 : switch (strategy)
306 : {
307 : case INETSTRAT_LT:
308 3 : PG_RETURN_BOOL(order < 0);
309 :
310 : case INETSTRAT_LE:
311 3 : PG_RETURN_BOOL(order <= 0);
312 :
313 : case INETSTRAT_EQ:
314 3 : PG_RETURN_BOOL(order == 0);
315 :
316 : case INETSTRAT_GE:
317 3 : PG_RETURN_BOOL(order >= 0);
318 :
319 : case INETSTRAT_GT:
320 3 : PG_RETURN_BOOL(order > 0);
321 :
322 : case INETSTRAT_NE:
323 3 : PG_RETURN_BOOL(order != 0);
324 : }
325 :
326 0 : elog(ERROR, "unknown strategy for inet GiST");
327 : PG_RETURN_BOOL(false); /* keep compiler quiet */
328 : }
329 :
330 : /*
331 : * Calculate parameters of the union of some GistInetKeys.
332 : *
333 : * Examine the keys in elements m..n inclusive of the GISTENTRY array,
334 : * and compute these output parameters:
335 : * *minfamily_p = minimum IP address family number
336 : * *maxfamily_p = maximum IP address family number
337 : * *minbits_p = minimum netmask width
338 : * *commonbits_p = number of leading bits in common among the addresses
339 : *
340 : * minbits and commonbits are forced to zero if there's more than one
341 : * address family.
342 : */
343 : static void
344 0 : calc_inet_union_params(GISTENTRY *ent,
345 : int m, int n,
346 : int *minfamily_p,
347 : int *maxfamily_p,
348 : int *minbits_p,
349 : int *commonbits_p)
350 : {
351 : int minfamily,
352 : maxfamily,
353 : minbits,
354 : commonbits;
355 : unsigned char *addr;
356 : GistInetKey *tmp;
357 : int i;
358 :
359 : /* Must be at least one key. */
360 0 : Assert(m <= n);
361 :
362 : /* Initialize variables using the first key. */
363 0 : tmp = DatumGetInetKeyP(ent[m].key);
364 0 : minfamily = maxfamily = gk_ip_family(tmp);
365 0 : minbits = gk_ip_minbits(tmp);
366 0 : commonbits = gk_ip_commonbits(tmp);
367 0 : addr = gk_ip_addr(tmp);
368 :
369 : /* Scan remaining keys. */
370 0 : for (i = m + 1; i <= n; i++)
371 : {
372 0 : tmp = DatumGetInetKeyP(ent[i].key);
373 :
374 : /* Determine range of family numbers */
375 0 : if (minfamily > gk_ip_family(tmp))
376 0 : minfamily = gk_ip_family(tmp);
377 0 : if (maxfamily < gk_ip_family(tmp))
378 0 : maxfamily = gk_ip_family(tmp);
379 :
380 : /* Find minimum minbits */
381 0 : if (minbits > gk_ip_minbits(tmp))
382 0 : minbits = gk_ip_minbits(tmp);
383 :
384 : /* Find minimum number of bits in common */
385 0 : if (commonbits > gk_ip_commonbits(tmp))
386 0 : commonbits = gk_ip_commonbits(tmp);
387 0 : if (commonbits > 0)
388 0 : commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits);
389 : }
390 :
391 : /* Force minbits/commonbits to zero if more than one family. */
392 0 : if (minfamily != maxfamily)
393 0 : minbits = commonbits = 0;
394 :
395 0 : *minfamily_p = minfamily;
396 0 : *maxfamily_p = maxfamily;
397 0 : *minbits_p = minbits;
398 0 : *commonbits_p = commonbits;
399 0 : }
400 :
401 : /*
402 : * Same as above, but the GISTENTRY elements to examine are those with
403 : * indices listed in the offsets[] array.
404 : */
405 : static void
406 0 : calc_inet_union_params_indexed(GISTENTRY *ent,
407 : OffsetNumber *offsets, int noffsets,
408 : int *minfamily_p,
409 : int *maxfamily_p,
410 : int *minbits_p,
411 : int *commonbits_p)
412 : {
413 : int minfamily,
414 : maxfamily,
415 : minbits,
416 : commonbits;
417 : unsigned char *addr;
418 : GistInetKey *tmp;
419 : int i;
420 :
421 : /* Must be at least one key. */
422 0 : Assert(noffsets > 0);
423 :
424 : /* Initialize variables using the first key. */
425 0 : tmp = DatumGetInetKeyP(ent[offsets[0]].key);
426 0 : minfamily = maxfamily = gk_ip_family(tmp);
427 0 : minbits = gk_ip_minbits(tmp);
428 0 : commonbits = gk_ip_commonbits(tmp);
429 0 : addr = gk_ip_addr(tmp);
430 :
431 : /* Scan remaining keys. */
432 0 : for (i = 1; i < noffsets; i++)
433 : {
434 0 : tmp = DatumGetInetKeyP(ent[offsets[i]].key);
435 :
436 : /* Determine range of family numbers */
437 0 : if (minfamily > gk_ip_family(tmp))
438 0 : minfamily = gk_ip_family(tmp);
439 0 : if (maxfamily < gk_ip_family(tmp))
440 0 : maxfamily = gk_ip_family(tmp);
441 :
442 : /* Find minimum minbits */
443 0 : if (minbits > gk_ip_minbits(tmp))
444 0 : minbits = gk_ip_minbits(tmp);
445 :
446 : /* Find minimum number of bits in common */
447 0 : if (commonbits > gk_ip_commonbits(tmp))
448 0 : commonbits = gk_ip_commonbits(tmp);
449 0 : if (commonbits > 0)
450 0 : commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits);
451 : }
452 :
453 : /* Force minbits/commonbits to zero if more than one family. */
454 0 : if (minfamily != maxfamily)
455 0 : minbits = commonbits = 0;
456 :
457 0 : *minfamily_p = minfamily;
458 0 : *maxfamily_p = maxfamily;
459 0 : *minbits_p = minbits;
460 0 : *commonbits_p = commonbits;
461 0 : }
462 :
463 : /*
464 : * Construct a GistInetKey representing a union value.
465 : *
466 : * Inputs are the family/minbits/commonbits values to use, plus a pointer to
467 : * the address field of one of the union inputs. (Since we're going to copy
468 : * just the bits-in-common, it doesn't matter which one.)
469 : */
470 : static GistInetKey *
471 0 : build_inet_union_key(int family, int minbits, int commonbits,
472 : unsigned char *addr)
473 : {
474 : GistInetKey *result;
475 :
476 : /* Make sure any unused bits are zeroed. */
477 0 : result = (GistInetKey *) palloc0(sizeof(GistInetKey));
478 :
479 0 : gk_ip_family(result) = family;
480 0 : gk_ip_minbits(result) = minbits;
481 0 : gk_ip_commonbits(result) = commonbits;
482 :
483 : /* Clone appropriate bytes of the address. */
484 0 : if (commonbits > 0)
485 0 : memcpy(gk_ip_addr(result), addr, (commonbits + 7) / 8);
486 :
487 : /* Clean any unwanted bits in the last partial byte. */
488 0 : if (commonbits % 8 != 0)
489 0 : gk_ip_addr(result)[commonbits / 8] &= ~(0xFF >> (commonbits % 8));
490 :
491 : /* Set varlena header correctly. */
492 0 : SET_GK_VARSIZE(result);
493 :
494 0 : return result;
495 : }
496 :
497 :
498 : /*
499 : * The GiST union function
500 : *
501 : * See comments at head of file for the definition of the union.
502 : */
503 : Datum
504 0 : inet_gist_union(PG_FUNCTION_ARGS)
505 : {
506 0 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
507 0 : GISTENTRY *ent = entryvec->vector;
508 : int minfamily,
509 : maxfamily,
510 : minbits,
511 : commonbits;
512 : unsigned char *addr;
513 : GistInetKey *tmp,
514 : *result;
515 :
516 : /* Determine parameters of the union. */
517 0 : calc_inet_union_params(ent, 0, entryvec->n - 1,
518 : &minfamily, &maxfamily,
519 : &minbits, &commonbits);
520 :
521 : /* If more than one family, emit family number zero. */
522 0 : if (minfamily != maxfamily)
523 0 : minfamily = 0;
524 :
525 : /* Initialize address using the first key. */
526 0 : tmp = DatumGetInetKeyP(ent[0].key);
527 0 : addr = gk_ip_addr(tmp);
528 :
529 : /* Construct the union value. */
530 0 : result = build_inet_union_key(minfamily, minbits, commonbits, addr);
531 :
532 0 : PG_RETURN_POINTER(result);
533 : }
534 :
535 : /*
536 : * The GiST compress function
537 : *
538 : * Convert an inet value to GistInetKey.
539 : */
540 : Datum
541 17 : inet_gist_compress(PG_FUNCTION_ARGS)
542 : {
543 17 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
544 : GISTENTRY *retval;
545 :
546 17 : if (entry->leafkey)
547 : {
548 17 : retval = palloc(sizeof(GISTENTRY));
549 17 : if (DatumGetPointer(entry->key) != NULL)
550 : {
551 17 : inet *in = DatumGetInetPP(entry->key);
552 : GistInetKey *r;
553 :
554 17 : r = (GistInetKey *) palloc0(sizeof(GistInetKey));
555 :
556 17 : gk_ip_family(r) = ip_family(in);
557 17 : gk_ip_minbits(r) = ip_bits(in);
558 17 : gk_ip_commonbits(r) = gk_ip_maxbits(r);
559 17 : memcpy(gk_ip_addr(r), ip_addr(in), gk_ip_addrsize(r));
560 17 : SET_GK_VARSIZE(r);
561 :
562 17 : gistentryinit(*retval, PointerGetDatum(r),
563 : entry->rel, entry->page,
564 : entry->offset, FALSE);
565 : }
566 : else
567 : {
568 0 : gistentryinit(*retval, (Datum) 0,
569 : entry->rel, entry->page,
570 : entry->offset, FALSE);
571 : }
572 : }
573 : else
574 0 : retval = entry;
575 17 : PG_RETURN_POINTER(retval);
576 : }
577 :
578 : /*
579 : * The GiST decompress function
580 : *
581 : * do not do anything --- we just use the stored GistInetKey as-is.
582 : */
583 : Datum
584 204 : inet_gist_decompress(PG_FUNCTION_ARGS)
585 : {
586 204 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
587 :
588 204 : PG_RETURN_POINTER(entry);
589 : }
590 :
591 : /*
592 : * The GiST fetch function
593 : *
594 : * Reconstruct the original inet datum from a GistInetKey.
595 : */
596 : Datum
597 3 : inet_gist_fetch(PG_FUNCTION_ARGS)
598 : {
599 3 : GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
600 3 : GistInetKey *key = DatumGetInetKeyP(entry->key);
601 : GISTENTRY *retval;
602 : inet *dst;
603 :
604 3 : dst = (inet *) palloc0(sizeof(inet));
605 :
606 3 : ip_family(dst) = gk_ip_family(key);
607 3 : ip_bits(dst) = gk_ip_minbits(key);
608 3 : memcpy(ip_addr(dst), gk_ip_addr(key), ip_addrsize(dst));
609 3 : SET_INET_VARSIZE(dst);
610 :
611 3 : retval = palloc(sizeof(GISTENTRY));
612 3 : gistentryinit(*retval, InetPGetDatum(dst), entry->rel, entry->page,
613 : entry->offset, FALSE);
614 :
615 3 : PG_RETURN_POINTER(retval);
616 : }
617 :
618 : /*
619 : * The GiST page split penalty function
620 : *
621 : * Charge a large penalty if address family doesn't match, or a somewhat
622 : * smaller one if the new value would degrade the union's minbits
623 : * (minimum netmask width). Otherwise, penalty is inverse of the
624 : * new number of common address bits.
625 : */
626 : Datum
627 0 : inet_gist_penalty(PG_FUNCTION_ARGS)
628 : {
629 0 : GISTENTRY *origent = (GISTENTRY *) PG_GETARG_POINTER(0);
630 0 : GISTENTRY *newent = (GISTENTRY *) PG_GETARG_POINTER(1);
631 0 : float *penalty = (float *) PG_GETARG_POINTER(2);
632 0 : GistInetKey *orig = DatumGetInetKeyP(origent->key),
633 0 : *new = DatumGetInetKeyP(newent->key);
634 : int commonbits;
635 :
636 0 : if (gk_ip_family(orig) == gk_ip_family(new))
637 : {
638 0 : if (gk_ip_minbits(orig) <= gk_ip_minbits(new))
639 : {
640 0 : commonbits = bitncommon(gk_ip_addr(orig), gk_ip_addr(new),
641 0 : Min(gk_ip_commonbits(orig),
642 : gk_ip_commonbits(new)));
643 0 : if (commonbits > 0)
644 0 : *penalty = 1.0f / commonbits;
645 : else
646 0 : *penalty = 2;
647 : }
648 : else
649 0 : *penalty = 3;
650 : }
651 : else
652 0 : *penalty = 4;
653 :
654 0 : PG_RETURN_POINTER(penalty);
655 : }
656 :
657 : /*
658 : * The GiST PickSplit method
659 : *
660 : * There are two ways to split. First one is to split by address families,
661 : * if there are multiple families appearing in the input.
662 : *
663 : * The second and more common way is to split by addresses. To achieve this,
664 : * determine the number of leading bits shared by all the keys, then split on
665 : * the next bit. (We don't currently consider the netmask widths while doing
666 : * this; should we?) If we fail to get a nontrivial split that way, split
667 : * 50-50.
668 : */
669 : Datum
670 0 : inet_gist_picksplit(PG_FUNCTION_ARGS)
671 : {
672 0 : GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
673 0 : GIST_SPLITVEC *splitvec = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
674 0 : GISTENTRY *ent = entryvec->vector;
675 : int minfamily,
676 : maxfamily,
677 : minbits,
678 : commonbits;
679 : unsigned char *addr;
680 : GistInetKey *tmp,
681 : *left_union,
682 : *right_union;
683 : int maxoff,
684 : nbytes;
685 : OffsetNumber i,
686 : *left,
687 : *right;
688 :
689 0 : maxoff = entryvec->n - 1;
690 0 : nbytes = (maxoff + 1) * sizeof(OffsetNumber);
691 :
692 0 : left = (OffsetNumber *) palloc(nbytes);
693 0 : right = (OffsetNumber *) palloc(nbytes);
694 :
695 0 : splitvec->spl_left = left;
696 0 : splitvec->spl_right = right;
697 :
698 0 : splitvec->spl_nleft = 0;
699 0 : splitvec->spl_nright = 0;
700 :
701 : /* Determine parameters of the union of all the inputs. */
702 0 : calc_inet_union_params(ent, FirstOffsetNumber, maxoff,
703 : &minfamily, &maxfamily,
704 : &minbits, &commonbits);
705 :
706 0 : if (minfamily != maxfamily)
707 : {
708 : /* Multiple families, so split by family. */
709 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
710 : {
711 : /*
712 : * If there's more than 2 families, all but maxfamily go into the
713 : * left union. This could only happen if the inputs include some
714 : * IPv4, some IPv6, and some already-multiple-family unions.
715 : */
716 0 : tmp = DatumGetInetKeyP(ent[i].key);
717 0 : if (gk_ip_family(tmp) != maxfamily)
718 0 : left[splitvec->spl_nleft++] = i;
719 : else
720 0 : right[splitvec->spl_nright++] = i;
721 : }
722 : }
723 : else
724 : {
725 : /*
726 : * Split on the next bit after the common bits. If that yields a
727 : * trivial split, try the next bit position to the right. Repeat till
728 : * success; or if we run out of bits, do an arbitrary 50-50 split.
729 : */
730 0 : int maxbits = ip_family_maxbits(minfamily);
731 :
732 0 : while (commonbits < maxbits)
733 : {
734 : /* Split using the commonbits'th bit position. */
735 0 : int bitbyte = commonbits / 8;
736 0 : int bitmask = 0x80 >> (commonbits % 8);
737 :
738 0 : splitvec->spl_nleft = splitvec->spl_nright = 0;
739 :
740 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
741 : {
742 0 : tmp = DatumGetInetKeyP(ent[i].key);
743 0 : addr = gk_ip_addr(tmp);
744 0 : if ((addr[bitbyte] & bitmask) == 0)
745 0 : left[splitvec->spl_nleft++] = i;
746 : else
747 0 : right[splitvec->spl_nright++] = i;
748 : }
749 :
750 0 : if (splitvec->spl_nleft > 0 && splitvec->spl_nright > 0)
751 0 : break; /* success */
752 0 : commonbits++;
753 : }
754 :
755 0 : if (commonbits >= maxbits)
756 : {
757 : /* Failed ... do a 50-50 split. */
758 0 : splitvec->spl_nleft = splitvec->spl_nright = 0;
759 :
760 0 : for (i = FirstOffsetNumber; i <= maxoff / 2; i = OffsetNumberNext(i))
761 : {
762 0 : left[splitvec->spl_nleft++] = i;
763 : }
764 0 : for (; i <= maxoff; i = OffsetNumberNext(i))
765 : {
766 0 : right[splitvec->spl_nright++] = i;
767 : }
768 : }
769 : }
770 :
771 : /*
772 : * Compute the union value for each side from scratch. In most cases we
773 : * could approximate the union values with what we already know, but this
774 : * ensures that each side has minbits and commonbits set as high as
775 : * possible.
776 : */
777 0 : calc_inet_union_params_indexed(ent, left, splitvec->spl_nleft,
778 : &minfamily, &maxfamily,
779 : &minbits, &commonbits);
780 0 : if (minfamily != maxfamily)
781 0 : minfamily = 0;
782 0 : tmp = DatumGetInetKeyP(ent[left[0]].key);
783 0 : addr = gk_ip_addr(tmp);
784 0 : left_union = build_inet_union_key(minfamily, minbits, commonbits, addr);
785 0 : splitvec->spl_ldatum = PointerGetDatum(left_union);
786 :
787 0 : calc_inet_union_params_indexed(ent, right, splitvec->spl_nright,
788 : &minfamily, &maxfamily,
789 : &minbits, &commonbits);
790 0 : if (minfamily != maxfamily)
791 0 : minfamily = 0;
792 0 : tmp = DatumGetInetKeyP(ent[right[0]].key);
793 0 : addr = gk_ip_addr(tmp);
794 0 : right_union = build_inet_union_key(minfamily, minbits, commonbits, addr);
795 0 : splitvec->spl_rdatum = PointerGetDatum(right_union);
796 :
797 0 : PG_RETURN_POINTER(splitvec);
798 : }
799 :
800 : /*
801 : * The GiST equality function
802 : */
803 : Datum
804 0 : inet_gist_same(PG_FUNCTION_ARGS)
805 : {
806 0 : GistInetKey *left = DatumGetInetKeyP(PG_GETARG_DATUM(0));
807 0 : GistInetKey *right = DatumGetInetKeyP(PG_GETARG_DATUM(1));
808 0 : bool *result = (bool *) PG_GETARG_POINTER(2);
809 :
810 0 : *result = (gk_ip_family(left) == gk_ip_family(right) &&
811 0 : gk_ip_minbits(left) == gk_ip_minbits(right) &&
812 0 : gk_ip_commonbits(left) == gk_ip_commonbits(right) &&
813 0 : memcmp(gk_ip_addr(left), gk_ip_addr(right),
814 0 : gk_ip_addrsize(left)) == 0);
815 :
816 0 : PG_RETURN_POINTER(result);
817 : }
|