Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * mac.c
4 : * PostgreSQL type definitions for 6 byte, EUI-48, MAC addresses.
5 : *
6 : * Portions Copyright (c) 1998-2017, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/utils/adt/mac.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 :
14 : #include "postgres.h"
15 :
16 : #include "access/hash.h"
17 : #include "lib/hyperloglog.h"
18 : #include "libpq/pqformat.h"
19 : #include "port/pg_bswap.h"
20 : #include "utils/builtins.h"
21 : #include "utils/guc.h"
22 : #include "utils/inet.h"
23 : #include "utils/sortsupport.h"
24 :
25 :
26 : /*
27 : * Utility macros used for sorting and comparing:
28 : */
29 :
30 : #define hibits(addr) \
31 : ((unsigned long)(((addr)->a<<16)|((addr)->b<<8)|((addr)->c)))
32 :
33 : #define lobits(addr) \
34 : ((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f)))
35 :
36 : /* sortsupport for macaddr */
37 : typedef struct
38 : {
39 : int64 input_count; /* number of non-null values seen */
40 : bool estimating; /* true if estimating cardinality */
41 :
42 : hyperLogLogState abbr_card; /* cardinality estimator */
43 : } macaddr_sortsupport_state;
44 :
45 : static int macaddr_cmp_internal(macaddr *a1, macaddr *a2);
46 : static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup);
47 : static int macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
48 : static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup);
49 : static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup);
50 :
51 : /*
52 : * MAC address reader. Accepts several common notations.
53 : */
54 :
55 : Datum
56 163 : macaddr_in(PG_FUNCTION_ARGS)
57 : {
58 163 : char *str = PG_GETARG_CSTRING(0);
59 : macaddr *result;
60 : int a,
61 : b,
62 : c,
63 : d,
64 : e,
65 : f;
66 : char junk[2];
67 : int count;
68 :
69 : /* %1s matches iff there is trailing non-whitespace garbage */
70 :
71 163 : count = sscanf(str, "%x:%x:%x:%x:%x:%x%1s",
72 : &a, &b, &c, &d, &e, &f, junk);
73 163 : if (count != 6)
74 8 : count = sscanf(str, "%x-%x-%x-%x-%x-%x%1s",
75 : &a, &b, &c, &d, &e, &f, junk);
76 163 : if (count != 6)
77 7 : count = sscanf(str, "%2x%2x%2x:%2x%2x%2x%1s",
78 : &a, &b, &c, &d, &e, &f, junk);
79 163 : if (count != 6)
80 6 : count = sscanf(str, "%2x%2x%2x-%2x%2x%2x%1s",
81 : &a, &b, &c, &d, &e, &f, junk);
82 163 : if (count != 6)
83 5 : count = sscanf(str, "%2x%2x.%2x%2x.%2x%2x%1s",
84 : &a, &b, &c, &d, &e, &f, junk);
85 163 : if (count != 6)
86 4 : count = sscanf(str, "%2x%2x-%2x%2x-%2x%2x%1s",
87 : &a, &b, &c, &d, &e, &f, junk);
88 163 : if (count != 6)
89 3 : count = sscanf(str, "%2x%2x%2x%2x%2x%2x%1s",
90 : &a, &b, &c, &d, &e, &f, junk);
91 163 : if (count != 6)
92 2 : ereport(ERROR,
93 : (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
94 : errmsg("invalid input syntax for type %s: \"%s\"", "macaddr",
95 : str)));
96 :
97 322 : if ((a < 0) || (a > 255) || (b < 0) || (b > 255) ||
98 483 : (c < 0) || (c > 255) || (d < 0) || (d > 255) ||
99 322 : (e < 0) || (e > 255) || (f < 0) || (f > 255))
100 0 : ereport(ERROR,
101 : (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
102 : errmsg("invalid octet value in \"macaddr\" value: \"%s\"", str)));
103 :
104 161 : result = (macaddr *) palloc(sizeof(macaddr));
105 :
106 161 : result->a = a;
107 161 : result->b = b;
108 161 : result->c = c;
109 161 : result->d = d;
110 161 : result->e = e;
111 161 : result->f = f;
112 :
113 161 : PG_RETURN_MACADDR_P(result);
114 : }
115 :
116 : /*
117 : * MAC address output function. Fixed format.
118 : */
119 :
120 : Datum
121 87 : macaddr_out(PG_FUNCTION_ARGS)
122 : {
123 87 : macaddr *addr = PG_GETARG_MACADDR_P(0);
124 : char *result;
125 :
126 87 : result = (char *) palloc(32);
127 :
128 522 : snprintf(result, 32, "%02x:%02x:%02x:%02x:%02x:%02x",
129 522 : addr->a, addr->b, addr->c, addr->d, addr->e, addr->f);
130 :
131 87 : PG_RETURN_CSTRING(result);
132 : }
133 :
134 : /*
135 : * macaddr_recv - converts external binary format to macaddr
136 : *
137 : * The external representation is just the six bytes, MSB first.
138 : */
139 : Datum
140 0 : macaddr_recv(PG_FUNCTION_ARGS)
141 : {
142 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
143 : macaddr *addr;
144 :
145 0 : addr = (macaddr *) palloc(sizeof(macaddr));
146 :
147 0 : addr->a = pq_getmsgbyte(buf);
148 0 : addr->b = pq_getmsgbyte(buf);
149 0 : addr->c = pq_getmsgbyte(buf);
150 0 : addr->d = pq_getmsgbyte(buf);
151 0 : addr->e = pq_getmsgbyte(buf);
152 0 : addr->f = pq_getmsgbyte(buf);
153 :
154 0 : PG_RETURN_MACADDR_P(addr);
155 : }
156 :
157 : /*
158 : * macaddr_send - converts macaddr to binary format
159 : */
160 : Datum
161 0 : macaddr_send(PG_FUNCTION_ARGS)
162 : {
163 0 : macaddr *addr = PG_GETARG_MACADDR_P(0);
164 : StringInfoData buf;
165 :
166 0 : pq_begintypsend(&buf);
167 0 : pq_sendbyte(&buf, addr->a);
168 0 : pq_sendbyte(&buf, addr->b);
169 0 : pq_sendbyte(&buf, addr->c);
170 0 : pq_sendbyte(&buf, addr->d);
171 0 : pq_sendbyte(&buf, addr->e);
172 0 : pq_sendbyte(&buf, addr->f);
173 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
174 : }
175 :
176 :
177 : /*
178 : * Comparison function for sorting:
179 : */
180 :
181 : static int
182 1941 : macaddr_cmp_internal(macaddr *a1, macaddr *a2)
183 : {
184 1941 : if (hibits(a1) < hibits(a2))
185 684 : return -1;
186 1257 : else if (hibits(a1) > hibits(a2))
187 734 : return 1;
188 523 : else if (lobits(a1) < lobits(a2))
189 14 : return -1;
190 509 : else if (lobits(a1) > lobits(a2))
191 18 : return 1;
192 : else
193 491 : return 0;
194 : }
195 :
196 : Datum
197 0 : macaddr_cmp(PG_FUNCTION_ARGS)
198 : {
199 0 : macaddr *a1 = PG_GETARG_MACADDR_P(0);
200 0 : macaddr *a2 = PG_GETARG_MACADDR_P(1);
201 :
202 0 : PG_RETURN_INT32(macaddr_cmp_internal(a1, a2));
203 : }
204 :
205 : /*
206 : * Boolean comparisons.
207 : */
208 :
209 : Datum
210 511 : macaddr_lt(PG_FUNCTION_ARGS)
211 : {
212 511 : macaddr *a1 = PG_GETARG_MACADDR_P(0);
213 511 : macaddr *a2 = PG_GETARG_MACADDR_P(1);
214 :
215 511 : PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) < 0);
216 : }
217 :
218 : Datum
219 402 : macaddr_le(PG_FUNCTION_ARGS)
220 : {
221 402 : macaddr *a1 = PG_GETARG_MACADDR_P(0);
222 402 : macaddr *a2 = PG_GETARG_MACADDR_P(1);
223 :
224 402 : PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) <= 0);
225 : }
226 :
227 : Datum
228 103 : macaddr_eq(PG_FUNCTION_ARGS)
229 : {
230 103 : macaddr *a1 = PG_GETARG_MACADDR_P(0);
231 103 : macaddr *a2 = PG_GETARG_MACADDR_P(1);
232 :
233 103 : PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) == 0);
234 : }
235 :
236 : Datum
237 332 : macaddr_ge(PG_FUNCTION_ARGS)
238 : {
239 332 : macaddr *a1 = PG_GETARG_MACADDR_P(0);
240 332 : macaddr *a2 = PG_GETARG_MACADDR_P(1);
241 :
242 332 : PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) >= 0);
243 : }
244 :
245 : Datum
246 511 : macaddr_gt(PG_FUNCTION_ARGS)
247 : {
248 511 : macaddr *a1 = PG_GETARG_MACADDR_P(0);
249 511 : macaddr *a2 = PG_GETARG_MACADDR_P(1);
250 :
251 511 : PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) > 0);
252 : }
253 :
254 : Datum
255 4 : macaddr_ne(PG_FUNCTION_ARGS)
256 : {
257 4 : macaddr *a1 = PG_GETARG_MACADDR_P(0);
258 4 : macaddr *a2 = PG_GETARG_MACADDR_P(1);
259 :
260 4 : PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) != 0);
261 : }
262 :
263 : /*
264 : * Support function for hash indexes on macaddr.
265 : */
266 : Datum
267 22 : hashmacaddr(PG_FUNCTION_ARGS)
268 : {
269 22 : macaddr *key = PG_GETARG_MACADDR_P(0);
270 :
271 22 : return hash_any((unsigned char *) key, sizeof(macaddr));
272 : }
273 :
274 : Datum
275 10 : hashmacaddrextended(PG_FUNCTION_ARGS)
276 : {
277 10 : macaddr *key = PG_GETARG_MACADDR_P(0);
278 :
279 10 : return hash_any_extended((unsigned char *) key, sizeof(macaddr),
280 10 : PG_GETARG_INT64(1));
281 : }
282 :
283 : /*
284 : * Arithmetic functions: bitwise NOT, AND, OR.
285 : */
286 : Datum
287 12 : macaddr_not(PG_FUNCTION_ARGS)
288 : {
289 12 : macaddr *addr = PG_GETARG_MACADDR_P(0);
290 : macaddr *result;
291 :
292 12 : result = (macaddr *) palloc(sizeof(macaddr));
293 12 : result->a = ~addr->a;
294 12 : result->b = ~addr->b;
295 12 : result->c = ~addr->c;
296 12 : result->d = ~addr->d;
297 12 : result->e = ~addr->e;
298 12 : result->f = ~addr->f;
299 12 : PG_RETURN_MACADDR_P(result);
300 : }
301 :
302 : Datum
303 12 : macaddr_and(PG_FUNCTION_ARGS)
304 : {
305 12 : macaddr *addr1 = PG_GETARG_MACADDR_P(0);
306 12 : macaddr *addr2 = PG_GETARG_MACADDR_P(1);
307 : macaddr *result;
308 :
309 12 : result = (macaddr *) palloc(sizeof(macaddr));
310 12 : result->a = addr1->a & addr2->a;
311 12 : result->b = addr1->b & addr2->b;
312 12 : result->c = addr1->c & addr2->c;
313 12 : result->d = addr1->d & addr2->d;
314 12 : result->e = addr1->e & addr2->e;
315 12 : result->f = addr1->f & addr2->f;
316 12 : PG_RETURN_MACADDR_P(result);
317 : }
318 :
319 : Datum
320 12 : macaddr_or(PG_FUNCTION_ARGS)
321 : {
322 12 : macaddr *addr1 = PG_GETARG_MACADDR_P(0);
323 12 : macaddr *addr2 = PG_GETARG_MACADDR_P(1);
324 : macaddr *result;
325 :
326 12 : result = (macaddr *) palloc(sizeof(macaddr));
327 12 : result->a = addr1->a | addr2->a;
328 12 : result->b = addr1->b | addr2->b;
329 12 : result->c = addr1->c | addr2->c;
330 12 : result->d = addr1->d | addr2->d;
331 12 : result->e = addr1->e | addr2->e;
332 12 : result->f = addr1->f | addr2->f;
333 12 : PG_RETURN_MACADDR_P(result);
334 : }
335 :
336 : /*
337 : * Truncation function to allow comparing mac manufacturers.
338 : * From suggestion by Alex Pilosov <alex@pilosoft.com>
339 : */
340 : Datum
341 12 : macaddr_trunc(PG_FUNCTION_ARGS)
342 : {
343 12 : macaddr *addr = PG_GETARG_MACADDR_P(0);
344 : macaddr *result;
345 :
346 12 : result = (macaddr *) palloc(sizeof(macaddr));
347 :
348 12 : result->a = addr->a;
349 12 : result->b = addr->b;
350 12 : result->c = addr->c;
351 12 : result->d = 0;
352 12 : result->e = 0;
353 12 : result->f = 0;
354 :
355 12 : PG_RETURN_MACADDR_P(result);
356 : }
357 :
358 : /*
359 : * SortSupport strategy function. Populates a SortSupport struct with the
360 : * information necessary to use comparison by abbreviated keys.
361 : */
362 : Datum
363 2 : macaddr_sortsupport(PG_FUNCTION_ARGS)
364 : {
365 2 : SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
366 :
367 2 : ssup->comparator = macaddr_fast_cmp;
368 2 : ssup->ssup_extra = NULL;
369 :
370 2 : if (ssup->abbreviate)
371 : {
372 : macaddr_sortsupport_state *uss;
373 : MemoryContext oldcontext;
374 :
375 2 : oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
376 :
377 2 : uss = palloc(sizeof(macaddr_sortsupport_state));
378 2 : uss->input_count = 0;
379 2 : uss->estimating = true;
380 2 : initHyperLogLog(&uss->abbr_card, 10);
381 :
382 2 : ssup->ssup_extra = uss;
383 :
384 2 : ssup->comparator = macaddr_cmp_abbrev;
385 2 : ssup->abbrev_converter = macaddr_abbrev_convert;
386 2 : ssup->abbrev_abort = macaddr_abbrev_abort;
387 2 : ssup->abbrev_full_comparator = macaddr_fast_cmp;
388 :
389 2 : MemoryContextSwitchTo(oldcontext);
390 : }
391 :
392 2 : PG_RETURN_VOID();
393 : }
394 :
395 : /*
396 : * SortSupport "traditional" comparison function. Pulls two MAC addresses from
397 : * the heap and runs a standard comparison on them.
398 : */
399 : static int
400 78 : macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup)
401 : {
402 78 : macaddr *arg1 = DatumGetMacaddrP(x);
403 78 : macaddr *arg2 = DatumGetMacaddrP(y);
404 :
405 78 : return macaddr_cmp_internal(arg1, arg2);
406 : }
407 :
408 : /*
409 : * SortSupport abbreviated key comparison function. Compares two MAC addresses
410 : * quickly by treating them like integers, and without having to go to the
411 : * heap.
412 : */
413 : static int
414 98 : macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
415 : {
416 98 : if (x > y)
417 14 : return 1;
418 84 : else if (x == y)
419 78 : return 0;
420 : else
421 6 : return -1;
422 : }
423 :
424 : /*
425 : * Callback for estimating effectiveness of abbreviated key optimization.
426 : *
427 : * We pay no attention to the cardinality of the non-abbreviated data, because
428 : * there is no equality fast-path within authoritative macaddr comparator.
429 : */
430 : static bool
431 2 : macaddr_abbrev_abort(int memtupcount, SortSupport ssup)
432 : {
433 2 : macaddr_sortsupport_state *uss = ssup->ssup_extra;
434 : double abbr_card;
435 :
436 2 : if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
437 2 : return false;
438 :
439 0 : abbr_card = estimateHyperLogLog(&uss->abbr_card);
440 :
441 : /*
442 : * If we have >100k distinct values, then even if we were sorting many
443 : * billion rows we'd likely still break even, and the penalty of undoing
444 : * that many rows of abbrevs would probably not be worth it. At this point
445 : * we stop counting because we know that we're now fully committed.
446 : */
447 0 : if (abbr_card > 100000.0)
448 : {
449 : #ifdef TRACE_SORT
450 0 : if (trace_sort)
451 0 : elog(LOG,
452 : "macaddr_abbrev: estimation ends at cardinality %f"
453 : " after " INT64_FORMAT " values (%d rows)",
454 : abbr_card, uss->input_count, memtupcount);
455 : #endif
456 0 : uss->estimating = false;
457 0 : return false;
458 : }
459 :
460 : /*
461 : * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
462 : * fudge factor allows us to abort earlier on genuinely pathological data
463 : * where we've had exactly one abbreviated value in the first 2k
464 : * (non-null) rows.
465 : */
466 0 : if (abbr_card < uss->input_count / 2000.0 + 0.5)
467 : {
468 : #ifdef TRACE_SORT
469 0 : if (trace_sort)
470 0 : elog(LOG,
471 : "macaddr_abbrev: aborting abbreviation at cardinality %f"
472 : " below threshold %f after " INT64_FORMAT " values (%d rows)",
473 : abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
474 : memtupcount);
475 : #endif
476 0 : return true;
477 : }
478 :
479 : #ifdef TRACE_SORT
480 0 : if (trace_sort)
481 0 : elog(LOG,
482 : "macaddr_abbrev: cardinality %f after " INT64_FORMAT
483 : " values (%d rows)", abbr_card, uss->input_count, memtupcount);
484 : #endif
485 :
486 0 : return false;
487 : }
488 :
489 : /*
490 : * SortSupport conversion routine. Converts original macaddr representation
491 : * to abbreviated key representation.
492 : *
493 : * Packs the bytes of a 6-byte MAC address into a Datum and treats it as an
494 : * unsigned integer for purposes of comparison. On a 64-bit machine, there
495 : * will be two zeroed bytes of padding. The integer is converted to native
496 : * endianness to facilitate easy comparison.
497 : */
498 : static Datum
499 24 : macaddr_abbrev_convert(Datum original, SortSupport ssup)
500 : {
501 24 : macaddr_sortsupport_state *uss = ssup->ssup_extra;
502 24 : macaddr *authoritative = DatumGetMacaddrP(original);
503 : Datum res;
504 :
505 : /*
506 : * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of
507 : * the MAC address in. There will be two bytes of zero padding on the end
508 : * of the least significant bits.
509 : */
510 : #if SIZEOF_DATUM == 8
511 : memset(&res, 0, SIZEOF_DATUM);
512 : memcpy(&res, authoritative, sizeof(macaddr));
513 : #else /* SIZEOF_DATUM != 8 */
514 24 : memcpy(&res, authoritative, SIZEOF_DATUM);
515 : #endif
516 24 : uss->input_count += 1;
517 :
518 : /*
519 : * Cardinality estimation. The estimate uses uint32, so on a 64-bit
520 : * architecture, XOR the two 32-bit halves together to produce slightly
521 : * more entropy. The two zeroed bytes won't have any practical impact on
522 : * this operation.
523 : */
524 24 : if (uss->estimating)
525 : {
526 : uint32 tmp;
527 :
528 : #if SIZEOF_DATUM == 8
529 : tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
530 : #else /* SIZEOF_DATUM != 8 */
531 24 : tmp = (uint32) res;
532 : #endif
533 :
534 24 : addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
535 : }
536 :
537 : /*
538 : * Byteswap on little-endian machines.
539 : *
540 : * This is needed so that macaddr_cmp_abbrev() (an unsigned integer 3-way
541 : * comparator) works correctly on all platforms. Without this, the
542 : * comparator would have to call memcmp() with a pair of pointers to the
543 : * first byte of each abbreviated key, which is slower.
544 : */
545 24 : res = DatumBigEndianToNative(res);
546 :
547 24 : return res;
548 : }
|