Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * dependencies.c
4 : * POSTGRES functional dependencies
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * IDENTIFICATION
10 : * src/backend/statistics/dependencies.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include "access/htup_details.h"
17 : #include "access/sysattr.h"
18 : #include "catalog/pg_operator.h"
19 : #include "catalog/pg_statistic_ext.h"
20 : #include "lib/stringinfo.h"
21 : #include "optimizer/clauses.h"
22 : #include "optimizer/cost.h"
23 : #include "optimizer/var.h"
24 : #include "nodes/nodes.h"
25 : #include "nodes/relation.h"
26 : #include "statistics/extended_stats_internal.h"
27 : #include "statistics/statistics.h"
28 : #include "utils/bytea.h"
29 : #include "utils/fmgroids.h"
30 : #include "utils/fmgrprotos.h"
31 : #include "utils/lsyscache.h"
32 : #include "utils/syscache.h"
33 : #include "utils/typcache.h"
34 :
35 : /*
36 : * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
37 : * k-permutations of n elements, except that the order does not matter for the
38 : * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
39 : */
40 : typedef struct DependencyGeneratorData
41 : {
42 : int k; /* size of the dependency */
43 : int n; /* number of possible attributes */
44 : int current; /* next dependency to return (index) */
45 : AttrNumber ndependencies; /* number of dependencies generated */
46 : AttrNumber *dependencies; /* array of pre-generated dependencies */
47 : } DependencyGeneratorData;
48 :
49 : typedef DependencyGeneratorData *DependencyGenerator;
50 :
51 : static void generate_dependencies_recurse(DependencyGenerator state,
52 : int index, AttrNumber start, AttrNumber *current);
53 : static void generate_dependencies(DependencyGenerator state);
54 : static DependencyGenerator DependencyGenerator_init(int n, int k);
55 : static void DependencyGenerator_free(DependencyGenerator state);
56 : static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
57 : static double dependency_degree(int numrows, HeapTuple *rows, int k,
58 : AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs);
59 : static bool dependency_is_fully_matched(MVDependency *dependency,
60 : Bitmapset *attnums);
61 : static bool dependency_implies_attribute(MVDependency *dependency,
62 : AttrNumber attnum);
63 : static bool dependency_is_compatible_clause(Node *clause, Index relid,
64 : AttrNumber *attnum);
65 : static MVDependency *find_strongest_dependency(StatisticExtInfo *stats,
66 : MVDependencies *dependencies,
67 : Bitmapset *attnums);
68 :
69 : static void
70 58 : generate_dependencies_recurse(DependencyGenerator state, int index,
71 : AttrNumber start, AttrNumber *current)
72 : {
73 : /*
74 : * The generator handles the first (k-1) elements differently from the
75 : * last element.
76 : */
77 58 : if (index < (state->k - 1))
78 : {
79 : AttrNumber i;
80 :
81 : /*
82 : * The first (k-1) values have to be in ascending order, which we
83 : * generate recursively.
84 : */
85 :
86 73 : for (i = start; i < state->n; i++)
87 : {
88 47 : current[index] = i;
89 47 : generate_dependencies_recurse(state, (index + 1), (i + 1), current);
90 : }
91 : }
92 : else
93 : {
94 : int i;
95 :
96 : /*
97 : * the last element is the implied value, which does not respect the
98 : * ascending order. We just need to check that the value is not in the
99 : * first (k-1) elements.
100 : */
101 :
102 126 : for (i = 0; i < state->n; i++)
103 : {
104 : int j;
105 94 : bool match = false;
106 :
107 94 : current[index] = i;
108 :
109 171 : for (j = 0; j < index; j++)
110 : {
111 124 : if (current[j] == i)
112 : {
113 47 : match = true;
114 47 : break;
115 : }
116 : }
117 :
118 : /*
119 : * If the value is not found in the first part of the dependency,
120 : * we're done.
121 : */
122 94 : if (!match)
123 : {
124 47 : state->dependencies = (AttrNumber *) repalloc(state->dependencies,
125 47 : state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
126 47 : memcpy(&state->dependencies[(state->k * state->ndependencies)],
127 47 : current, state->k * sizeof(AttrNumber));
128 47 : state->ndependencies++;
129 : }
130 : }
131 : }
132 58 : }
133 :
134 : /* generate all dependencies (k-permutations of n elements) */
135 : static void
136 11 : generate_dependencies(DependencyGenerator state)
137 : {
138 11 : AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
139 :
140 11 : generate_dependencies_recurse(state, 0, 0, current);
141 :
142 11 : pfree(current);
143 11 : }
144 :
145 : /*
146 : * initialize the DependencyGenerator of variations, and prebuild the variations
147 : *
148 : * This pre-builds all the variations. We could also generate them in
149 : * DependencyGenerator_next(), but this seems simpler.
150 : */
151 : static DependencyGenerator
152 11 : DependencyGenerator_init(int n, int k)
153 : {
154 : DependencyGenerator state;
155 :
156 11 : Assert((n >= k) && (k > 0));
157 :
158 : /* allocate the DependencyGenerator state */
159 11 : state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData));
160 11 : state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
161 :
162 11 : state->ndependencies = 0;
163 11 : state->current = 0;
164 11 : state->k = k;
165 11 : state->n = n;
166 :
167 : /* now actually pre-generate all the variations */
168 11 : generate_dependencies(state);
169 :
170 11 : return state;
171 : }
172 :
173 : /* free the DependencyGenerator state */
174 : static void
175 11 : DependencyGenerator_free(DependencyGenerator state)
176 : {
177 11 : pfree(state->dependencies);
178 11 : pfree(state);
179 :
180 11 : }
181 :
182 : /* generate next combination */
183 : static AttrNumber *
184 58 : DependencyGenerator_next(DependencyGenerator state)
185 : {
186 58 : if (state->current == state->ndependencies)
187 11 : return NULL;
188 :
189 47 : return &state->dependencies[state->k * state->current++];
190 : }
191 :
192 :
193 : /*
194 : * validates functional dependency on the data
195 : *
196 : * An actual work horse of detecting functional dependencies. Given a variation
197 : * of k attributes, it checks that the first (k-1) are sufficient to determine
198 : * the last one.
199 : */
200 : static double
201 47 : dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
202 : VacAttrStats **stats, Bitmapset *attrs)
203 : {
204 : int i,
205 : j;
206 47 : int nvalues = numrows * k;
207 : MultiSortSupport mss;
208 : SortItem *items;
209 : Datum *values;
210 : bool *isnull;
211 : int *attnums;
212 :
213 : /* counters valid within a group */
214 47 : int group_size = 0;
215 47 : int n_violations = 0;
216 :
217 : /* total number of rows supporting (consistent with) the dependency */
218 47 : int n_supporting_rows = 0;
219 :
220 : /* Make sure we have at least two input attributes. */
221 47 : Assert(k >= 2);
222 :
223 : /* sort info for all attributes columns */
224 47 : mss = multi_sort_init(k);
225 :
226 : /* data for the sort */
227 47 : items = (SortItem *) palloc(numrows * sizeof(SortItem));
228 47 : values = (Datum *) palloc(sizeof(Datum) * nvalues);
229 47 : isnull = (bool *) palloc(sizeof(bool) * nvalues);
230 :
231 : /* fix the pointers to values/isnull */
232 497047 : for (i = 0; i < numrows; i++)
233 : {
234 497000 : items[i].values = &values[i * k];
235 497000 : items[i].isnull = &isnull[i * k];
236 : }
237 :
238 : /*
239 : * Transform the bms into an array, to make accessing i-th member easier.
240 : */
241 47 : attnums = (int *) palloc(sizeof(int) * bms_num_members(attrs));
242 47 : i = 0;
243 47 : j = -1;
244 233 : while ((j = bms_next_member(attrs, j)) >= 0)
245 139 : attnums[i++] = j;
246 :
247 : /*
248 : * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
249 : *
250 : * (a) sort the data lexicographically
251 : *
252 : * (b) split the data into groups by first (k-1) columns
253 : *
254 : * (c) for each group count different values in the last column
255 : */
256 :
257 : /* prepare the sort function for the first dimension, and SortItem array */
258 156 : for (i = 0; i < k; i++)
259 : {
260 109 : VacAttrStats *colstat = stats[dependency[i]];
261 : TypeCacheEntry *type;
262 :
263 109 : type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
264 109 : if (type->lt_opr == InvalidOid) /* shouldn't happen */
265 0 : elog(ERROR, "cache lookup failed for ordering operator for type %u",
266 : colstat->attrtypid);
267 :
268 : /* prepare the sort function for this dimension */
269 109 : multi_sort_add_dimension(mss, i, type->lt_opr);
270 :
271 : /* accumulate all the data for both columns into an array and sort it */
272 1159109 : for (j = 0; j < numrows; j++)
273 : {
274 2318000 : items[j].values[i] =
275 1159000 : heap_getattr(rows[j], attnums[dependency[i]],
276 : stats[i]->tupDesc, &items[j].isnull[i]);
277 : }
278 : }
279 :
280 : /* sort the items so that we can detect the groups */
281 47 : qsort_arg((void *) items, numrows, sizeof(SortItem),
282 : multi_sort_compare, mss);
283 :
284 : /*
285 : * Walk through the sorted array, split it into rows according to the
286 : * first (k-1) columns. If there's a single value in the last column, we
287 : * count the group as 'supporting' the functional dependency. Otherwise we
288 : * count it as contradicting.
289 : */
290 :
291 : /* start with the first row forming a group */
292 47 : group_size = 1;
293 :
294 : /* loop 1 beyond the end of the array so that we count the final group */
295 497047 : for (i = 1; i <= numrows; i++)
296 : {
297 : /*
298 : * Check if the group ended, which may be either because we processed
299 : * all the items (i==numrows), or because the i-th item is not equal
300 : * to the preceding one.
301 : */
302 993953 : if (i == numrows ||
303 496953 : multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
304 : {
305 : /*
306 : * If no violations were found in the group then track the rows of
307 : * the group as supporting the functional dependency.
308 : */
309 12625 : if (n_violations == 0)
310 4609 : n_supporting_rows += group_size;
311 :
312 : /* Reset counters for the new group */
313 12625 : n_violations = 0;
314 12625 : group_size = 1;
315 12625 : continue;
316 : }
317 : /* first columns match, but the last one does not (so contradicting) */
318 484375 : else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
319 53206 : n_violations++;
320 :
321 484375 : group_size++;
322 : }
323 :
324 47 : pfree(items);
325 47 : pfree(values);
326 47 : pfree(isnull);
327 47 : pfree(mss);
328 :
329 : /* Compute the 'degree of validity' as (supporting/total). */
330 47 : return (n_supporting_rows * 1.0 / numrows);
331 : }
332 :
333 : /*
334 : * detects functional dependencies between groups of columns
335 : *
336 : * Generates all possible subsets of columns (variations) and computes
337 : * the degree of validity for each one. For example when creating statistics
338 : * on three columns (a,b,c) there are 9 possible dependencies
339 : *
340 : * two columns three columns
341 : * ----------- -------------
342 : * (a) -> b (a,b) -> c
343 : * (a) -> c (a,c) -> b
344 : * (b) -> a (b,c) -> a
345 : * (b) -> c
346 : * (c) -> a
347 : * (c) -> b
348 : */
349 : MVDependencies *
350 6 : statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
351 : VacAttrStats **stats)
352 : {
353 : int i,
354 : j,
355 : k;
356 : int numattrs;
357 : int *attnums;
358 :
359 : /* result */
360 6 : MVDependencies *dependencies = NULL;
361 :
362 6 : numattrs = bms_num_members(attrs);
363 :
364 : /*
365 : * Transform the bms into an array, to make accessing i-th member easier.
366 : */
367 6 : attnums = palloc(sizeof(int) * bms_num_members(attrs));
368 6 : i = 0;
369 6 : j = -1;
370 29 : while ((j = bms_next_member(attrs, j)) >= 0)
371 17 : attnums[i++] = j;
372 :
373 6 : Assert(numattrs >= 2);
374 :
375 : /*
376 : * We'll try build functional dependencies starting from the smallest ones
377 : * covering just 2 columns, to the largest ones, covering all columns
378 : * included in the statistics object. We start from the smallest ones
379 : * because we want to be able to skip already implied ones.
380 : */
381 17 : for (k = 2; k <= numattrs; k++)
382 : {
383 : AttrNumber *dependency; /* array with k elements */
384 :
385 : /* prepare a DependencyGenerator of variation */
386 11 : DependencyGenerator DependencyGenerator = DependencyGenerator_init(numattrs, k);
387 :
388 : /* generate all possible variations of k values (out of n) */
389 69 : while ((dependency = DependencyGenerator_next(DependencyGenerator)))
390 : {
391 : double degree;
392 : MVDependency *d;
393 :
394 : /* compute how valid the dependency seems */
395 47 : degree = dependency_degree(numrows, rows, k, dependency, stats, attrs);
396 :
397 : /*
398 : * if the dependency seems entirely invalid, don't store it it
399 : */
400 47 : if (degree == 0.0)
401 27 : continue;
402 :
403 20 : d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
404 : + k * sizeof(AttrNumber));
405 :
406 : /* copy the dependency (and keep the indexes into stxkeys) */
407 20 : d->degree = degree;
408 20 : d->nattributes = k;
409 67 : for (i = 0; i < k; i++)
410 47 : d->attributes[i] = attnums[dependency[i]];
411 :
412 : /* initialize the list of dependencies */
413 20 : if (dependencies == NULL)
414 : {
415 : dependencies
416 4 : = (MVDependencies *) palloc0(sizeof(MVDependencies));
417 :
418 4 : dependencies->magic = STATS_DEPS_MAGIC;
419 4 : dependencies->type = STATS_DEPS_TYPE_BASIC;
420 4 : dependencies->ndeps = 0;
421 : }
422 :
423 20 : dependencies->ndeps++;
424 20 : dependencies = (MVDependencies *) repalloc(dependencies,
425 : offsetof(MVDependencies, deps)
426 20 : + dependencies->ndeps * sizeof(MVDependency));
427 :
428 20 : dependencies->deps[dependencies->ndeps - 1] = d;
429 : }
430 :
431 : /*
432 : * we're done with variations of k elements, so free the
433 : * DependencyGenerator
434 : */
435 11 : DependencyGenerator_free(DependencyGenerator);
436 : }
437 :
438 6 : return dependencies;
439 : }
440 :
441 :
442 : /*
443 : * Serialize list of dependencies into a bytea value.
444 : */
445 : bytea *
446 4 : statext_dependencies_serialize(MVDependencies *dependencies)
447 : {
448 : int i;
449 : bytea *output;
450 : char *tmp;
451 : Size len;
452 :
453 : /* we need to store ndeps, with a number of attributes for each one */
454 4 : len = VARHDRSZ + SizeOfDependencies
455 4 : + dependencies->ndeps * SizeOfDependency;
456 :
457 : /* and also include space for the actual attribute numbers and degrees */
458 24 : for (i = 0; i < dependencies->ndeps; i++)
459 20 : len += (sizeof(AttrNumber) * dependencies->deps[i]->nattributes);
460 :
461 4 : output = (bytea *) palloc0(len);
462 4 : SET_VARSIZE(output, len);
463 :
464 4 : tmp = VARDATA(output);
465 :
466 : /* Store the base struct values (magic, type, ndeps) */
467 4 : memcpy(tmp, &dependencies->magic, sizeof(uint32));
468 4 : tmp += sizeof(uint32);
469 4 : memcpy(tmp, &dependencies->type, sizeof(uint32));
470 4 : tmp += sizeof(uint32);
471 4 : memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
472 4 : tmp += sizeof(uint32);
473 :
474 : /* store number of attributes and attribute numbers for each dependency */
475 24 : for (i = 0; i < dependencies->ndeps; i++)
476 : {
477 20 : MVDependency *d = dependencies->deps[i];
478 :
479 20 : memcpy(tmp, d, SizeOfDependency);
480 20 : tmp += SizeOfDependency;
481 :
482 20 : memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
483 20 : tmp += sizeof(AttrNumber) * d->nattributes;
484 :
485 20 : Assert(tmp <= ((char *) output + len));
486 : }
487 :
488 4 : return output;
489 : }
490 :
491 : /*
492 : * Reads serialized dependencies into MVDependencies structure.
493 : */
494 : MVDependencies *
495 20 : statext_dependencies_deserialize(bytea *data)
496 : {
497 : int i;
498 : Size min_expected_size;
499 : MVDependencies *dependencies;
500 : char *tmp;
501 :
502 20 : if (data == NULL)
503 0 : return NULL;
504 :
505 20 : if (VARSIZE_ANY_EXHDR(data) < SizeOfDependencies)
506 0 : elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
507 : VARSIZE_ANY_EXHDR(data), SizeOfDependencies);
508 :
509 : /* read the MVDependencies header */
510 20 : dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
511 :
512 : /* initialize pointer to the data part (skip the varlena header) */
513 20 : tmp = VARDATA_ANY(data);
514 :
515 : /* read the header fields and perform basic sanity checks */
516 20 : memcpy(&dependencies->magic, tmp, sizeof(uint32));
517 20 : tmp += sizeof(uint32);
518 20 : memcpy(&dependencies->type, tmp, sizeof(uint32));
519 20 : tmp += sizeof(uint32);
520 20 : memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
521 20 : tmp += sizeof(uint32);
522 :
523 20 : if (dependencies->magic != STATS_DEPS_MAGIC)
524 0 : elog(ERROR, "invalid dependency magic %d (expected %d)",
525 : dependencies->magic, STATS_DEPS_MAGIC);
526 :
527 20 : if (dependencies->type != STATS_DEPS_TYPE_BASIC)
528 0 : elog(ERROR, "invalid dependency type %d (expected %d)",
529 : dependencies->type, STATS_DEPS_TYPE_BASIC);
530 :
531 20 : if (dependencies->ndeps == 0)
532 0 : ereport(ERROR,
533 : (errcode(ERRCODE_DATA_CORRUPTED),
534 : errmsg("invalid zero-length item array in MVDependencies")));
535 :
536 : /* what minimum bytea size do we expect for those parameters */
537 20 : min_expected_size = SizeOfDependencies +
538 20 : dependencies->ndeps * (SizeOfDependency +
539 : sizeof(AttrNumber) * 2);
540 :
541 20 : if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
542 0 : elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
543 : VARSIZE_ANY_EXHDR(data), min_expected_size);
544 :
545 : /* allocate space for the MCV items */
546 20 : dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
547 20 : + (dependencies->ndeps * sizeof(MVDependency *)));
548 :
549 120 : for (i = 0; i < dependencies->ndeps; i++)
550 : {
551 : double degree;
552 : AttrNumber k;
553 : MVDependency *d;
554 :
555 : /* degree of validity */
556 100 : memcpy(°ree, tmp, sizeof(double));
557 100 : tmp += sizeof(double);
558 :
559 : /* number of attributes */
560 100 : memcpy(&k, tmp, sizeof(AttrNumber));
561 100 : tmp += sizeof(AttrNumber);
562 :
563 : /* is the number of attributes valid? */
564 100 : Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
565 :
566 : /* now that we know the number of attributes, allocate the dependency */
567 100 : d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
568 : + (k * sizeof(AttrNumber)));
569 :
570 100 : d->degree = degree;
571 100 : d->nattributes = k;
572 :
573 : /* copy attribute numbers */
574 100 : memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
575 100 : tmp += sizeof(AttrNumber) * d->nattributes;
576 :
577 100 : dependencies->deps[i] = d;
578 :
579 : /* still within the bytea */
580 100 : Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
581 : }
582 :
583 : /* we should have consumed the whole bytea exactly */
584 20 : Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
585 :
586 20 : return dependencies;
587 : }
588 :
589 : /*
590 : * dependency_is_fully_matched
591 : * checks that a functional dependency is fully matched given clauses on
592 : * attributes (assuming the clauses are suitable equality clauses)
593 : */
594 : static bool
595 105 : dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
596 : {
597 : int j;
598 :
599 : /*
600 : * Check that the dependency actually is fully covered by clauses. We have
601 : * to translate all attribute numbers, as those are referenced
602 : */
603 284 : for (j = 0; j < dependency->nattributes; j++)
604 : {
605 219 : int attnum = dependency->attributes[j];
606 :
607 219 : if (!bms_is_member(attnum, attnums))
608 40 : return false;
609 : }
610 :
611 65 : return true;
612 : }
613 :
614 : /*
615 : * dependency_implies_attribute
616 : * check that the attnum matches is implied by the functional dependency
617 : */
618 : static bool
619 67 : dependency_implies_attribute(MVDependency *dependency, AttrNumber attnum)
620 : {
621 67 : if (attnum == dependency->attributes[dependency->nattributes - 1])
622 29 : return true;
623 :
624 38 : return false;
625 : }
626 :
627 : /*
628 : * statext_dependencies_load
629 : * Load the functional dependencies for the indicated pg_statistic_ext tuple
630 : */
631 : MVDependencies *
632 20 : statext_dependencies_load(Oid mvoid)
633 : {
634 : bool isnull;
635 : Datum deps;
636 20 : HeapTuple htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid));
637 :
638 20 : if (!HeapTupleIsValid(htup))
639 0 : elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
640 :
641 20 : deps = SysCacheGetAttr(STATEXTOID, htup,
642 : Anum_pg_statistic_ext_stxdependencies, &isnull);
643 20 : Assert(!isnull);
644 :
645 20 : ReleaseSysCache(htup);
646 :
647 20 : return statext_dependencies_deserialize(DatumGetByteaP(deps));
648 : }
649 :
650 : /*
651 : * pg_dependencies_in - input routine for type pg_dependencies.
652 : *
653 : * pg_dependencies is real enough to be a table column, but it has no operations
654 : * of its own, and disallows input too
655 : */
656 : Datum
657 0 : pg_dependencies_in(PG_FUNCTION_ARGS)
658 : {
659 : /*
660 : * pg_node_list stores the data in binary form and parsing text input is
661 : * not needed, so disallow this.
662 : */
663 0 : ereport(ERROR,
664 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
665 : errmsg("cannot accept a value of type %s", "pg_dependencies")));
666 :
667 : PG_RETURN_VOID(); /* keep compiler quiet */
668 : }
669 :
670 : /*
671 : * pg_dependencies - output routine for type pg_dependencies.
672 : */
673 : Datum
674 0 : pg_dependencies_out(PG_FUNCTION_ARGS)
675 : {
676 0 : bytea *data = PG_GETARG_BYTEA_PP(0);
677 0 : MVDependencies *dependencies = statext_dependencies_deserialize(data);
678 : int i,
679 : j;
680 : StringInfoData str;
681 :
682 0 : initStringInfo(&str);
683 0 : appendStringInfoChar(&str, '{');
684 :
685 0 : for (i = 0; i < dependencies->ndeps; i++)
686 : {
687 0 : MVDependency *dependency = dependencies->deps[i];
688 :
689 0 : if (i > 0)
690 0 : appendStringInfoString(&str, ", ");
691 :
692 0 : appendStringInfoChar(&str, '"');
693 0 : for (j = 0; j < dependency->nattributes; j++)
694 : {
695 0 : if (j == dependency->nattributes - 1)
696 0 : appendStringInfoString(&str, " => ");
697 0 : else if (j > 0)
698 0 : appendStringInfoString(&str, ", ");
699 :
700 0 : appendStringInfo(&str, "%d", dependency->attributes[j]);
701 : }
702 0 : appendStringInfo(&str, "\": %f", dependency->degree);
703 : }
704 :
705 0 : appendStringInfoChar(&str, '}');
706 :
707 0 : PG_RETURN_CSTRING(str.data);
708 : }
709 :
710 : /*
711 : * pg_dependencies_recv - binary input routine for type pg_dependencies.
712 : */
713 : Datum
714 0 : pg_dependencies_recv(PG_FUNCTION_ARGS)
715 : {
716 0 : ereport(ERROR,
717 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
718 : errmsg("cannot accept a value of type %s", "pg_dependencies")));
719 :
720 : PG_RETURN_VOID(); /* keep compiler quiet */
721 : }
722 :
723 : /*
724 : * pg_dependencies_send - binary output routine for type pg_dependencies.
725 : *
726 : * Functional dependencies are serialized in a bytea value (although the type
727 : * is named differently), so let's just send that.
728 : */
729 : Datum
730 0 : pg_dependencies_send(PG_FUNCTION_ARGS)
731 : {
732 0 : return byteasend(fcinfo);
733 : }
734 :
735 : /*
736 : * dependency_is_compatible_clause
737 : * Determines if the clause is compatible with functional dependencies
738 : *
739 : * Only OpExprs with two arguments using an equality operator are supported.
740 : * When returning True attnum is set to the attribute number of the Var within
741 : * the supported clause.
742 : *
743 : * Currently we only support Var = Const, or Const = Var. It may be possible
744 : * to expand on this later.
745 : */
746 : static bool
747 49 : dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
748 : {
749 49 : RestrictInfo *rinfo = (RestrictInfo *) clause;
750 :
751 49 : if (!IsA(rinfo, RestrictInfo))
752 0 : return false;
753 :
754 : /* Pseudoconstants are not really interesting here. */
755 49 : if (rinfo->pseudoconstant)
756 0 : return false;
757 :
758 : /* clauses referencing multiple varnos are incompatible */
759 49 : if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
760 0 : return false;
761 :
762 49 : if (is_opclause(rinfo->clause))
763 : {
764 49 : OpExpr *expr = (OpExpr *) rinfo->clause;
765 : Var *var;
766 49 : bool varonleft = true;
767 : bool ok;
768 :
769 : /* Only expressions with two arguments are considered compatible. */
770 49 : if (list_length(expr->args) != 2)
771 0 : return false;
772 :
773 : /* see if it actually has the right */
774 147 : ok = (NumRelids((Node *) expr) == 1) &&
775 49 : (is_pseudo_constant_clause(lsecond(expr->args)) ||
776 0 : (varonleft = false,
777 0 : is_pseudo_constant_clause(linitial(expr->args))));
778 :
779 : /* unsupported structure (two variables or so) */
780 49 : if (!ok)
781 0 : return false;
782 :
783 : /*
784 : * If it's not "=" operator, just ignore the clause, as it's not
785 : * compatible with functional dependencies.
786 : *
787 : * This uses the function for estimating selectivity, not the operator
788 : * directly (a bit awkward, but well ...).
789 : */
790 49 : if (get_oprrest(expr->opno) != F_EQSEL)
791 0 : return false;
792 :
793 49 : var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
794 :
795 : /* We only support plain Vars for now */
796 49 : if (!IsA(var, Var))
797 0 : return false;
798 :
799 : /* Ensure var is from the correct relation */
800 49 : if (var->varno != relid)
801 0 : return false;
802 :
803 : /* we also better ensure the Var is from the current level */
804 49 : if (var->varlevelsup > 0)
805 0 : return false;
806 :
807 : /* Also skip system attributes (we don't allow stats on those). */
808 49 : if (!AttrNumberIsForUserDefinedAttr(var->varattno))
809 0 : return false;
810 :
811 49 : *attnum = var->varattno;
812 49 : return true;
813 : }
814 :
815 0 : return false;
816 : }
817 :
818 : /*
819 : * find_strongest_dependency
820 : * find the strongest dependency on the attributes
821 : *
822 : * When applying functional dependencies, we start with the strongest
823 : * dependencies. That is, we select the dependency that:
824 : *
825 : * (a) has all attributes covered by equality clauses
826 : *
827 : * (b) has the most attributes
828 : *
829 : * (c) has the highest degree of validity
830 : *
831 : * This guarantees that we eliminate the most redundant conditions first
832 : * (see the comment in dependencies_clauselist_selectivity).
833 : */
834 : static MVDependency *
835 49 : find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies,
836 : Bitmapset *attnums)
837 : {
838 : int i;
839 49 : MVDependency *strongest = NULL;
840 :
841 : /* number of attnums in clauses */
842 49 : int nattnums = bms_num_members(attnums);
843 :
844 : /*
845 : * Iterate over the MVDependency items and find the strongest one from the
846 : * fully-matched dependencies. We do the cheap checks first, before
847 : * matching it against the attnums.
848 : */
849 294 : for (i = 0; i < dependencies->ndeps; i++)
850 : {
851 245 : MVDependency *dependency = dependencies->deps[i];
852 :
853 : /*
854 : * Skip dependencies referencing more attributes than available
855 : * clauses, as those can't be fully matched.
856 : */
857 245 : if (dependency->nattributes > nattnums)
858 140 : continue;
859 :
860 105 : if (strongest)
861 : {
862 : /* skip dependencies on fewer attributes than the strongest. */
863 67 : if (dependency->nattributes < strongest->nattributes)
864 0 : continue;
865 :
866 : /* also skip weaker dependencies when attribute count matches */
867 125 : if (strongest->nattributes == dependency->nattributes &&
868 58 : strongest->degree > dependency->degree)
869 0 : continue;
870 : }
871 :
872 : /*
873 : * this dependency is stronger, but we must still check that it's
874 : * fully matched to these attnums. We perform this check last as it's
875 : * slightly more expensive than the previous checks.
876 : */
877 105 : if (dependency_is_fully_matched(dependency, attnums))
878 65 : strongest = dependency; /* save new best match */
879 : }
880 :
881 49 : return strongest;
882 : }
883 :
884 : /*
885 : * dependencies_clauselist_selectivity
886 : * Return the estimated selectivity of the given clauses using
887 : * functional dependency statistics, or 1.0 if no useful functional
888 : * dependency statistic exists.
889 : *
890 : * 'estimatedclauses' is an output argument that gets a bit set corresponding
891 : * to the (zero-based) list index of clauses that are included in the
892 : * estimated selectivity.
893 : *
894 : * Given equality clauses on attributes (a,b) we find the strongest dependency
895 : * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
896 : * dependency, we then combine the per-clause selectivities using the formula
897 : *
898 : * P(a,b) = P(a) * [f + (1-f)*P(b)]
899 : *
900 : * where 'f' is the degree of the dependency.
901 : *
902 : * With clauses on more than two attributes, the dependencies are applied
903 : * recursively, starting with the widest/strongest dependencies. For example
904 : * P(a,b,c) is first split like this:
905 : *
906 : * P(a,b,c) = P(a,b) * [f + (1-f)*P(c)]
907 : *
908 : * assuming (a,b=>c) is the strongest dependency.
909 : */
910 : Selectivity
911 20 : dependencies_clauselist_selectivity(PlannerInfo *root,
912 : List *clauses,
913 : int varRelid,
914 : JoinType jointype,
915 : SpecialJoinInfo *sjinfo,
916 : RelOptInfo *rel,
917 : Bitmapset **estimatedclauses)
918 : {
919 20 : Selectivity s1 = 1.0;
920 : ListCell *l;
921 20 : Bitmapset *clauses_attnums = NULL;
922 : StatisticExtInfo *stat;
923 : MVDependencies *dependencies;
924 : AttrNumber *list_attnums;
925 : int listidx;
926 :
927 : /* check if there's any stats that might be useful for us. */
928 20 : if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
929 0 : return 1.0;
930 :
931 20 : list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
932 20 : list_length(clauses));
933 :
934 : /*
935 : * Pre-process the clauses list to extract the attnums seen in each item.
936 : * We need to determine if there's any clauses which will be useful for
937 : * dependency selectivity estimations. Along the way we'll record all of
938 : * the attnums for each clause in a list which we'll reference later so we
939 : * don't need to repeat the same work again. We'll also keep track of all
940 : * attnums seen.
941 : */
942 20 : listidx = 0;
943 69 : foreach(l, clauses)
944 : {
945 49 : Node *clause = (Node *) lfirst(l);
946 : AttrNumber attnum;
947 :
948 49 : if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
949 : {
950 49 : list_attnums[listidx] = attnum;
951 49 : clauses_attnums = bms_add_member(clauses_attnums, attnum);
952 : }
953 : else
954 0 : list_attnums[listidx] = InvalidAttrNumber;
955 :
956 49 : listidx++;
957 : }
958 :
959 : /*
960 : * If there's not at least two distinct attnums then reject the whole list
961 : * of clauses. We must return 1.0 so the calling function's selectivity is
962 : * unaffected.
963 : */
964 20 : if (bms_num_members(clauses_attnums) < 2)
965 : {
966 0 : pfree(list_attnums);
967 0 : return 1.0;
968 : }
969 :
970 : /* find the best suited statistics object for these attnums */
971 20 : stat = choose_best_statistics(rel->statlist, clauses_attnums,
972 : STATS_EXT_DEPENDENCIES);
973 :
974 : /* if no matching stats could be found then we've nothing to do */
975 20 : if (!stat)
976 : {
977 0 : pfree(list_attnums);
978 0 : return 1.0;
979 : }
980 :
981 : /* load the dependency items stored in the statistics object */
982 20 : dependencies = statext_dependencies_load(stat->statOid);
983 :
984 : /*
985 : * Apply the dependencies recursively, starting with the widest/strongest
986 : * ones, and proceeding to the smaller/weaker ones. At the end of each
987 : * round we factor in the selectivity of clauses on the implied attribute,
988 : * and remove the clauses from the list.
989 : */
990 : while (true)
991 : {
992 49 : Selectivity s2 = 1.0;
993 : MVDependency *dependency;
994 :
995 : /* the widest/strongest dependency, fully matched by clauses */
996 49 : dependency = find_strongest_dependency(stat, dependencies,
997 : clauses_attnums);
998 :
999 : /* if no suitable dependency was found, we're done */
1000 49 : if (!dependency)
1001 20 : break;
1002 :
1003 : /*
1004 : * We found an applicable dependency, so find all the clauses on the
1005 : * implied attribute - with dependency (a,b => c) we look for clauses
1006 : * on 'c'.
1007 : */
1008 29 : listidx = -1;
1009 105 : foreach(l, clauses)
1010 : {
1011 : Node *clause;
1012 :
1013 76 : listidx++;
1014 :
1015 : /*
1016 : * Skip incompatible clauses, and ones we've already estimated on.
1017 : */
1018 152 : if (list_attnums[listidx] == InvalidAttrNumber ||
1019 76 : bms_is_member(listidx, *estimatedclauses))
1020 9 : continue;
1021 :
1022 : /*
1023 : * Technically we could find more than one clause for a given
1024 : * attnum. Since these clauses must be equality clauses, we choose
1025 : * to only take the selectivity estimate from the final clause in
1026 : * the list for this attnum. If the attnum happens to be compared
1027 : * to a different Const in another clause then no rows will match
1028 : * anyway. If it happens to be compared to the same Const, then
1029 : * ignoring the additional clause is just the thing to do.
1030 : */
1031 67 : if (dependency_implies_attribute(dependency,
1032 67 : list_attnums[listidx]))
1033 : {
1034 29 : clause = (Node *) lfirst(l);
1035 :
1036 29 : s2 = clause_selectivity(root, clause, varRelid, jointype,
1037 : sjinfo);
1038 :
1039 : /* mark this one as done, so we don't touch it again. */
1040 29 : *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
1041 :
1042 : /*
1043 : * Mark that we've got and used the dependency on this clause.
1044 : * We'll want to ignore this when looking for the next
1045 : * strongest dependency above.
1046 : */
1047 29 : clauses_attnums = bms_del_member(clauses_attnums,
1048 29 : list_attnums[listidx]);
1049 : }
1050 : }
1051 :
1052 : /*
1053 : * Now factor in the selectivity for all the "implied" clauses into
1054 : * the final one, using this formula:
1055 : *
1056 : * P(a,b) = P(a) * (f + (1-f) * P(b))
1057 : *
1058 : * where 'f' is the degree of validity of the dependency.
1059 : */
1060 29 : s1 *= (dependency->degree + (1 - dependency->degree) * s2);
1061 29 : }
1062 :
1063 20 : pfree(dependencies);
1064 20 : pfree(list_attnums);
1065 :
1066 20 : return s1;
1067 : }
|