LCOV - code coverage report
Current view: top level - src/backend/statistics - dependencies.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 263 317 83.0 %
Date: 2017-09-29 13:40:31 Functions: 15 19 78.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * dependencies.c
       4             :  *    POSTGRES functional dependencies
       5             :  *
       6             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/statistics/dependencies.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include "access/htup_details.h"
      17             : #include "access/sysattr.h"
      18             : #include "catalog/pg_operator.h"
      19             : #include "catalog/pg_statistic_ext.h"
      20             : #include "lib/stringinfo.h"
      21             : #include "optimizer/clauses.h"
      22             : #include "optimizer/cost.h"
      23             : #include "optimizer/var.h"
      24             : #include "nodes/nodes.h"
      25             : #include "nodes/relation.h"
      26             : #include "statistics/extended_stats_internal.h"
      27             : #include "statistics/statistics.h"
      28             : #include "utils/bytea.h"
      29             : #include "utils/fmgroids.h"
      30             : #include "utils/fmgrprotos.h"
      31             : #include "utils/lsyscache.h"
      32             : #include "utils/syscache.h"
      33             : #include "utils/typcache.h"
      34             : 
      35             : /*
      36             :  * Internal state for DependencyGenerator of dependencies. Dependencies are similar to
      37             :  * k-permutations of n elements, except that the order does not matter for the
      38             :  * first (k-1) elements. That is, (a,b=>c) and (b,a=>c) are equivalent.
      39             :  */
      40             : typedef struct DependencyGeneratorData
      41             : {
      42             :     int         k;              /* size of the dependency */
      43             :     int         n;              /* number of possible attributes */
      44             :     int         current;        /* next dependency to return (index) */
      45             :     AttrNumber  ndependencies;  /* number of dependencies generated */
      46             :     AttrNumber *dependencies;   /* array of pre-generated dependencies  */
      47             : } DependencyGeneratorData;
      48             : 
      49             : typedef DependencyGeneratorData *DependencyGenerator;
      50             : 
      51             : static void generate_dependencies_recurse(DependencyGenerator state,
      52             :                               int index, AttrNumber start, AttrNumber *current);
      53             : static void generate_dependencies(DependencyGenerator state);
      54             : static DependencyGenerator DependencyGenerator_init(int n, int k);
      55             : static void DependencyGenerator_free(DependencyGenerator state);
      56             : static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
      57             : static double dependency_degree(int numrows, HeapTuple *rows, int k,
      58             :                   AttrNumber *dependency, VacAttrStats **stats, Bitmapset *attrs);
      59             : static bool dependency_is_fully_matched(MVDependency *dependency,
      60             :                             Bitmapset *attnums);
      61             : static bool dependency_implies_attribute(MVDependency *dependency,
      62             :                              AttrNumber attnum);
      63             : static bool dependency_is_compatible_clause(Node *clause, Index relid,
      64             :                                 AttrNumber *attnum);
      65             : static MVDependency *find_strongest_dependency(StatisticExtInfo *stats,
      66             :                           MVDependencies *dependencies,
      67             :                           Bitmapset *attnums);
      68             : 
      69             : static void
      70          58 : generate_dependencies_recurse(DependencyGenerator state, int index,
      71             :                               AttrNumber start, AttrNumber *current)
      72             : {
      73             :     /*
      74             :      * The generator handles the first (k-1) elements differently from the
      75             :      * last element.
      76             :      */
      77          58 :     if (index < (state->k - 1))
      78             :     {
      79             :         AttrNumber  i;
      80             : 
      81             :         /*
      82             :          * The first (k-1) values have to be in ascending order, which we
      83             :          * generate recursively.
      84             :          */
      85             : 
      86          73 :         for (i = start; i < state->n; i++)
      87             :         {
      88          47 :             current[index] = i;
      89          47 :             generate_dependencies_recurse(state, (index + 1), (i + 1), current);
      90             :         }
      91             :     }
      92             :     else
      93             :     {
      94             :         int         i;
      95             : 
      96             :         /*
      97             :          * the last element is the implied value, which does not respect the
      98             :          * ascending order. We just need to check that the value is not in the
      99             :          * first (k-1) elements.
     100             :          */
     101             : 
     102         126 :         for (i = 0; i < state->n; i++)
     103             :         {
     104             :             int         j;
     105          94 :             bool        match = false;
     106             : 
     107          94 :             current[index] = i;
     108             : 
     109         171 :             for (j = 0; j < index; j++)
     110             :             {
     111         124 :                 if (current[j] == i)
     112             :                 {
     113          47 :                     match = true;
     114          47 :                     break;
     115             :                 }
     116             :             }
     117             : 
     118             :             /*
     119             :              * If the value is not found in the first part of the dependency,
     120             :              * we're done.
     121             :              */
     122          94 :             if (!match)
     123             :             {
     124          47 :                 state->dependencies = (AttrNumber *) repalloc(state->dependencies,
     125          47 :                                                               state->k * (state->ndependencies + 1) * sizeof(AttrNumber));
     126          47 :                 memcpy(&state->dependencies[(state->k * state->ndependencies)],
     127          47 :                        current, state->k * sizeof(AttrNumber));
     128          47 :                 state->ndependencies++;
     129             :             }
     130             :         }
     131             :     }
     132          58 : }
     133             : 
     134             : /* generate all dependencies (k-permutations of n elements) */
     135             : static void
     136          11 : generate_dependencies(DependencyGenerator state)
     137             : {
     138          11 :     AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k);
     139             : 
     140          11 :     generate_dependencies_recurse(state, 0, 0, current);
     141             : 
     142          11 :     pfree(current);
     143          11 : }
     144             : 
     145             : /*
     146             :  * initialize the DependencyGenerator of variations, and prebuild the variations
     147             :  *
     148             :  * This pre-builds all the variations. We could also generate them in
     149             :  * DependencyGenerator_next(), but this seems simpler.
     150             :  */
     151             : static DependencyGenerator
     152          11 : DependencyGenerator_init(int n, int k)
     153             : {
     154             :     DependencyGenerator state;
     155             : 
     156          11 :     Assert((n >= k) && (k > 0));
     157             : 
     158             :     /* allocate the DependencyGenerator state */
     159          11 :     state = (DependencyGenerator) palloc0(sizeof(DependencyGeneratorData));
     160          11 :     state->dependencies = (AttrNumber *) palloc(k * sizeof(AttrNumber));
     161             : 
     162          11 :     state->ndependencies = 0;
     163          11 :     state->current = 0;
     164          11 :     state->k = k;
     165          11 :     state->n = n;
     166             : 
     167             :     /* now actually pre-generate all the variations */
     168          11 :     generate_dependencies(state);
     169             : 
     170          11 :     return state;
     171             : }
     172             : 
     173             : /* free the DependencyGenerator state */
     174             : static void
     175          11 : DependencyGenerator_free(DependencyGenerator state)
     176             : {
     177          11 :     pfree(state->dependencies);
     178          11 :     pfree(state);
     179             : 
     180          11 : }
     181             : 
     182             : /* generate next combination */
     183             : static AttrNumber *
     184          58 : DependencyGenerator_next(DependencyGenerator state)
     185             : {
     186          58 :     if (state->current == state->ndependencies)
     187          11 :         return NULL;
     188             : 
     189          47 :     return &state->dependencies[state->k * state->current++];
     190             : }
     191             : 
     192             : 
     193             : /*
     194             :  * validates functional dependency on the data
     195             :  *
     196             :  * An actual work horse of detecting functional dependencies. Given a variation
     197             :  * of k attributes, it checks that the first (k-1) are sufficient to determine
     198             :  * the last one.
     199             :  */
     200             : static double
     201          47 : dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
     202             :                   VacAttrStats **stats, Bitmapset *attrs)
     203             : {
     204             :     int         i,
     205             :                 j;
     206          47 :     int         nvalues = numrows * k;
     207             :     MultiSortSupport mss;
     208             :     SortItem   *items;
     209             :     Datum      *values;
     210             :     bool       *isnull;
     211             :     int        *attnums;
     212             : 
     213             :     /* counters valid within a group */
     214          47 :     int         group_size = 0;
     215          47 :     int         n_violations = 0;
     216             : 
     217             :     /* total number of rows supporting (consistent with) the dependency */
     218          47 :     int         n_supporting_rows = 0;
     219             : 
     220             :     /* Make sure we have at least two input attributes. */
     221          47 :     Assert(k >= 2);
     222             : 
     223             :     /* sort info for all attributes columns */
     224          47 :     mss = multi_sort_init(k);
     225             : 
     226             :     /* data for the sort */
     227          47 :     items = (SortItem *) palloc(numrows * sizeof(SortItem));
     228          47 :     values = (Datum *) palloc(sizeof(Datum) * nvalues);
     229          47 :     isnull = (bool *) palloc(sizeof(bool) * nvalues);
     230             : 
     231             :     /* fix the pointers to values/isnull */
     232      497047 :     for (i = 0; i < numrows; i++)
     233             :     {
     234      497000 :         items[i].values = &values[i * k];
     235      497000 :         items[i].isnull = &isnull[i * k];
     236             :     }
     237             : 
     238             :     /*
     239             :      * Transform the bms into an array, to make accessing i-th member easier.
     240             :      */
     241          47 :     attnums = (int *) palloc(sizeof(int) * bms_num_members(attrs));
     242          47 :     i = 0;
     243          47 :     j = -1;
     244         233 :     while ((j = bms_next_member(attrs, j)) >= 0)
     245         139 :         attnums[i++] = j;
     246             : 
     247             :     /*
     248             :      * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
     249             :      *
     250             :      * (a) sort the data lexicographically
     251             :      *
     252             :      * (b) split the data into groups by first (k-1) columns
     253             :      *
     254             :      * (c) for each group count different values in the last column
     255             :      */
     256             : 
     257             :     /* prepare the sort function for the first dimension, and SortItem array */
     258         156 :     for (i = 0; i < k; i++)
     259             :     {
     260         109 :         VacAttrStats *colstat = stats[dependency[i]];
     261             :         TypeCacheEntry *type;
     262             : 
     263         109 :         type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
     264         109 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     265           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     266             :                  colstat->attrtypid);
     267             : 
     268             :         /* prepare the sort function for this dimension */
     269         109 :         multi_sort_add_dimension(mss, i, type->lt_opr);
     270             : 
     271             :         /* accumulate all the data for both columns into an array and sort it */
     272     1159109 :         for (j = 0; j < numrows; j++)
     273             :         {
     274     2318000 :             items[j].values[i] =
     275     1159000 :                 heap_getattr(rows[j], attnums[dependency[i]],
     276             :                              stats[i]->tupDesc, &items[j].isnull[i]);
     277             :         }
     278             :     }
     279             : 
     280             :     /* sort the items so that we can detect the groups */
     281          47 :     qsort_arg((void *) items, numrows, sizeof(SortItem),
     282             :               multi_sort_compare, mss);
     283             : 
     284             :     /*
     285             :      * Walk through the sorted array, split it into rows according to the
     286             :      * first (k-1) columns. If there's a single value in the last column, we
     287             :      * count the group as 'supporting' the functional dependency. Otherwise we
     288             :      * count it as contradicting.
     289             :      */
     290             : 
     291             :     /* start with the first row forming a group */
     292          47 :     group_size = 1;
     293             : 
     294             :     /* loop 1 beyond the end of the array so that we count the final group */
     295      497047 :     for (i = 1; i <= numrows; i++)
     296             :     {
     297             :         /*
     298             :          * Check if the group ended, which may be either because we processed
     299             :          * all the items (i==numrows), or because the i-th item is not equal
     300             :          * to the preceding one.
     301             :          */
     302      993953 :         if (i == numrows ||
     303      496953 :             multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
     304             :         {
     305             :             /*
     306             :              * If no violations were found in the group then track the rows of
     307             :              * the group as supporting the functional dependency.
     308             :              */
     309       12625 :             if (n_violations == 0)
     310        4609 :                 n_supporting_rows += group_size;
     311             : 
     312             :             /* Reset counters for the new group */
     313       12625 :             n_violations = 0;
     314       12625 :             group_size = 1;
     315       12625 :             continue;
     316             :         }
     317             :         /* first columns match, but the last one does not (so contradicting) */
     318      484375 :         else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
     319       53206 :             n_violations++;
     320             : 
     321      484375 :         group_size++;
     322             :     }
     323             : 
     324          47 :     pfree(items);
     325          47 :     pfree(values);
     326          47 :     pfree(isnull);
     327          47 :     pfree(mss);
     328             : 
     329             :     /* Compute the 'degree of validity' as (supporting/total). */
     330          47 :     return (n_supporting_rows * 1.0 / numrows);
     331             : }
     332             : 
     333             : /*
     334             :  * detects functional dependencies between groups of columns
     335             :  *
     336             :  * Generates all possible subsets of columns (variations) and computes
     337             :  * the degree of validity for each one. For example when creating statistics
     338             :  * on three columns (a,b,c) there are 9 possible dependencies
     339             :  *
     340             :  *     two columns            three columns
     341             :  *     -----------            -------------
     342             :  *     (a) -> b                (a,b) -> c
     343             :  *     (a) -> c                (a,c) -> b
     344             :  *     (b) -> a                (b,c) -> a
     345             :  *     (b) -> c
     346             :  *     (c) -> a
     347             :  *     (c) -> b
     348             :  */
     349             : MVDependencies *
     350           6 : statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
     351             :                            VacAttrStats **stats)
     352             : {
     353             :     int         i,
     354             :                 j,
     355             :                 k;
     356             :     int         numattrs;
     357             :     int        *attnums;
     358             : 
     359             :     /* result */
     360           6 :     MVDependencies *dependencies = NULL;
     361             : 
     362           6 :     numattrs = bms_num_members(attrs);
     363             : 
     364             :     /*
     365             :      * Transform the bms into an array, to make accessing i-th member easier.
     366             :      */
     367           6 :     attnums = palloc(sizeof(int) * bms_num_members(attrs));
     368           6 :     i = 0;
     369           6 :     j = -1;
     370          29 :     while ((j = bms_next_member(attrs, j)) >= 0)
     371          17 :         attnums[i++] = j;
     372             : 
     373           6 :     Assert(numattrs >= 2);
     374             : 
     375             :     /*
     376             :      * We'll try build functional dependencies starting from the smallest ones
     377             :      * covering just 2 columns, to the largest ones, covering all columns
     378             :      * included in the statistics object.  We start from the smallest ones
     379             :      * because we want to be able to skip already implied ones.
     380             :      */
     381          17 :     for (k = 2; k <= numattrs; k++)
     382             :     {
     383             :         AttrNumber *dependency; /* array with k elements */
     384             : 
     385             :         /* prepare a DependencyGenerator of variation */
     386          11 :         DependencyGenerator DependencyGenerator = DependencyGenerator_init(numattrs, k);
     387             : 
     388             :         /* generate all possible variations of k values (out of n) */
     389          69 :         while ((dependency = DependencyGenerator_next(DependencyGenerator)))
     390             :         {
     391             :             double      degree;
     392             :             MVDependency *d;
     393             : 
     394             :             /* compute how valid the dependency seems */
     395          47 :             degree = dependency_degree(numrows, rows, k, dependency, stats, attrs);
     396             : 
     397             :             /*
     398             :              * if the dependency seems entirely invalid, don't store it it
     399             :              */
     400          47 :             if (degree == 0.0)
     401          27 :                 continue;
     402             : 
     403          20 :             d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
     404             :                                          + k * sizeof(AttrNumber));
     405             : 
     406             :             /* copy the dependency (and keep the indexes into stxkeys) */
     407          20 :             d->degree = degree;
     408          20 :             d->nattributes = k;
     409          67 :             for (i = 0; i < k; i++)
     410          47 :                 d->attributes[i] = attnums[dependency[i]];
     411             : 
     412             :             /* initialize the list of dependencies */
     413          20 :             if (dependencies == NULL)
     414             :             {
     415             :                 dependencies
     416           4 :                     = (MVDependencies *) palloc0(sizeof(MVDependencies));
     417             : 
     418           4 :                 dependencies->magic = STATS_DEPS_MAGIC;
     419           4 :                 dependencies->type = STATS_DEPS_TYPE_BASIC;
     420           4 :                 dependencies->ndeps = 0;
     421             :             }
     422             : 
     423          20 :             dependencies->ndeps++;
     424          20 :             dependencies = (MVDependencies *) repalloc(dependencies,
     425             :                                                        offsetof(MVDependencies, deps)
     426          20 :                                                        + dependencies->ndeps * sizeof(MVDependency));
     427             : 
     428          20 :             dependencies->deps[dependencies->ndeps - 1] = d;
     429             :         }
     430             : 
     431             :         /*
     432             :          * we're done with variations of k elements, so free the
     433             :          * DependencyGenerator
     434             :          */
     435          11 :         DependencyGenerator_free(DependencyGenerator);
     436             :     }
     437             : 
     438           6 :     return dependencies;
     439             : }
     440             : 
     441             : 
     442             : /*
     443             :  * Serialize list of dependencies into a bytea value.
     444             :  */
     445             : bytea *
     446           4 : statext_dependencies_serialize(MVDependencies *dependencies)
     447             : {
     448             :     int         i;
     449             :     bytea      *output;
     450             :     char       *tmp;
     451             :     Size        len;
     452             : 
     453             :     /* we need to store ndeps, with a number of attributes for each one */
     454           4 :     len = VARHDRSZ + SizeOfDependencies
     455           4 :         + dependencies->ndeps * SizeOfDependency;
     456             : 
     457             :     /* and also include space for the actual attribute numbers and degrees */
     458          24 :     for (i = 0; i < dependencies->ndeps; i++)
     459          20 :         len += (sizeof(AttrNumber) * dependencies->deps[i]->nattributes);
     460             : 
     461           4 :     output = (bytea *) palloc0(len);
     462           4 :     SET_VARSIZE(output, len);
     463             : 
     464           4 :     tmp = VARDATA(output);
     465             : 
     466             :     /* Store the base struct values (magic, type, ndeps) */
     467           4 :     memcpy(tmp, &dependencies->magic, sizeof(uint32));
     468           4 :     tmp += sizeof(uint32);
     469           4 :     memcpy(tmp, &dependencies->type, sizeof(uint32));
     470           4 :     tmp += sizeof(uint32);
     471           4 :     memcpy(tmp, &dependencies->ndeps, sizeof(uint32));
     472           4 :     tmp += sizeof(uint32);
     473             : 
     474             :     /* store number of attributes and attribute numbers for each dependency */
     475          24 :     for (i = 0; i < dependencies->ndeps; i++)
     476             :     {
     477          20 :         MVDependency *d = dependencies->deps[i];
     478             : 
     479          20 :         memcpy(tmp, d, SizeOfDependency);
     480          20 :         tmp += SizeOfDependency;
     481             : 
     482          20 :         memcpy(tmp, d->attributes, sizeof(AttrNumber) * d->nattributes);
     483          20 :         tmp += sizeof(AttrNumber) * d->nattributes;
     484             : 
     485          20 :         Assert(tmp <= ((char *) output + len));
     486             :     }
     487             : 
     488           4 :     return output;
     489             : }
     490             : 
     491             : /*
     492             :  * Reads serialized dependencies into MVDependencies structure.
     493             :  */
     494             : MVDependencies *
     495          20 : statext_dependencies_deserialize(bytea *data)
     496             : {
     497             :     int         i;
     498             :     Size        min_expected_size;
     499             :     MVDependencies *dependencies;
     500             :     char       *tmp;
     501             : 
     502          20 :     if (data == NULL)
     503           0 :         return NULL;
     504             : 
     505          20 :     if (VARSIZE_ANY_EXHDR(data) < SizeOfDependencies)
     506           0 :         elog(ERROR, "invalid MVDependencies size %zd (expected at least %zd)",
     507             :              VARSIZE_ANY_EXHDR(data), SizeOfDependencies);
     508             : 
     509             :     /* read the MVDependencies header */
     510          20 :     dependencies = (MVDependencies *) palloc0(sizeof(MVDependencies));
     511             : 
     512             :     /* initialize pointer to the data part (skip the varlena header) */
     513          20 :     tmp = VARDATA_ANY(data);
     514             : 
     515             :     /* read the header fields and perform basic sanity checks */
     516          20 :     memcpy(&dependencies->magic, tmp, sizeof(uint32));
     517          20 :     tmp += sizeof(uint32);
     518          20 :     memcpy(&dependencies->type, tmp, sizeof(uint32));
     519          20 :     tmp += sizeof(uint32);
     520          20 :     memcpy(&dependencies->ndeps, tmp, sizeof(uint32));
     521          20 :     tmp += sizeof(uint32);
     522             : 
     523          20 :     if (dependencies->magic != STATS_DEPS_MAGIC)
     524           0 :         elog(ERROR, "invalid dependency magic %d (expected %d)",
     525             :              dependencies->magic, STATS_DEPS_MAGIC);
     526             : 
     527          20 :     if (dependencies->type != STATS_DEPS_TYPE_BASIC)
     528           0 :         elog(ERROR, "invalid dependency type %d (expected %d)",
     529             :              dependencies->type, STATS_DEPS_TYPE_BASIC);
     530             : 
     531          20 :     if (dependencies->ndeps == 0)
     532           0 :         ereport(ERROR,
     533             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     534             :                  errmsg("invalid zero-length item array in MVDependencies")));
     535             : 
     536             :     /* what minimum bytea size do we expect for those parameters */
     537          20 :     min_expected_size = SizeOfDependencies +
     538          20 :         dependencies->ndeps * (SizeOfDependency +
     539             :                                sizeof(AttrNumber) * 2);
     540             : 
     541          20 :     if (VARSIZE_ANY_EXHDR(data) < min_expected_size)
     542           0 :         elog(ERROR, "invalid dependencies size %zd (expected at least %zd)",
     543             :              VARSIZE_ANY_EXHDR(data), min_expected_size);
     544             : 
     545             :     /* allocate space for the MCV items */
     546          20 :     dependencies = repalloc(dependencies, offsetof(MVDependencies, deps)
     547          20 :                             + (dependencies->ndeps * sizeof(MVDependency *)));
     548             : 
     549         120 :     for (i = 0; i < dependencies->ndeps; i++)
     550             :     {
     551             :         double      degree;
     552             :         AttrNumber  k;
     553             :         MVDependency *d;
     554             : 
     555             :         /* degree of validity */
     556         100 :         memcpy(&degree, tmp, sizeof(double));
     557         100 :         tmp += sizeof(double);
     558             : 
     559             :         /* number of attributes */
     560         100 :         memcpy(&k, tmp, sizeof(AttrNumber));
     561         100 :         tmp += sizeof(AttrNumber);
     562             : 
     563             :         /* is the number of attributes valid? */
     564         100 :         Assert((k >= 2) && (k <= STATS_MAX_DIMENSIONS));
     565             : 
     566             :         /* now that we know the number of attributes, allocate the dependency */
     567         100 :         d = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
     568             :                                      + (k * sizeof(AttrNumber)));
     569             : 
     570         100 :         d->degree = degree;
     571         100 :         d->nattributes = k;
     572             : 
     573             :         /* copy attribute numbers */
     574         100 :         memcpy(d->attributes, tmp, sizeof(AttrNumber) * d->nattributes);
     575         100 :         tmp += sizeof(AttrNumber) * d->nattributes;
     576             : 
     577         100 :         dependencies->deps[i] = d;
     578             : 
     579             :         /* still within the bytea */
     580         100 :         Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
     581             :     }
     582             : 
     583             :     /* we should have consumed the whole bytea exactly */
     584          20 :     Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
     585             : 
     586          20 :     return dependencies;
     587             : }
     588             : 
     589             : /*
     590             :  * dependency_is_fully_matched
     591             :  *      checks that a functional dependency is fully matched given clauses on
     592             :  *      attributes (assuming the clauses are suitable equality clauses)
     593             :  */
     594             : static bool
     595         105 : dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
     596             : {
     597             :     int         j;
     598             : 
     599             :     /*
     600             :      * Check that the dependency actually is fully covered by clauses. We have
     601             :      * to translate all attribute numbers, as those are referenced
     602             :      */
     603         284 :     for (j = 0; j < dependency->nattributes; j++)
     604             :     {
     605         219 :         int         attnum = dependency->attributes[j];
     606             : 
     607         219 :         if (!bms_is_member(attnum, attnums))
     608          40 :             return false;
     609             :     }
     610             : 
     611          65 :     return true;
     612             : }
     613             : 
     614             : /*
     615             :  * dependency_implies_attribute
     616             :  *      check that the attnum matches is implied by the functional dependency
     617             :  */
     618             : static bool
     619          67 : dependency_implies_attribute(MVDependency *dependency, AttrNumber attnum)
     620             : {
     621          67 :     if (attnum == dependency->attributes[dependency->nattributes - 1])
     622          29 :         return true;
     623             : 
     624          38 :     return false;
     625             : }
     626             : 
     627             : /*
     628             :  * statext_dependencies_load
     629             :  *      Load the functional dependencies for the indicated pg_statistic_ext tuple
     630             :  */
     631             : MVDependencies *
     632          20 : statext_dependencies_load(Oid mvoid)
     633             : {
     634             :     bool        isnull;
     635             :     Datum       deps;
     636          20 :     HeapTuple   htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid));
     637             : 
     638          20 :     if (!HeapTupleIsValid(htup))
     639           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     640             : 
     641          20 :     deps = SysCacheGetAttr(STATEXTOID, htup,
     642             :                            Anum_pg_statistic_ext_stxdependencies, &isnull);
     643          20 :     Assert(!isnull);
     644             : 
     645          20 :     ReleaseSysCache(htup);
     646             : 
     647          20 :     return statext_dependencies_deserialize(DatumGetByteaP(deps));
     648             : }
     649             : 
     650             : /*
     651             :  * pg_dependencies_in       - input routine for type pg_dependencies.
     652             :  *
     653             :  * pg_dependencies is real enough to be a table column, but it has no operations
     654             :  * of its own, and disallows input too
     655             :  */
     656             : Datum
     657           0 : pg_dependencies_in(PG_FUNCTION_ARGS)
     658             : {
     659             :     /*
     660             :      * pg_node_list stores the data in binary form and parsing text input is
     661             :      * not needed, so disallow this.
     662             :      */
     663           0 :     ereport(ERROR,
     664             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     665             :              errmsg("cannot accept a value of type %s", "pg_dependencies")));
     666             : 
     667             :     PG_RETURN_VOID();           /* keep compiler quiet */
     668             : }
     669             : 
     670             : /*
     671             :  * pg_dependencies      - output routine for type pg_dependencies.
     672             :  */
     673             : Datum
     674           0 : pg_dependencies_out(PG_FUNCTION_ARGS)
     675             : {
     676           0 :     bytea      *data = PG_GETARG_BYTEA_PP(0);
     677           0 :     MVDependencies *dependencies = statext_dependencies_deserialize(data);
     678             :     int         i,
     679             :                 j;
     680             :     StringInfoData str;
     681             : 
     682           0 :     initStringInfo(&str);
     683           0 :     appendStringInfoChar(&str, '{');
     684             : 
     685           0 :     for (i = 0; i < dependencies->ndeps; i++)
     686             :     {
     687           0 :         MVDependency *dependency = dependencies->deps[i];
     688             : 
     689           0 :         if (i > 0)
     690           0 :             appendStringInfoString(&str, ", ");
     691             : 
     692           0 :         appendStringInfoChar(&str, '"');
     693           0 :         for (j = 0; j < dependency->nattributes; j++)
     694             :         {
     695           0 :             if (j == dependency->nattributes - 1)
     696           0 :                 appendStringInfoString(&str, " => ");
     697           0 :             else if (j > 0)
     698           0 :                 appendStringInfoString(&str, ", ");
     699             : 
     700           0 :             appendStringInfo(&str, "%d", dependency->attributes[j]);
     701             :         }
     702           0 :         appendStringInfo(&str, "\": %f", dependency->degree);
     703             :     }
     704             : 
     705           0 :     appendStringInfoChar(&str, '}');
     706             : 
     707           0 :     PG_RETURN_CSTRING(str.data);
     708             : }
     709             : 
     710             : /*
     711             :  * pg_dependencies_recv     - binary input routine for type pg_dependencies.
     712             :  */
     713             : Datum
     714           0 : pg_dependencies_recv(PG_FUNCTION_ARGS)
     715             : {
     716           0 :     ereport(ERROR,
     717             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     718             :              errmsg("cannot accept a value of type %s", "pg_dependencies")));
     719             : 
     720             :     PG_RETURN_VOID();           /* keep compiler quiet */
     721             : }
     722             : 
     723             : /*
     724             :  * pg_dependencies_send     - binary output routine for type pg_dependencies.
     725             :  *
     726             :  * Functional dependencies are serialized in a bytea value (although the type
     727             :  * is named differently), so let's just send that.
     728             :  */
     729             : Datum
     730           0 : pg_dependencies_send(PG_FUNCTION_ARGS)
     731             : {
     732           0 :     return byteasend(fcinfo);
     733             : }
     734             : 
     735             : /*
     736             :  * dependency_is_compatible_clause
     737             :  *      Determines if the clause is compatible with functional dependencies
     738             :  *
     739             :  * Only OpExprs with two arguments using an equality operator are supported.
     740             :  * When returning True attnum is set to the attribute number of the Var within
     741             :  * the supported clause.
     742             :  *
     743             :  * Currently we only support Var = Const, or Const = Var. It may be possible
     744             :  * to expand on this later.
     745             :  */
     746             : static bool
     747          49 : dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
     748             : {
     749          49 :     RestrictInfo *rinfo = (RestrictInfo *) clause;
     750             : 
     751          49 :     if (!IsA(rinfo, RestrictInfo))
     752           0 :         return false;
     753             : 
     754             :     /* Pseudoconstants are not really interesting here. */
     755          49 :     if (rinfo->pseudoconstant)
     756           0 :         return false;
     757             : 
     758             :     /* clauses referencing multiple varnos are incompatible */
     759          49 :     if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
     760           0 :         return false;
     761             : 
     762          49 :     if (is_opclause(rinfo->clause))
     763             :     {
     764          49 :         OpExpr     *expr = (OpExpr *) rinfo->clause;
     765             :         Var        *var;
     766          49 :         bool        varonleft = true;
     767             :         bool        ok;
     768             : 
     769             :         /* Only expressions with two arguments are considered compatible. */
     770          49 :         if (list_length(expr->args) != 2)
     771           0 :             return false;
     772             : 
     773             :         /* see if it actually has the right */
     774         147 :         ok = (NumRelids((Node *) expr) == 1) &&
     775          49 :             (is_pseudo_constant_clause(lsecond(expr->args)) ||
     776           0 :              (varonleft = false,
     777           0 :               is_pseudo_constant_clause(linitial(expr->args))));
     778             : 
     779             :         /* unsupported structure (two variables or so) */
     780          49 :         if (!ok)
     781           0 :             return false;
     782             : 
     783             :         /*
     784             :          * If it's not "=" operator, just ignore the clause, as it's not
     785             :          * compatible with functional dependencies.
     786             :          *
     787             :          * This uses the function for estimating selectivity, not the operator
     788             :          * directly (a bit awkward, but well ...).
     789             :          */
     790          49 :         if (get_oprrest(expr->opno) != F_EQSEL)
     791           0 :             return false;
     792             : 
     793          49 :         var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
     794             : 
     795             :         /* We only support plain Vars for now */
     796          49 :         if (!IsA(var, Var))
     797           0 :             return false;
     798             : 
     799             :         /* Ensure var is from the correct relation */
     800          49 :         if (var->varno != relid)
     801           0 :             return false;
     802             : 
     803             :         /* we also better ensure the Var is from the current level */
     804          49 :         if (var->varlevelsup > 0)
     805           0 :             return false;
     806             : 
     807             :         /* Also skip system attributes (we don't allow stats on those). */
     808          49 :         if (!AttrNumberIsForUserDefinedAttr(var->varattno))
     809           0 :             return false;
     810             : 
     811          49 :         *attnum = var->varattno;
     812          49 :         return true;
     813             :     }
     814             : 
     815           0 :     return false;
     816             : }
     817             : 
     818             : /*
     819             :  * find_strongest_dependency
     820             :  *      find the strongest dependency on the attributes
     821             :  *
     822             :  * When applying functional dependencies, we start with the strongest
     823             :  * dependencies. That is, we select the dependency that:
     824             :  *
     825             :  * (a) has all attributes covered by equality clauses
     826             :  *
     827             :  * (b) has the most attributes
     828             :  *
     829             :  * (c) has the highest degree of validity
     830             :  *
     831             :  * This guarantees that we eliminate the most redundant conditions first
     832             :  * (see the comment in dependencies_clauselist_selectivity).
     833             :  */
     834             : static MVDependency *
     835          49 : find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies,
     836             :                           Bitmapset *attnums)
     837             : {
     838             :     int         i;
     839          49 :     MVDependency *strongest = NULL;
     840             : 
     841             :     /* number of attnums in clauses */
     842          49 :     int         nattnums = bms_num_members(attnums);
     843             : 
     844             :     /*
     845             :      * Iterate over the MVDependency items and find the strongest one from the
     846             :      * fully-matched dependencies. We do the cheap checks first, before
     847             :      * matching it against the attnums.
     848             :      */
     849         294 :     for (i = 0; i < dependencies->ndeps; i++)
     850             :     {
     851         245 :         MVDependency *dependency = dependencies->deps[i];
     852             : 
     853             :         /*
     854             :          * Skip dependencies referencing more attributes than available
     855             :          * clauses, as those can't be fully matched.
     856             :          */
     857         245 :         if (dependency->nattributes > nattnums)
     858         140 :             continue;
     859             : 
     860         105 :         if (strongest)
     861             :         {
     862             :             /* skip dependencies on fewer attributes than the strongest. */
     863          67 :             if (dependency->nattributes < strongest->nattributes)
     864           0 :                 continue;
     865             : 
     866             :             /* also skip weaker dependencies when attribute count matches */
     867         125 :             if (strongest->nattributes == dependency->nattributes &&
     868          58 :                 strongest->degree > dependency->degree)
     869           0 :                 continue;
     870             :         }
     871             : 
     872             :         /*
     873             :          * this dependency is stronger, but we must still check that it's
     874             :          * fully matched to these attnums. We perform this check last as it's
     875             :          * slightly more expensive than the previous checks.
     876             :          */
     877         105 :         if (dependency_is_fully_matched(dependency, attnums))
     878          65 :             strongest = dependency; /* save new best match */
     879             :     }
     880             : 
     881          49 :     return strongest;
     882             : }
     883             : 
     884             : /*
     885             :  * dependencies_clauselist_selectivity
     886             :  *      Return the estimated selectivity of the given clauses using
     887             :  *      functional dependency statistics, or 1.0 if no useful functional
     888             :  *      dependency statistic exists.
     889             :  *
     890             :  * 'estimatedclauses' is an output argument that gets a bit set corresponding
     891             :  * to the (zero-based) list index of clauses that are included in the
     892             :  * estimated selectivity.
     893             :  *
     894             :  * Given equality clauses on attributes (a,b) we find the strongest dependency
     895             :  * between them, i.e. either (a=>b) or (b=>a). Assuming (a=>b) is the selected
     896             :  * dependency, we then combine the per-clause selectivities using the formula
     897             :  *
     898             :  *     P(a,b) = P(a) * [f + (1-f)*P(b)]
     899             :  *
     900             :  * where 'f' is the degree of the dependency.
     901             :  *
     902             :  * With clauses on more than two attributes, the dependencies are applied
     903             :  * recursively, starting with the widest/strongest dependencies. For example
     904             :  * P(a,b,c) is first split like this:
     905             :  *
     906             :  *     P(a,b,c) = P(a,b) * [f + (1-f)*P(c)]
     907             :  *
     908             :  * assuming (a,b=>c) is the strongest dependency.
     909             :  */
     910             : Selectivity
     911          20 : dependencies_clauselist_selectivity(PlannerInfo *root,
     912             :                                     List *clauses,
     913             :                                     int varRelid,
     914             :                                     JoinType jointype,
     915             :                                     SpecialJoinInfo *sjinfo,
     916             :                                     RelOptInfo *rel,
     917             :                                     Bitmapset **estimatedclauses)
     918             : {
     919          20 :     Selectivity s1 = 1.0;
     920             :     ListCell   *l;
     921          20 :     Bitmapset  *clauses_attnums = NULL;
     922             :     StatisticExtInfo *stat;
     923             :     MVDependencies *dependencies;
     924             :     AttrNumber *list_attnums;
     925             :     int         listidx;
     926             : 
     927             :     /* check if there's any stats that might be useful for us. */
     928          20 :     if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
     929           0 :         return 1.0;
     930             : 
     931          20 :     list_attnums = (AttrNumber *) palloc(sizeof(AttrNumber) *
     932          20 :                                          list_length(clauses));
     933             : 
     934             :     /*
     935             :      * Pre-process the clauses list to extract the attnums seen in each item.
     936             :      * We need to determine if there's any clauses which will be useful for
     937             :      * dependency selectivity estimations. Along the way we'll record all of
     938             :      * the attnums for each clause in a list which we'll reference later so we
     939             :      * don't need to repeat the same work again. We'll also keep track of all
     940             :      * attnums seen.
     941             :      */
     942          20 :     listidx = 0;
     943          69 :     foreach(l, clauses)
     944             :     {
     945          49 :         Node       *clause = (Node *) lfirst(l);
     946             :         AttrNumber  attnum;
     947             : 
     948          49 :         if (dependency_is_compatible_clause(clause, rel->relid, &attnum))
     949             :         {
     950          49 :             list_attnums[listidx] = attnum;
     951          49 :             clauses_attnums = bms_add_member(clauses_attnums, attnum);
     952             :         }
     953             :         else
     954           0 :             list_attnums[listidx] = InvalidAttrNumber;
     955             : 
     956          49 :         listidx++;
     957             :     }
     958             : 
     959             :     /*
     960             :      * If there's not at least two distinct attnums then reject the whole list
     961             :      * of clauses. We must return 1.0 so the calling function's selectivity is
     962             :      * unaffected.
     963             :      */
     964          20 :     if (bms_num_members(clauses_attnums) < 2)
     965             :     {
     966           0 :         pfree(list_attnums);
     967           0 :         return 1.0;
     968             :     }
     969             : 
     970             :     /* find the best suited statistics object for these attnums */
     971          20 :     stat = choose_best_statistics(rel->statlist, clauses_attnums,
     972             :                                   STATS_EXT_DEPENDENCIES);
     973             : 
     974             :     /* if no matching stats could be found then we've nothing to do */
     975          20 :     if (!stat)
     976             :     {
     977           0 :         pfree(list_attnums);
     978           0 :         return 1.0;
     979             :     }
     980             : 
     981             :     /* load the dependency items stored in the statistics object */
     982          20 :     dependencies = statext_dependencies_load(stat->statOid);
     983             : 
     984             :     /*
     985             :      * Apply the dependencies recursively, starting with the widest/strongest
     986             :      * ones, and proceeding to the smaller/weaker ones. At the end of each
     987             :      * round we factor in the selectivity of clauses on the implied attribute,
     988             :      * and remove the clauses from the list.
     989             :      */
     990             :     while (true)
     991             :     {
     992          49 :         Selectivity s2 = 1.0;
     993             :         MVDependency *dependency;
     994             : 
     995             :         /* the widest/strongest dependency, fully matched by clauses */
     996          49 :         dependency = find_strongest_dependency(stat, dependencies,
     997             :                                                clauses_attnums);
     998             : 
     999             :         /* if no suitable dependency was found, we're done */
    1000          49 :         if (!dependency)
    1001          20 :             break;
    1002             : 
    1003             :         /*
    1004             :          * We found an applicable dependency, so find all the clauses on the
    1005             :          * implied attribute - with dependency (a,b => c) we look for clauses
    1006             :          * on 'c'.
    1007             :          */
    1008          29 :         listidx = -1;
    1009         105 :         foreach(l, clauses)
    1010             :         {
    1011             :             Node       *clause;
    1012             : 
    1013          76 :             listidx++;
    1014             : 
    1015             :             /*
    1016             :              * Skip incompatible clauses, and ones we've already estimated on.
    1017             :              */
    1018         152 :             if (list_attnums[listidx] == InvalidAttrNumber ||
    1019          76 :                 bms_is_member(listidx, *estimatedclauses))
    1020           9 :                 continue;
    1021             : 
    1022             :             /*
    1023             :              * Technically we could find more than one clause for a given
    1024             :              * attnum. Since these clauses must be equality clauses, we choose
    1025             :              * to only take the selectivity estimate from the final clause in
    1026             :              * the list for this attnum. If the attnum happens to be compared
    1027             :              * to a different Const in another clause then no rows will match
    1028             :              * anyway. If it happens to be compared to the same Const, then
    1029             :              * ignoring the additional clause is just the thing to do.
    1030             :              */
    1031          67 :             if (dependency_implies_attribute(dependency,
    1032          67 :                                              list_attnums[listidx]))
    1033             :             {
    1034          29 :                 clause = (Node *) lfirst(l);
    1035             : 
    1036          29 :                 s2 = clause_selectivity(root, clause, varRelid, jointype,
    1037             :                                         sjinfo);
    1038             : 
    1039             :                 /* mark this one as done, so we don't touch it again. */
    1040          29 :                 *estimatedclauses = bms_add_member(*estimatedclauses, listidx);
    1041             : 
    1042             :                 /*
    1043             :                  * Mark that we've got and used the dependency on this clause.
    1044             :                  * We'll want to ignore this when looking for the next
    1045             :                  * strongest dependency above.
    1046             :                  */
    1047          29 :                 clauses_attnums = bms_del_member(clauses_attnums,
    1048          29 :                                                  list_attnums[listidx]);
    1049             :             }
    1050             :         }
    1051             : 
    1052             :         /*
    1053             :          * Now factor in the selectivity for all the "implied" clauses into
    1054             :          * the final one, using this formula:
    1055             :          *
    1056             :          * P(a,b) = P(a) * (f + (1-f) * P(b))
    1057             :          *
    1058             :          * where 'f' is the degree of validity of the dependency.
    1059             :          */
    1060          29 :         s1 *= (dependency->degree + (1 - dependency->degree) * s2);
    1061          29 :     }
    1062             : 
    1063          20 :     pfree(dependencies);
    1064          20 :     pfree(list_attnums);
    1065             : 
    1066          20 :     return s1;
    1067             : }

Generated by: LCOV version 1.11