LCOV - code coverage report
Current view: top level - src/backend/statistics - extended_stats.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 155 160 96.9 %
Date: 2017-09-29 13:40:31 Functions: 12 12 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * extended_stats.c
       4             :  *    POSTGRES extended statistics
       5             :  *
       6             :  * Generic code supporting statistics objects created via CREATE STATISTICS.
       7             :  *
       8             :  *
       9             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994, Regents of the University of California
      11             :  *
      12             :  * IDENTIFICATION
      13             :  *    src/backend/statistics/extended_stats.c
      14             :  *
      15             :  *-------------------------------------------------------------------------
      16             :  */
      17             : #include "postgres.h"
      18             : 
      19             : #include "access/genam.h"
      20             : #include "access/heapam.h"
      21             : #include "access/htup_details.h"
      22             : #include "catalog/indexing.h"
      23             : #include "catalog/pg_collation.h"
      24             : #include "catalog/pg_statistic_ext.h"
      25             : #include "nodes/relation.h"
      26             : #include "postmaster/autovacuum.h"
      27             : #include "statistics/extended_stats_internal.h"
      28             : #include "statistics/statistics.h"
      29             : #include "utils/builtins.h"
      30             : #include "utils/fmgroids.h"
      31             : #include "utils/lsyscache.h"
      32             : #include "utils/memutils.h"
      33             : #include "utils/rel.h"
      34             : #include "utils/syscache.h"
      35             : 
      36             : 
      37             : /*
      38             :  * Used internally to refer to an individual statistics object, i.e.,
      39             :  * a pg_statistic_ext entry.
      40             :  */
      41             : typedef struct StatExtEntry
      42             : {
      43             :     Oid         statOid;        /* OID of pg_statistic_ext entry */
      44             :     char       *schema;         /* statistics object's schema */
      45             :     char       *name;           /* statistics object's name */
      46             :     Bitmapset  *columns;        /* attribute numbers covered by the object */
      47             :     List       *types;          /* 'char' list of enabled statistic kinds */
      48             : } StatExtEntry;
      49             : 
      50             : 
      51             : static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
      52             : static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
      53             :                       int nvacatts, VacAttrStats **vacatts);
      54             : static void statext_store(Relation pg_stext, Oid relid,
      55             :               MVNDistinct *ndistinct, MVDependencies *dependencies,
      56             :               VacAttrStats **stats);
      57             : 
      58             : 
      59             : /*
      60             :  * Compute requested extended stats, using the rows sampled for the plain
      61             :  * (single-column) stats.
      62             :  *
      63             :  * This fetches a list of stats types from pg_statistic_ext, computes the
      64             :  * requested stats, and serializes them back into the catalog.
      65             :  */
      66             : void
      67         178 : BuildRelationExtStatistics(Relation onerel, double totalrows,
      68             :                            int numrows, HeapTuple *rows,
      69             :                            int natts, VacAttrStats **vacattrstats)
      70             : {
      71             :     Relation    pg_stext;
      72             :     ListCell   *lc;
      73             :     List       *stats;
      74             :     MemoryContext cxt;
      75             :     MemoryContext oldcxt;
      76             : 
      77         178 :     cxt = AllocSetContextCreate(CurrentMemoryContext, "stats ext",
      78             :                                 ALLOCSET_DEFAULT_SIZES);
      79         178 :     oldcxt = MemoryContextSwitchTo(cxt);
      80             : 
      81         178 :     pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock);
      82         178 :     stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
      83             : 
      84         186 :     foreach(lc, stats)
      85             :     {
      86           8 :         StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
      87           8 :         MVNDistinct *ndistinct = NULL;
      88           8 :         MVDependencies *dependencies = NULL;
      89             :         VacAttrStats **stats;
      90             :         ListCell   *lc2;
      91             : 
      92             :         /*
      93             :          * Check if we can build these stats based on the column analyzed. If
      94             :          * not, report this fact (except in autovacuum) and move on.
      95             :          */
      96           8 :         stats = lookup_var_attr_stats(onerel, stat->columns,
      97             :                                       natts, vacattrstats);
      98           8 :         if (!stats && !IsAutoVacuumWorkerProcess())
      99             :         {
     100           2 :             ereport(WARNING,
     101             :                     (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
     102             :                      errmsg("statistics object \"%s.%s\" could not be computed for relation \"%s.%s\"",
     103             :                             stat->schema, stat->name,
     104             :                             get_namespace_name(onerel->rd_rel->relnamespace),
     105             :                             RelationGetRelationName(onerel)),
     106             :                      errtable(onerel)));
     107           2 :             continue;
     108             :         }
     109             : 
     110             :         /* check allowed number of dimensions */
     111           6 :         Assert(bms_num_members(stat->columns) >= 2 &&
     112             :                bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
     113             : 
     114             :         /* compute statistic of each requested type */
     115          15 :         foreach(lc2, stat->types)
     116             :         {
     117           9 :             char        t = (char) lfirst_int(lc2);
     118             : 
     119           9 :             if (t == STATS_EXT_NDISTINCT)
     120           3 :                 ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
     121             :                                                     stat->columns, stats);
     122           6 :             else if (t == STATS_EXT_DEPENDENCIES)
     123           6 :                 dependencies = statext_dependencies_build(numrows, rows,
     124             :                                                           stat->columns, stats);
     125             :         }
     126             : 
     127             :         /* store the statistics in the catalog */
     128           6 :         statext_store(pg_stext, stat->statOid, ndistinct, dependencies, stats);
     129             :     }
     130             : 
     131         178 :     heap_close(pg_stext, RowExclusiveLock);
     132             : 
     133         178 :     MemoryContextSwitchTo(oldcxt);
     134         178 :     MemoryContextDelete(cxt);
     135         178 : }
     136             : 
     137             : /*
     138             :  * statext_is_kind_built
     139             :  *      Is this stat kind built in the given pg_statistic_ext tuple?
     140             :  */
     141             : bool
     142          32 : statext_is_kind_built(HeapTuple htup, char type)
     143             : {
     144             :     AttrNumber  attnum;
     145             : 
     146          32 :     switch (type)
     147             :     {
     148             :         case STATS_EXT_NDISTINCT:
     149          16 :             attnum = Anum_pg_statistic_ext_stxndistinct;
     150          16 :             break;
     151             : 
     152             :         case STATS_EXT_DEPENDENCIES:
     153          16 :             attnum = Anum_pg_statistic_ext_stxdependencies;
     154          16 :             break;
     155             : 
     156             :         default:
     157           0 :             elog(ERROR, "unexpected statistics type requested: %d", type);
     158             :     }
     159             : 
     160          32 :     return !heap_attisnull(htup, attnum);
     161             : }
     162             : 
     163             : /*
     164             :  * Return a list (of StatExtEntry) of statistics objects for the given relation.
     165             :  */
     166             : static List *
     167         178 : fetch_statentries_for_relation(Relation pg_statext, Oid relid)
     168             : {
     169             :     SysScanDesc scan;
     170             :     ScanKeyData skey;
     171             :     HeapTuple   htup;
     172         178 :     List       *result = NIL;
     173             : 
     174             :     /*
     175             :      * Prepare to scan pg_statistic_ext for entries having stxrelid = this
     176             :      * rel.
     177             :      */
     178         178 :     ScanKeyInit(&skey,
     179             :                 Anum_pg_statistic_ext_stxrelid,
     180             :                 BTEqualStrategyNumber, F_OIDEQ,
     181             :                 ObjectIdGetDatum(relid));
     182             : 
     183         178 :     scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
     184             :                               NULL, 1, &skey);
     185             : 
     186         364 :     while (HeapTupleIsValid(htup = systable_getnext(scan)))
     187             :     {
     188             :         StatExtEntry *entry;
     189             :         Datum       datum;
     190             :         bool        isnull;
     191             :         int         i;
     192             :         ArrayType  *arr;
     193             :         char       *enabled;
     194             :         Form_pg_statistic_ext staForm;
     195             : 
     196           8 :         entry = palloc0(sizeof(StatExtEntry));
     197           8 :         entry->statOid = HeapTupleGetOid(htup);
     198           8 :         staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
     199           8 :         entry->schema = get_namespace_name(staForm->stxnamespace);
     200           8 :         entry->name = pstrdup(NameStr(staForm->stxname));
     201          29 :         for (i = 0; i < staForm->stxkeys.dim1; i++)
     202             :         {
     203          21 :             entry->columns = bms_add_member(entry->columns,
     204          21 :                                             staForm->stxkeys.values[i]);
     205             :         }
     206             : 
     207             :         /* decode the stxkind char array into a list of chars */
     208           8 :         datum = SysCacheGetAttr(STATEXTOID, htup,
     209             :                                 Anum_pg_statistic_ext_stxkind, &isnull);
     210           8 :         Assert(!isnull);
     211           8 :         arr = DatumGetArrayTypeP(datum);
     212          16 :         if (ARR_NDIM(arr) != 1 ||
     213          16 :             ARR_HASNULL(arr) ||
     214           8 :             ARR_ELEMTYPE(arr) != CHAROID)
     215           0 :             elog(ERROR, "stxkind is not a 1-D char array");
     216           8 :         enabled = (char *) ARR_DATA_PTR(arr);
     217          21 :         for (i = 0; i < ARR_DIMS(arr)[0]; i++)
     218             :         {
     219          13 :             Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
     220             :                    (enabled[i] == STATS_EXT_DEPENDENCIES));
     221          13 :             entry->types = lappend_int(entry->types, (int) enabled[i]);
     222             :         }
     223             : 
     224           8 :         result = lappend(result, entry);
     225             :     }
     226             : 
     227         178 :     systable_endscan(scan);
     228             : 
     229         178 :     return result;
     230             : }
     231             : 
     232             : /*
     233             :  * Using 'vacatts' of size 'nvacatts' as input data, return a newly built
     234             :  * VacAttrStats array which includes only the items corresponding to
     235             :  * attributes indicated by 'stxkeys'. If we don't have all of the per column
     236             :  * stats available to compute the extended stats, then we return NULL to indicate
     237             :  * to the caller that the stats should not be built.
     238             :  */
     239             : static VacAttrStats **
     240           8 : lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
     241             :                       int nvacatts, VacAttrStats **vacatts)
     242             : {
     243           8 :     int         i = 0;
     244           8 :     int         x = -1;
     245             :     VacAttrStats **stats;
     246             : 
     247           8 :     stats = (VacAttrStats **)
     248           8 :         palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
     249             : 
     250             :     /* lookup VacAttrStats info for the requested columns (same attnum) */
     251          34 :     while ((x = bms_next_member(attrs, x)) >= 0)
     252             :     {
     253             :         int         j;
     254             : 
     255          20 :         stats[i] = NULL;
     256          73 :         for (j = 0; j < nvacatts; j++)
     257             :         {
     258          71 :             if (x == vacatts[j]->tupattnum)
     259             :             {
     260          18 :                 stats[i] = vacatts[j];
     261          18 :                 break;
     262             :             }
     263             :         }
     264             : 
     265          20 :         if (!stats[i])
     266             :         {
     267             :             /*
     268             :              * Looks like stats were not gathered for one of the columns
     269             :              * required. We'll be unable to build the extended stats without
     270             :              * this column.
     271             :              */
     272           2 :             pfree(stats);
     273           2 :             return NULL;
     274             :         }
     275             : 
     276             :         /*
     277             :          * Sanity check that the column is not dropped - stats should have
     278             :          * been removed in this case.
     279             :          */
     280          18 :         Assert(!stats[i]->attr->attisdropped);
     281             : 
     282          18 :         i++;
     283             :     }
     284             : 
     285           6 :     return stats;
     286             : }
     287             : 
     288             : /*
     289             :  * statext_store
     290             :  *  Serializes the statistics and stores them into the pg_statistic_ext tuple.
     291             :  */
     292             : static void
     293           6 : statext_store(Relation pg_stext, Oid statOid,
     294             :               MVNDistinct *ndistinct, MVDependencies *dependencies,
     295             :               VacAttrStats **stats)
     296             : {
     297             :     HeapTuple   stup,
     298             :                 oldtup;
     299             :     Datum       values[Natts_pg_statistic_ext];
     300             :     bool        nulls[Natts_pg_statistic_ext];
     301             :     bool        replaces[Natts_pg_statistic_ext];
     302             : 
     303           6 :     memset(nulls, 1, Natts_pg_statistic_ext * sizeof(bool));
     304           6 :     memset(replaces, 0, Natts_pg_statistic_ext * sizeof(bool));
     305           6 :     memset(values, 0, Natts_pg_statistic_ext * sizeof(Datum));
     306             : 
     307             :     /*
     308             :      * Construct a new pg_statistic_ext tuple, replacing the calculated stats.
     309             :      */
     310           6 :     if (ndistinct != NULL)
     311             :     {
     312           3 :         bytea      *data = statext_ndistinct_serialize(ndistinct);
     313             : 
     314           3 :         nulls[Anum_pg_statistic_ext_stxndistinct - 1] = (data == NULL);
     315           3 :         values[Anum_pg_statistic_ext_stxndistinct - 1] = PointerGetDatum(data);
     316             :     }
     317             : 
     318           6 :     if (dependencies != NULL)
     319             :     {
     320           4 :         bytea      *data = statext_dependencies_serialize(dependencies);
     321             : 
     322           4 :         nulls[Anum_pg_statistic_ext_stxdependencies - 1] = (data == NULL);
     323           4 :         values[Anum_pg_statistic_ext_stxdependencies - 1] = PointerGetDatum(data);
     324             :     }
     325             : 
     326             :     /* always replace the value (either by bytea or NULL) */
     327           6 :     replaces[Anum_pg_statistic_ext_stxndistinct - 1] = true;
     328           6 :     replaces[Anum_pg_statistic_ext_stxdependencies - 1] = true;
     329             : 
     330             :     /* there should already be a pg_statistic_ext tuple */
     331           6 :     oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
     332           6 :     if (!HeapTupleIsValid(oldtup))
     333           0 :         elog(ERROR, "cache lookup failed for statistics object %u", statOid);
     334             : 
     335             :     /* replace it */
     336           6 :     stup = heap_modify_tuple(oldtup,
     337             :                              RelationGetDescr(pg_stext),
     338             :                              values,
     339             :                              nulls,
     340             :                              replaces);
     341           6 :     ReleaseSysCache(oldtup);
     342           6 :     CatalogTupleUpdate(pg_stext, &stup->t_self, stup);
     343             : 
     344           6 :     heap_freetuple(stup);
     345           6 : }
     346             : 
     347             : /* initialize multi-dimensional sort */
     348             : MultiSortSupport
     349          56 : multi_sort_init(int ndims)
     350             : {
     351             :     MultiSortSupport mss;
     352             : 
     353          56 :     Assert(ndims >= 2);
     354             : 
     355          56 :     mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
     356          56 :                                      + sizeof(SortSupportData) * ndims);
     357             : 
     358          56 :     mss->ndims = ndims;
     359             : 
     360          56 :     return mss;
     361             : }
     362             : 
     363             : /*
     364             :  * Prepare sort support info using the given sort operator
     365             :  * at the position 'sortdim'
     366             :  */
     367             : void
     368         129 : multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper)
     369             : {
     370         129 :     SortSupport ssup = &mss->ssup[sortdim];
     371             : 
     372         129 :     ssup->ssup_cxt = CurrentMemoryContext;
     373         129 :     ssup->ssup_collation = DEFAULT_COLLATION_OID;
     374         129 :     ssup->ssup_nulls_first = false;
     375         129 :     ssup->ssup_cxt = CurrentMemoryContext;
     376             : 
     377         129 :     PrepareSortSupportFromOrderingOp(oper, ssup);
     378         129 : }
     379             : 
     380             : /* compare all the dimensions in the selected order */
     381             : int
     382     3288493 : multi_sort_compare(const void *a, const void *b, void *arg)
     383             : {
     384     3288493 :     MultiSortSupport mss = (MultiSortSupport) arg;
     385     3288493 :     SortItem   *ia = (SortItem *) a;
     386     3288493 :     SortItem   *ib = (SortItem *) b;
     387             :     int         i;
     388             : 
     389     6098276 :     for (i = 0; i < mss->ndims; i++)
     390             :     {
     391             :         int         compare;
     392             : 
     393    10716246 :         compare = ApplySortComparator(ia->values[i], ia->isnull[i],
     394    10716246 :                                       ib->values[i], ib->isnull[i],
     395     5358123 :                                       &mss->ssup[i]);
     396             : 
     397     5358123 :         if (compare != 0)
     398     2548340 :             return compare;
     399             :     }
     400             : 
     401             :     /* equal by default */
     402      740153 :     return 0;
     403             : }
     404             : 
     405             : /* compare selected dimension */
     406             : int
     407      484375 : multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
     408             :                        MultiSortSupport mss)
     409             : {
     410      968750 :     return ApplySortComparator(a->values[dim], a->isnull[dim],
     411      968750 :                                b->values[dim], b->isnull[dim],
     412      484375 :                                &mss->ssup[dim]);
     413             : }
     414             : 
     415             : int
     416      496953 : multi_sort_compare_dims(int start, int end,
     417             :                         const SortItem *a, const SortItem *b,
     418             :                         MultiSortSupport mss)
     419             : {
     420             :     int         dim;
     421             : 
     422     1144699 :     for (dim = start; dim <= end; dim++)
     423             :     {
     424     1320648 :         int         r = ApplySortComparator(a->values[dim], a->isnull[dim],
     425     1320648 :                                             b->values[dim], b->isnull[dim],
     426      660324 :                                             &mss->ssup[dim]);
     427             : 
     428      660324 :         if (r != 0)
     429       12578 :             return r;
     430             :     }
     431             : 
     432      484375 :     return 0;
     433             : }
     434             : 
     435             : /*
     436             :  * has_stats_of_kind
     437             :  *      Check whether the list contains statistic of a given kind
     438             :  */
     439             : bool
     440          20 : has_stats_of_kind(List *stats, char requiredkind)
     441             : {
     442             :     ListCell   *l;
     443             : 
     444          20 :     foreach(l, stats)
     445             :     {
     446          20 :         StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
     447             : 
     448          20 :         if (stat->kind == requiredkind)
     449          20 :             return true;
     450             :     }
     451             : 
     452           0 :     return false;
     453             : }
     454             : 
     455             : /*
     456             :  * choose_best_statistics
     457             :  *      Look for and return statistics with the specified 'requiredkind' which
     458             :  *      have keys that match at least two of the given attnums.  Return NULL if
     459             :  *      there's no match.
     460             :  *
     461             :  * The current selection criteria is very simple - we choose the statistics
     462             :  * object referencing the most of the requested attributes, breaking ties
     463             :  * in favor of objects with fewer keys overall.
     464             :  *
     465             :  * XXX if multiple statistics objects tie on both criteria, then which object
     466             :  * is chosen depends on the order that they appear in the stats list. Perhaps
     467             :  * further tiebreakers are needed.
     468             :  */
     469             : StatisticExtInfo *
     470          20 : choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind)
     471             : {
     472             :     ListCell   *lc;
     473          20 :     StatisticExtInfo *best_match = NULL;
     474          20 :     int         best_num_matched = 2;   /* goal #1: maximize */
     475          20 :     int         best_match_keys = (STATS_MAX_DIMENSIONS + 1);   /* goal #2: minimize */
     476             : 
     477          40 :     foreach(lc, stats)
     478             :     {
     479          20 :         StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
     480             :         int         num_matched;
     481             :         int         numkeys;
     482             :         Bitmapset  *matched;
     483             : 
     484             :         /* skip statistics that are not of the correct type */
     485          20 :         if (info->kind != requiredkind)
     486           0 :             continue;
     487             : 
     488             :         /* determine how many attributes of these stats can be matched to */
     489          20 :         matched = bms_intersect(attnums, info->keys);
     490          20 :         num_matched = bms_num_members(matched);
     491          20 :         bms_free(matched);
     492             : 
     493             :         /*
     494             :          * save the actual number of keys in the stats so that we can choose
     495             :          * the narrowest stats with the most matching keys.
     496             :          */
     497          20 :         numkeys = bms_num_members(info->keys);
     498             : 
     499             :         /*
     500             :          * Use this object when it increases the number of matched clauses or
     501             :          * when it matches the same number of attributes but these stats have
     502             :          * fewer keys than any previous match.
     503             :          */
     504          20 :         if (num_matched > best_num_matched ||
     505          11 :             (num_matched == best_num_matched && numkeys < best_match_keys))
     506             :         {
     507          20 :             best_match = info;
     508          20 :             best_num_matched = num_matched;
     509          20 :             best_match_keys = numkeys;
     510             :         }
     511             :     }
     512             : 
     513          20 :     return best_match;
     514             : }

Generated by: LCOV version 1.11