LCOV - code coverage report
Current view: top level - src/backend/statistics - mvdistinct.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 202 219 92.2 %
Date: 2017-09-29 13:40:31 Functions: 14 17 82.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * mvdistinct.c
       4             :  *    POSTGRES multivariate ndistinct coefficients
       5             :  *
       6             :  * Estimating number of groups in a combination of columns (e.g. for GROUP BY)
       7             :  * is tricky, and the estimation error is often significant.
       8             : 
       9             :  * The multivariate ndistinct coefficients address this by storing ndistinct
      10             :  * estimates for combinations of the user-specified columns.  So for example
      11             :  * given a statistics object on three columns (a,b,c), this module estimates
      12             :  * and stores n-distinct for (a,b), (a,c), (b,c) and (a,b,c).  The per-column
      13             :  * estimates are already available in pg_statistic.
      14             :  *
      15             :  *
      16             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      17             :  * Portions Copyright (c) 1994, Regents of the University of California
      18             :  *
      19             :  * IDENTIFICATION
      20             :  *    src/backend/statistics/mvdistinct.c
      21             :  *
      22             :  *-------------------------------------------------------------------------
      23             :  */
      24             : #include "postgres.h"
      25             : 
      26             : #include <math.h>
      27             : 
      28             : #include "access/htup_details.h"
      29             : #include "catalog/pg_statistic_ext.h"
      30             : #include "utils/fmgrprotos.h"
      31             : #include "utils/lsyscache.h"
      32             : #include "lib/stringinfo.h"
      33             : #include "utils/syscache.h"
      34             : #include "utils/typcache.h"
      35             : #include "statistics/extended_stats_internal.h"
      36             : #include "statistics/statistics.h"
      37             : 
      38             : 
      39             : static double ndistinct_for_combination(double totalrows, int numrows,
      40             :                           HeapTuple *rows, VacAttrStats **stats,
      41             :                           int k, int *combination);
      42             : static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
      43             : static int  n_choose_k(int n, int k);
      44             : static int  num_combinations(int n);
      45             : 
      46             : /* Combination generator API */
      47             : 
      48             : /* internal state for generator of k-combinations of n elements */
      49             : typedef struct CombinationGenerator
      50             : {
      51             :     int         k;              /* size of the combination */
      52             :     int         n;              /* total number of elements */
      53             :     int         current;        /* index of the next combination to return */
      54             :     int         ncombinations;  /* number of combinations (size of array) */
      55             :     int        *combinations;   /* array of pre-built combinations */
      56             : } CombinationGenerator;
      57             : 
      58             : static CombinationGenerator *generator_init(int n, int k);
      59             : static void generator_free(CombinationGenerator *state);
      60             : static int *generator_next(CombinationGenerator *state);
      61             : static void generate_combinations(CombinationGenerator *state);
      62             : 
      63             : 
      64             : /*
      65             :  * statext_ndistinct_build
      66             :  *      Compute ndistinct coefficient for the combination of attributes.
      67             :  *
      68             :  * This computes the ndistinct estimate using the same estimator used
      69             :  * in analyze.c and then computes the coefficient.
      70             :  */
      71             : MVNDistinct *
      72           3 : statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
      73             :                         Bitmapset *attrs, VacAttrStats **stats)
      74             : {
      75             :     MVNDistinct *result;
      76             :     int         k;
      77             :     int         itemcnt;
      78           3 :     int         numattrs = bms_num_members(attrs);
      79           3 :     int         numcombs = num_combinations(numattrs);
      80             : 
      81           3 :     result = palloc(offsetof(MVNDistinct, items) +
      82             :                     numcombs * sizeof(MVNDistinctItem));
      83           3 :     result->magic = STATS_NDISTINCT_MAGIC;
      84           3 :     result->type = STATS_NDISTINCT_TYPE_BASIC;
      85           3 :     result->nitems = numcombs;
      86             : 
      87           3 :     itemcnt = 0;
      88           8 :     for (k = 2; k <= numattrs; k++)
      89             :     {
      90             :         int        *combination;
      91             :         CombinationGenerator *generator;
      92             : 
      93             :         /* generate combinations of K out of N elements */
      94           5 :         generator = generator_init(numattrs, k);
      95             : 
      96           5 :         while ((combination = generator_next(generator)))
      97             :         {
      98           9 :             MVNDistinctItem *item = &result->items[itemcnt];
      99             :             int         j;
     100             : 
     101           9 :             item->attrs = NULL;
     102          29 :             for (j = 0; j < k; j++)
     103          20 :                 item->attrs = bms_add_member(item->attrs,
     104          20 :                                              stats[combination[j]]->attr->attnum);
     105           9 :             item->ndistinct =
     106           9 :                 ndistinct_for_combination(totalrows, numrows, rows,
     107             :                                           stats, k, combination);
     108             : 
     109           9 :             itemcnt++;
     110           9 :             Assert(itemcnt <= result->nitems);
     111             :         }
     112             : 
     113           5 :         generator_free(generator);
     114             :     }
     115             : 
     116             :     /* must consume exactly the whole output array */
     117           3 :     Assert(itemcnt == result->nitems);
     118             : 
     119           3 :     return result;
     120             : }
     121             : 
     122             : /*
     123             :  * statext_ndistinct_load
     124             :  *      Load the ndistinct value for the indicated pg_statistic_ext tuple
     125             :  */
     126             : MVNDistinct *
     127           9 : statext_ndistinct_load(Oid mvoid)
     128             : {
     129           9 :     bool        isnull = false;
     130             :     Datum       ndist;
     131             :     HeapTuple   htup;
     132             : 
     133           9 :     htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid));
     134           9 :     if (!htup)
     135           0 :         elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
     136             : 
     137           9 :     ndist = SysCacheGetAttr(STATEXTOID, htup,
     138             :                             Anum_pg_statistic_ext_stxndistinct, &isnull);
     139           9 :     if (isnull)
     140           0 :         elog(ERROR,
     141             :              "requested statistic kind %c is not yet built for statistics object %u",
     142             :              STATS_EXT_NDISTINCT, mvoid);
     143             : 
     144           9 :     ReleaseSysCache(htup);
     145             : 
     146           9 :     return statext_ndistinct_deserialize(DatumGetByteaP(ndist));
     147             : }
     148             : 
     149             : /*
     150             :  * statext_ndistinct_serialize
     151             :  *      serialize ndistinct to the on-disk bytea format
     152             :  */
     153             : bytea *
     154           3 : statext_ndistinct_serialize(MVNDistinct *ndistinct)
     155             : {
     156             :     int         i;
     157             :     bytea      *output;
     158             :     char       *tmp;
     159             :     Size        len;
     160             : 
     161           3 :     Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
     162           3 :     Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
     163             : 
     164             :     /*
     165             :      * Base size is size of scalar fields in the struct, plus one base struct
     166             :      * for each item, including number of items for each.
     167             :      */
     168           3 :     len = VARHDRSZ + SizeOfMVNDistinct +
     169           3 :         ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + sizeof(int));
     170             : 
     171             :     /* and also include space for the actual attribute numbers */
     172          12 :     for (i = 0; i < ndistinct->nitems; i++)
     173             :     {
     174             :         int         nmembers;
     175             : 
     176           9 :         nmembers = bms_num_members(ndistinct->items[i].attrs);
     177           9 :         Assert(nmembers >= 2);
     178           9 :         len += sizeof(AttrNumber) * nmembers;
     179             :     }
     180             : 
     181           3 :     output = (bytea *) palloc(len);
     182           3 :     SET_VARSIZE(output, len);
     183             : 
     184           3 :     tmp = VARDATA(output);
     185             : 
     186             :     /* Store the base struct values (magic, type, nitems) */
     187           3 :     memcpy(tmp, &ndistinct->magic, sizeof(uint32));
     188           3 :     tmp += sizeof(uint32);
     189           3 :     memcpy(tmp, &ndistinct->type, sizeof(uint32));
     190           3 :     tmp += sizeof(uint32);
     191           3 :     memcpy(tmp, &ndistinct->nitems, sizeof(uint32));
     192           3 :     tmp += sizeof(uint32);
     193             : 
     194             :     /*
     195             :      * store number of attributes and attribute numbers for each ndistinct
     196             :      * entry
     197             :      */
     198          12 :     for (i = 0; i < ndistinct->nitems; i++)
     199             :     {
     200           9 :         MVNDistinctItem item = ndistinct->items[i];
     201           9 :         int         nmembers = bms_num_members(item.attrs);
     202             :         int         x;
     203             : 
     204           9 :         memcpy(tmp, &item.ndistinct, sizeof(double));
     205           9 :         tmp += sizeof(double);
     206           9 :         memcpy(tmp, &nmembers, sizeof(int));
     207           9 :         tmp += sizeof(int);
     208             : 
     209           9 :         x = -1;
     210          38 :         while ((x = bms_next_member(item.attrs, x)) >= 0)
     211             :         {
     212          20 :             AttrNumber  value = (AttrNumber) x;
     213             : 
     214          20 :             memcpy(tmp, &value, sizeof(AttrNumber));
     215          20 :             tmp += sizeof(AttrNumber);
     216             :         }
     217             : 
     218           9 :         Assert(tmp <= ((char *) output + len));
     219             :     }
     220             : 
     221           3 :     return output;
     222             : }
     223             : 
     224             : /*
     225             :  * statext_ndistinct_deserialize
     226             :  *      Read an on-disk bytea format MVNDistinct to in-memory format
     227             :  */
     228             : MVNDistinct *
     229          11 : statext_ndistinct_deserialize(bytea *data)
     230             : {
     231             :     int         i;
     232             :     Size        minimum_size;
     233             :     MVNDistinct ndist;
     234             :     MVNDistinct *ndistinct;
     235             :     char       *tmp;
     236             : 
     237          11 :     if (data == NULL)
     238           0 :         return NULL;
     239             : 
     240             :     /* we expect at least the basic fields of MVNDistinct struct */
     241          11 :     if (VARSIZE_ANY_EXHDR(data) < SizeOfMVNDistinct)
     242           0 :         elog(ERROR, "invalid MVNDistinct size %zd (expected at least %zd)",
     243             :              VARSIZE_ANY_EXHDR(data), SizeOfMVNDistinct);
     244             : 
     245             :     /* initialize pointer to the data part (skip the varlena header) */
     246          11 :     tmp = VARDATA_ANY(data);
     247             : 
     248             :     /* read the header fields and perform basic sanity checks */
     249          11 :     memcpy(&ndist.magic, tmp, sizeof(uint32));
     250          11 :     tmp += sizeof(uint32);
     251          11 :     memcpy(&ndist.type, tmp, sizeof(uint32));
     252          11 :     tmp += sizeof(uint32);
     253          11 :     memcpy(&ndist.nitems, tmp, sizeof(uint32));
     254          11 :     tmp += sizeof(uint32);
     255             : 
     256          11 :     if (ndist.magic != STATS_NDISTINCT_MAGIC)
     257           0 :         ereport(ERROR,
     258             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     259             :                  errmsg("invalid ndistinct magic %08x (expected %08x)",
     260             :                         ndist.magic, STATS_NDISTINCT_MAGIC)));
     261          11 :     if (ndist.type != STATS_NDISTINCT_TYPE_BASIC)
     262           0 :         ereport(ERROR,
     263             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     264             :                  errmsg("invalid ndistinct type %d (expected %d)",
     265             :                         ndist.type, STATS_NDISTINCT_TYPE_BASIC)));
     266          11 :     if (ndist.nitems == 0)
     267           0 :         ereport(ERROR,
     268             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     269             :                  errmsg("invalid zero-length item array in MVNDistinct")));
     270             : 
     271             :     /* what minimum bytea size do we expect for those parameters */
     272          11 :     minimum_size = (SizeOfMVNDistinct +
     273          11 :                     ndist.nitems * (SizeOfMVNDistinctItem +
     274             :                                     sizeof(AttrNumber) * 2));
     275          11 :     if (VARSIZE_ANY_EXHDR(data) < minimum_size)
     276           0 :         ereport(ERROR,
     277             :                 (errcode(ERRCODE_DATA_CORRUPTED),
     278             :                  errmsg("invalid MVNDistinct size %zd (expected at least %zd)",
     279             :                         VARSIZE_ANY_EXHDR(data), minimum_size)));
     280             : 
     281             :     /*
     282             :      * Allocate space for the ndistinct items (no space for each item's
     283             :      * attnos: those live in bitmapsets allocated separately)
     284             :      */
     285          11 :     ndistinct = palloc0(MAXALIGN(SizeOfMVNDistinct) +
     286          11 :                         (ndist.nitems * sizeof(MVNDistinctItem)));
     287          11 :     ndistinct->magic = ndist.magic;
     288          11 :     ndistinct->type = ndist.type;
     289          11 :     ndistinct->nitems = ndist.nitems;
     290             : 
     291          55 :     for (i = 0; i < ndistinct->nitems; i++)
     292             :     {
     293          44 :         MVNDistinctItem *item = &ndistinct->items[i];
     294             :         int         nelems;
     295             : 
     296          44 :         item->attrs = NULL;
     297             : 
     298             :         /* ndistinct value */
     299          44 :         memcpy(&item->ndistinct, tmp, sizeof(double));
     300          44 :         tmp += sizeof(double);
     301             : 
     302             :         /* number of attributes */
     303          44 :         memcpy(&nelems, tmp, sizeof(int));
     304          44 :         tmp += sizeof(int);
     305          44 :         Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS));
     306             : 
     307         187 :         while (nelems-- > 0)
     308             :         {
     309             :             AttrNumber  attno;
     310             : 
     311          99 :             memcpy(&attno, tmp, sizeof(AttrNumber));
     312          99 :             tmp += sizeof(AttrNumber);
     313          99 :             item->attrs = bms_add_member(item->attrs, attno);
     314             :         }
     315             : 
     316             :         /* still within the bytea */
     317          44 :         Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
     318             :     }
     319             : 
     320             :     /* we should have consumed the whole bytea exactly */
     321          11 :     Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
     322             : 
     323          11 :     return ndistinct;
     324             : }
     325             : 
     326             : /*
     327             :  * pg_ndistinct_in
     328             :  *      input routine for type pg_ndistinct
     329             :  *
     330             :  * pg_ndistinct is real enough to be a table column, but it has no
     331             :  * operations of its own, and disallows input (jus like pg_node_tree).
     332             :  */
     333             : Datum
     334           0 : pg_ndistinct_in(PG_FUNCTION_ARGS)
     335             : {
     336           0 :     ereport(ERROR,
     337             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     338             :              errmsg("cannot accept a value of type %s", "pg_ndistinct")));
     339             : 
     340             :     PG_RETURN_VOID();           /* keep compiler quiet */
     341             : }
     342             : 
     343             : /*
     344             :  * pg_ndistinct
     345             :  *      output routine for type pg_ndistinct
     346             :  *
     347             :  * Produces a human-readable representation of the value.
     348             :  */
     349             : Datum
     350           2 : pg_ndistinct_out(PG_FUNCTION_ARGS)
     351             : {
     352           2 :     bytea      *data = PG_GETARG_BYTEA_PP(0);
     353           2 :     MVNDistinct *ndist = statext_ndistinct_deserialize(data);
     354             :     int         i;
     355             :     StringInfoData str;
     356             : 
     357           2 :     initStringInfo(&str);
     358           2 :     appendStringInfoChar(&str, '{');
     359             : 
     360          10 :     for (i = 0; i < ndist->nitems; i++)
     361             :     {
     362           8 :         MVNDistinctItem item = ndist->items[i];
     363           8 :         int         x = -1;
     364           8 :         bool        first = true;
     365             : 
     366           8 :         if (i > 0)
     367           6 :             appendStringInfoString(&str, ", ");
     368             : 
     369          34 :         while ((x = bms_next_member(item.attrs, x)) >= 0)
     370             :         {
     371          18 :             appendStringInfo(&str, "%s%d", first ? "\"" : ", ", x);
     372          18 :             first = false;
     373             :         }
     374           8 :         appendStringInfo(&str, "\": %d", (int) item.ndistinct);
     375             :     }
     376             : 
     377           2 :     appendStringInfoChar(&str, '}');
     378             : 
     379           2 :     PG_RETURN_CSTRING(str.data);
     380             : }
     381             : 
     382             : /*
     383             :  * pg_ndistinct_recv
     384             :  *      binary input routine for type pg_ndistinct
     385             :  */
     386             : Datum
     387           0 : pg_ndistinct_recv(PG_FUNCTION_ARGS)
     388             : {
     389           0 :     ereport(ERROR,
     390             :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     391             :              errmsg("cannot accept a value of type %s", "pg_ndistinct")));
     392             : 
     393             :     PG_RETURN_VOID();           /* keep compiler quiet */
     394             : }
     395             : 
     396             : /*
     397             :  * pg_ndistinct_send
     398             :  *      binary output routine for type pg_ndistinct
     399             :  *
     400             :  * n-distinct is serialized into a bytea value, so let's send that.
     401             :  */
     402             : Datum
     403           0 : pg_ndistinct_send(PG_FUNCTION_ARGS)
     404             : {
     405           0 :     return byteasend(fcinfo);
     406             : }
     407             : 
     408             : /*
     409             :  * ndistinct_for_combination
     410             :  *      Estimates number of distinct values in a combination of columns.
     411             :  *
     412             :  * This uses the same ndistinct estimator as compute_scalar_stats() in
     413             :  * ANALYZE, i.e.,
     414             :  *      n*d / (n - f1 + f1*n/N)
     415             :  *
     416             :  * except that instead of values in a single column we are dealing with
     417             :  * combination of multiple columns.
     418             :  */
     419             : static double
     420           9 : ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
     421             :                           VacAttrStats **stats, int k, int *combination)
     422             : {
     423             :     int         i,
     424             :                 j;
     425             :     int         f1,
     426             :                 cnt,
     427             :                 d;
     428             :     bool       *isnull;
     429             :     Datum      *values;
     430             :     SortItem   *items;
     431             :     MultiSortSupport mss;
     432             : 
     433           9 :     mss = multi_sort_init(k);
     434             : 
     435             :     /*
     436             :      * In order to determine the number of distinct elements, create separate
     437             :      * values[]/isnull[] arrays with all the data we have, then sort them
     438             :      * using the specified column combination as dimensions.  We could try to
     439             :      * sort in place, but it'd probably be more complex and bug-prone.
     440             :      */
     441           9 :     items = (SortItem *) palloc(numrows * sizeof(SortItem));
     442           9 :     values = (Datum *) palloc0(sizeof(Datum) * numrows * k);
     443           9 :     isnull = (bool *) palloc0(sizeof(bool) * numrows * k);
     444             : 
     445      161009 :     for (i = 0; i < numrows; i++)
     446             :     {
     447      161000 :         items[i].values = &values[i * k];
     448      161000 :         items[i].isnull = &isnull[i * k];
     449             :     }
     450             : 
     451             :     /*
     452             :      * For each dimension, set up sort-support and fill in the values from the
     453             :      * sample data.
     454             :      */
     455          29 :     for (i = 0; i < k; i++)
     456             :     {
     457          20 :         VacAttrStats *colstat = stats[combination[i]];
     458             :         TypeCacheEntry *type;
     459             : 
     460          20 :         type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
     461          20 :         if (type->lt_opr == InvalidOid) /* shouldn't happen */
     462           0 :             elog(ERROR, "cache lookup failed for ordering operator for type %u",
     463             :                  colstat->attrtypid);
     464             : 
     465             :         /* prepare the sort function for this dimension */
     466          20 :         multi_sort_add_dimension(mss, i, type->lt_opr);
     467             : 
     468             :         /* accumulate all the data for this dimension into the arrays */
     469      362020 :         for (j = 0; j < numrows; j++)
     470             :         {
     471      724000 :             items[j].values[i] =
     472      362000 :                 heap_getattr(rows[j],
     473             :                              colstat->attr->attnum,
     474             :                              colstat->tupDesc,
     475             :                              &items[j].isnull[i]);
     476             :         }
     477             :     }
     478             : 
     479             :     /* We can sort the array now ... */
     480           9 :     qsort_arg((void *) items, numrows, sizeof(SortItem),
     481             :               multi_sort_compare, mss);
     482             : 
     483             :     /* ... and count the number of distinct combinations */
     484             : 
     485           9 :     f1 = 0;
     486           9 :     cnt = 1;
     487           9 :     d = 1;
     488      161000 :     for (i = 1; i < numrows; i++)
     489             :     {
     490      160991 :         if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
     491             :         {
     492       17177 :             if (cnt == 1)
     493       10998 :                 f1 += 1;
     494             : 
     495       17177 :             d++;
     496       17177 :             cnt = 0;
     497             :         }
     498             : 
     499      160991 :         cnt += 1;
     500             :     }
     501             : 
     502           9 :     if (cnt == 1)
     503           6 :         f1 += 1;
     504             : 
     505           9 :     return estimate_ndistinct(totalrows, numrows, d, f1);
     506             : }
     507             : 
     508             : /* The Duj1 estimator (already used in analyze.c). */
     509             : static double
     510           9 : estimate_ndistinct(double totalrows, int numrows, int d, int f1)
     511             : {
     512             :     double      numer,
     513             :                 denom,
     514             :                 ndistinct;
     515             : 
     516           9 :     numer = (double) numrows * (double) d;
     517             : 
     518          27 :     denom = (double) (numrows - f1) +
     519          18 :         (double) f1 * (double) numrows / totalrows;
     520             : 
     521           9 :     ndistinct = numer / denom;
     522             : 
     523             :     /* Clamp to sane range in case of roundoff error */
     524           9 :     if (ndistinct < (double) d)
     525           0 :         ndistinct = (double) d;
     526             : 
     527           9 :     if (ndistinct > totalrows)
     528           0 :         ndistinct = totalrows;
     529             : 
     530           9 :     return floor(ndistinct + 0.5);
     531             : }
     532             : 
     533             : /*
     534             :  * n_choose_k
     535             :  *      computes binomial coefficients using an algorithm that is both
     536             :  *      efficient and prevents overflows
     537             :  */
     538             : static int
     539           5 : n_choose_k(int n, int k)
     540             : {
     541             :     int         d,
     542             :                 r;
     543             : 
     544           5 :     Assert((k > 0) && (n >= k));
     545             : 
     546             :     /* use symmetry of the binomial coefficients */
     547           5 :     k = Min(k, n - k);
     548             : 
     549           5 :     r = 1;
     550           7 :     for (d = 1; d <= k; ++d)
     551             :     {
     552           2 :         r *= n--;
     553           2 :         r /= d;
     554             :     }
     555             : 
     556           5 :     return r;
     557             : }
     558             : 
     559             : /*
     560             :  * num_combinations
     561             :  *      number of combinations, excluding single-value combinations
     562             :  */
     563             : static int
     564           3 : num_combinations(int n)
     565             : {
     566             :     int         k;
     567           3 :     int         ncombs = 1;
     568             : 
     569          11 :     for (k = 1; k <= n; k++)
     570           8 :         ncombs *= 2;
     571             : 
     572           3 :     ncombs -= (n + 1);
     573             : 
     574           3 :     return ncombs;
     575             : }
     576             : 
     577             : /*
     578             :  * generator_init
     579             :  *      initialize the generator of combinations
     580             :  *
     581             :  * The generator produces combinations of K elements in the interval (0..N).
     582             :  * We prebuild all the combinations in this method, which is simpler than
     583             :  * generating them on the fly.
     584             :  */
     585             : static CombinationGenerator *
     586           5 : generator_init(int n, int k)
     587             : {
     588             :     CombinationGenerator *state;
     589             : 
     590           5 :     Assert((n >= k) && (k > 0));
     591             : 
     592             :     /* allocate the generator state as a single chunk of memory */
     593           5 :     state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator));
     594             : 
     595           5 :     state->ncombinations = n_choose_k(n, k);
     596             : 
     597             :     /* pre-allocate space for all combinations */
     598           5 :     state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations);
     599             : 
     600           5 :     state->current = 0;
     601           5 :     state->k = k;
     602           5 :     state->n = n;
     603             : 
     604             :     /* now actually pre-generate all the combinations of K elements */
     605           5 :     generate_combinations(state);
     606             : 
     607             :     /* make sure we got the expected number of combinations */
     608           5 :     Assert(state->current == state->ncombinations);
     609             : 
     610             :     /* reset the number, so we start with the first one */
     611           5 :     state->current = 0;
     612             : 
     613           5 :     return state;
     614             : }
     615             : 
     616             : /*
     617             :  * generator_next
     618             :  *      returns the next combination from the prebuilt list
     619             :  *
     620             :  * Returns a combination of K array indexes (0 .. N), as specified to
     621             :  * generator_init), or NULL when there are no more combination.
     622             :  */
     623             : static int *
     624          14 : generator_next(CombinationGenerator *state)
     625             : {
     626          14 :     if (state->current == state->ncombinations)
     627           5 :         return NULL;
     628             : 
     629           9 :     return &state->combinations[state->k * state->current++];
     630             : }
     631             : 
     632             : /*
     633             :  * generator_free
     634             :  *      free the internal state of the generator
     635             :  *
     636             :  * Releases the generator internal state (pre-built combinations).
     637             :  */
     638             : static void
     639           5 : generator_free(CombinationGenerator *state)
     640             : {
     641           5 :     pfree(state->combinations);
     642           5 :     pfree(state);
     643           5 : }
     644             : 
     645             : /*
     646             :  * generate_combinations_recurse
     647             :  *      given a prefix, generate all possible combinations
     648             :  *
     649             :  * Given a prefix (first few elements of the combination), generate following
     650             :  * elements recursively. We generate the combinations in lexicographic order,
     651             :  * which eliminates permutations of the same combination.
     652             :  */
     653             : static void
     654          34 : generate_combinations_recurse(CombinationGenerator *state,
     655             :                               int index, int start, int *current)
     656             : {
     657             :     /* If we haven't filled all the elements, simply recurse. */
     658          34 :     if (index < state->k)
     659             :     {
     660             :         int         i;
     661             : 
     662             :         /*
     663             :          * The values have to be in ascending order, so make sure we start
     664             :          * with the value passed by parameter.
     665             :          */
     666             : 
     667          54 :         for (i = start; i < state->n; i++)
     668             :         {
     669          29 :             current[index] = i;
     670          29 :             generate_combinations_recurse(state, (index + 1), (i + 1), current);
     671             :         }
     672             : 
     673          59 :         return;
     674             :     }
     675             :     else
     676             :     {
     677             :         /* we got a valid combination, add it to the array */
     678           9 :         memcpy(&state->combinations[(state->k * state->current)],
     679           9 :                current, state->k * sizeof(int));
     680           9 :         state->current++;
     681             :     }
     682             : }
     683             : 
     684             : /*
     685             :  * generate_combinations
     686             :  *      generate all k-combinations of N elements
     687             :  */
     688             : static void
     689           5 : generate_combinations(CombinationGenerator *state)
     690             : {
     691           5 :     int        *current = (int *) palloc0(sizeof(int) * state->k);
     692             : 
     693           5 :     generate_combinations_recurse(state, 0, 0, current);
     694             : 
     695           5 :     pfree(current);
     696           5 : }

Generated by: LCOV version 1.11