Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * extended_stats.c
4 : * POSTGRES extended statistics
5 : *
6 : * Generic code supporting statistics objects created via CREATE STATISTICS.
7 : *
8 : *
9 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
10 : * Portions Copyright (c) 1994, Regents of the University of California
11 : *
12 : * IDENTIFICATION
13 : * src/backend/statistics/extended_stats.c
14 : *
15 : *-------------------------------------------------------------------------
16 : */
17 : #include "postgres.h"
18 :
19 : #include "access/genam.h"
20 : #include "access/heapam.h"
21 : #include "access/htup_details.h"
22 : #include "catalog/indexing.h"
23 : #include "catalog/pg_collation.h"
24 : #include "catalog/pg_statistic_ext.h"
25 : #include "nodes/relation.h"
26 : #include "postmaster/autovacuum.h"
27 : #include "statistics/extended_stats_internal.h"
28 : #include "statistics/statistics.h"
29 : #include "utils/builtins.h"
30 : #include "utils/fmgroids.h"
31 : #include "utils/lsyscache.h"
32 : #include "utils/memutils.h"
33 : #include "utils/rel.h"
34 : #include "utils/syscache.h"
35 :
36 :
37 : /*
38 : * Used internally to refer to an individual statistics object, i.e.,
39 : * a pg_statistic_ext entry.
40 : */
41 : typedef struct StatExtEntry
42 : {
43 : Oid statOid; /* OID of pg_statistic_ext entry */
44 : char *schema; /* statistics object's schema */
45 : char *name; /* statistics object's name */
46 : Bitmapset *columns; /* attribute numbers covered by the object */
47 : List *types; /* 'char' list of enabled statistic kinds */
48 : } StatExtEntry;
49 :
50 :
51 : static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
52 : static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
53 : int nvacatts, VacAttrStats **vacatts);
54 : static void statext_store(Relation pg_stext, Oid relid,
55 : MVNDistinct *ndistinct, MVDependencies *dependencies,
56 : VacAttrStats **stats);
57 :
58 :
59 : /*
60 : * Compute requested extended stats, using the rows sampled for the plain
61 : * (single-column) stats.
62 : *
63 : * This fetches a list of stats types from pg_statistic_ext, computes the
64 : * requested stats, and serializes them back into the catalog.
65 : */
66 : void
67 178 : BuildRelationExtStatistics(Relation onerel, double totalrows,
68 : int numrows, HeapTuple *rows,
69 : int natts, VacAttrStats **vacattrstats)
70 : {
71 : Relation pg_stext;
72 : ListCell *lc;
73 : List *stats;
74 : MemoryContext cxt;
75 : MemoryContext oldcxt;
76 :
77 178 : cxt = AllocSetContextCreate(CurrentMemoryContext, "stats ext",
78 : ALLOCSET_DEFAULT_SIZES);
79 178 : oldcxt = MemoryContextSwitchTo(cxt);
80 :
81 178 : pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock);
82 178 : stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
83 :
84 186 : foreach(lc, stats)
85 : {
86 8 : StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
87 8 : MVNDistinct *ndistinct = NULL;
88 8 : MVDependencies *dependencies = NULL;
89 : VacAttrStats **stats;
90 : ListCell *lc2;
91 :
92 : /*
93 : * Check if we can build these stats based on the column analyzed. If
94 : * not, report this fact (except in autovacuum) and move on.
95 : */
96 8 : stats = lookup_var_attr_stats(onerel, stat->columns,
97 : natts, vacattrstats);
98 8 : if (!stats && !IsAutoVacuumWorkerProcess())
99 : {
100 2 : ereport(WARNING,
101 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
102 : errmsg("statistics object \"%s.%s\" could not be computed for relation \"%s.%s\"",
103 : stat->schema, stat->name,
104 : get_namespace_name(onerel->rd_rel->relnamespace),
105 : RelationGetRelationName(onerel)),
106 : errtable(onerel)));
107 2 : continue;
108 : }
109 :
110 : /* check allowed number of dimensions */
111 6 : Assert(bms_num_members(stat->columns) >= 2 &&
112 : bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
113 :
114 : /* compute statistic of each requested type */
115 15 : foreach(lc2, stat->types)
116 : {
117 9 : char t = (char) lfirst_int(lc2);
118 :
119 9 : if (t == STATS_EXT_NDISTINCT)
120 3 : ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
121 : stat->columns, stats);
122 6 : else if (t == STATS_EXT_DEPENDENCIES)
123 6 : dependencies = statext_dependencies_build(numrows, rows,
124 : stat->columns, stats);
125 : }
126 :
127 : /* store the statistics in the catalog */
128 6 : statext_store(pg_stext, stat->statOid, ndistinct, dependencies, stats);
129 : }
130 :
131 178 : heap_close(pg_stext, RowExclusiveLock);
132 :
133 178 : MemoryContextSwitchTo(oldcxt);
134 178 : MemoryContextDelete(cxt);
135 178 : }
136 :
137 : /*
138 : * statext_is_kind_built
139 : * Is this stat kind built in the given pg_statistic_ext tuple?
140 : */
141 : bool
142 32 : statext_is_kind_built(HeapTuple htup, char type)
143 : {
144 : AttrNumber attnum;
145 :
146 32 : switch (type)
147 : {
148 : case STATS_EXT_NDISTINCT:
149 16 : attnum = Anum_pg_statistic_ext_stxndistinct;
150 16 : break;
151 :
152 : case STATS_EXT_DEPENDENCIES:
153 16 : attnum = Anum_pg_statistic_ext_stxdependencies;
154 16 : break;
155 :
156 : default:
157 0 : elog(ERROR, "unexpected statistics type requested: %d", type);
158 : }
159 :
160 32 : return !heap_attisnull(htup, attnum);
161 : }
162 :
163 : /*
164 : * Return a list (of StatExtEntry) of statistics objects for the given relation.
165 : */
166 : static List *
167 178 : fetch_statentries_for_relation(Relation pg_statext, Oid relid)
168 : {
169 : SysScanDesc scan;
170 : ScanKeyData skey;
171 : HeapTuple htup;
172 178 : List *result = NIL;
173 :
174 : /*
175 : * Prepare to scan pg_statistic_ext for entries having stxrelid = this
176 : * rel.
177 : */
178 178 : ScanKeyInit(&skey,
179 : Anum_pg_statistic_ext_stxrelid,
180 : BTEqualStrategyNumber, F_OIDEQ,
181 : ObjectIdGetDatum(relid));
182 :
183 178 : scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
184 : NULL, 1, &skey);
185 :
186 364 : while (HeapTupleIsValid(htup = systable_getnext(scan)))
187 : {
188 : StatExtEntry *entry;
189 : Datum datum;
190 : bool isnull;
191 : int i;
192 : ArrayType *arr;
193 : char *enabled;
194 : Form_pg_statistic_ext staForm;
195 :
196 8 : entry = palloc0(sizeof(StatExtEntry));
197 8 : entry->statOid = HeapTupleGetOid(htup);
198 8 : staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
199 8 : entry->schema = get_namespace_name(staForm->stxnamespace);
200 8 : entry->name = pstrdup(NameStr(staForm->stxname));
201 29 : for (i = 0; i < staForm->stxkeys.dim1; i++)
202 : {
203 21 : entry->columns = bms_add_member(entry->columns,
204 21 : staForm->stxkeys.values[i]);
205 : }
206 :
207 : /* decode the stxkind char array into a list of chars */
208 8 : datum = SysCacheGetAttr(STATEXTOID, htup,
209 : Anum_pg_statistic_ext_stxkind, &isnull);
210 8 : Assert(!isnull);
211 8 : arr = DatumGetArrayTypeP(datum);
212 16 : if (ARR_NDIM(arr) != 1 ||
213 16 : ARR_HASNULL(arr) ||
214 8 : ARR_ELEMTYPE(arr) != CHAROID)
215 0 : elog(ERROR, "stxkind is not a 1-D char array");
216 8 : enabled = (char *) ARR_DATA_PTR(arr);
217 21 : for (i = 0; i < ARR_DIMS(arr)[0]; i++)
218 : {
219 13 : Assert((enabled[i] == STATS_EXT_NDISTINCT) ||
220 : (enabled[i] == STATS_EXT_DEPENDENCIES));
221 13 : entry->types = lappend_int(entry->types, (int) enabled[i]);
222 : }
223 :
224 8 : result = lappend(result, entry);
225 : }
226 :
227 178 : systable_endscan(scan);
228 :
229 178 : return result;
230 : }
231 :
232 : /*
233 : * Using 'vacatts' of size 'nvacatts' as input data, return a newly built
234 : * VacAttrStats array which includes only the items corresponding to
235 : * attributes indicated by 'stxkeys'. If we don't have all of the per column
236 : * stats available to compute the extended stats, then we return NULL to indicate
237 : * to the caller that the stats should not be built.
238 : */
239 : static VacAttrStats **
240 8 : lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
241 : int nvacatts, VacAttrStats **vacatts)
242 : {
243 8 : int i = 0;
244 8 : int x = -1;
245 : VacAttrStats **stats;
246 :
247 8 : stats = (VacAttrStats **)
248 8 : palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
249 :
250 : /* lookup VacAttrStats info for the requested columns (same attnum) */
251 34 : while ((x = bms_next_member(attrs, x)) >= 0)
252 : {
253 : int j;
254 :
255 20 : stats[i] = NULL;
256 73 : for (j = 0; j < nvacatts; j++)
257 : {
258 71 : if (x == vacatts[j]->tupattnum)
259 : {
260 18 : stats[i] = vacatts[j];
261 18 : break;
262 : }
263 : }
264 :
265 20 : if (!stats[i])
266 : {
267 : /*
268 : * Looks like stats were not gathered for one of the columns
269 : * required. We'll be unable to build the extended stats without
270 : * this column.
271 : */
272 2 : pfree(stats);
273 2 : return NULL;
274 : }
275 :
276 : /*
277 : * Sanity check that the column is not dropped - stats should have
278 : * been removed in this case.
279 : */
280 18 : Assert(!stats[i]->attr->attisdropped);
281 :
282 18 : i++;
283 : }
284 :
285 6 : return stats;
286 : }
287 :
288 : /*
289 : * statext_store
290 : * Serializes the statistics and stores them into the pg_statistic_ext tuple.
291 : */
292 : static void
293 6 : statext_store(Relation pg_stext, Oid statOid,
294 : MVNDistinct *ndistinct, MVDependencies *dependencies,
295 : VacAttrStats **stats)
296 : {
297 : HeapTuple stup,
298 : oldtup;
299 : Datum values[Natts_pg_statistic_ext];
300 : bool nulls[Natts_pg_statistic_ext];
301 : bool replaces[Natts_pg_statistic_ext];
302 :
303 6 : memset(nulls, 1, Natts_pg_statistic_ext * sizeof(bool));
304 6 : memset(replaces, 0, Natts_pg_statistic_ext * sizeof(bool));
305 6 : memset(values, 0, Natts_pg_statistic_ext * sizeof(Datum));
306 :
307 : /*
308 : * Construct a new pg_statistic_ext tuple, replacing the calculated stats.
309 : */
310 6 : if (ndistinct != NULL)
311 : {
312 3 : bytea *data = statext_ndistinct_serialize(ndistinct);
313 :
314 3 : nulls[Anum_pg_statistic_ext_stxndistinct - 1] = (data == NULL);
315 3 : values[Anum_pg_statistic_ext_stxndistinct - 1] = PointerGetDatum(data);
316 : }
317 :
318 6 : if (dependencies != NULL)
319 : {
320 4 : bytea *data = statext_dependencies_serialize(dependencies);
321 :
322 4 : nulls[Anum_pg_statistic_ext_stxdependencies - 1] = (data == NULL);
323 4 : values[Anum_pg_statistic_ext_stxdependencies - 1] = PointerGetDatum(data);
324 : }
325 :
326 : /* always replace the value (either by bytea or NULL) */
327 6 : replaces[Anum_pg_statistic_ext_stxndistinct - 1] = true;
328 6 : replaces[Anum_pg_statistic_ext_stxdependencies - 1] = true;
329 :
330 : /* there should already be a pg_statistic_ext tuple */
331 6 : oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
332 6 : if (!HeapTupleIsValid(oldtup))
333 0 : elog(ERROR, "cache lookup failed for statistics object %u", statOid);
334 :
335 : /* replace it */
336 6 : stup = heap_modify_tuple(oldtup,
337 : RelationGetDescr(pg_stext),
338 : values,
339 : nulls,
340 : replaces);
341 6 : ReleaseSysCache(oldtup);
342 6 : CatalogTupleUpdate(pg_stext, &stup->t_self, stup);
343 :
344 6 : heap_freetuple(stup);
345 6 : }
346 :
347 : /* initialize multi-dimensional sort */
348 : MultiSortSupport
349 56 : multi_sort_init(int ndims)
350 : {
351 : MultiSortSupport mss;
352 :
353 56 : Assert(ndims >= 2);
354 :
355 56 : mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
356 56 : + sizeof(SortSupportData) * ndims);
357 :
358 56 : mss->ndims = ndims;
359 :
360 56 : return mss;
361 : }
362 :
363 : /*
364 : * Prepare sort support info using the given sort operator
365 : * at the position 'sortdim'
366 : */
367 : void
368 129 : multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper)
369 : {
370 129 : SortSupport ssup = &mss->ssup[sortdim];
371 :
372 129 : ssup->ssup_cxt = CurrentMemoryContext;
373 129 : ssup->ssup_collation = DEFAULT_COLLATION_OID;
374 129 : ssup->ssup_nulls_first = false;
375 129 : ssup->ssup_cxt = CurrentMemoryContext;
376 :
377 129 : PrepareSortSupportFromOrderingOp(oper, ssup);
378 129 : }
379 :
380 : /* compare all the dimensions in the selected order */
381 : int
382 3288493 : multi_sort_compare(const void *a, const void *b, void *arg)
383 : {
384 3288493 : MultiSortSupport mss = (MultiSortSupport) arg;
385 3288493 : SortItem *ia = (SortItem *) a;
386 3288493 : SortItem *ib = (SortItem *) b;
387 : int i;
388 :
389 6098276 : for (i = 0; i < mss->ndims; i++)
390 : {
391 : int compare;
392 :
393 10716246 : compare = ApplySortComparator(ia->values[i], ia->isnull[i],
394 10716246 : ib->values[i], ib->isnull[i],
395 5358123 : &mss->ssup[i]);
396 :
397 5358123 : if (compare != 0)
398 2548340 : return compare;
399 : }
400 :
401 : /* equal by default */
402 740153 : return 0;
403 : }
404 :
405 : /* compare selected dimension */
406 : int
407 484375 : multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
408 : MultiSortSupport mss)
409 : {
410 968750 : return ApplySortComparator(a->values[dim], a->isnull[dim],
411 968750 : b->values[dim], b->isnull[dim],
412 484375 : &mss->ssup[dim]);
413 : }
414 :
415 : int
416 496953 : multi_sort_compare_dims(int start, int end,
417 : const SortItem *a, const SortItem *b,
418 : MultiSortSupport mss)
419 : {
420 : int dim;
421 :
422 1144699 : for (dim = start; dim <= end; dim++)
423 : {
424 1320648 : int r = ApplySortComparator(a->values[dim], a->isnull[dim],
425 1320648 : b->values[dim], b->isnull[dim],
426 660324 : &mss->ssup[dim]);
427 :
428 660324 : if (r != 0)
429 12578 : return r;
430 : }
431 :
432 484375 : return 0;
433 : }
434 :
435 : /*
436 : * has_stats_of_kind
437 : * Check whether the list contains statistic of a given kind
438 : */
439 : bool
440 20 : has_stats_of_kind(List *stats, char requiredkind)
441 : {
442 : ListCell *l;
443 :
444 20 : foreach(l, stats)
445 : {
446 20 : StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l);
447 :
448 20 : if (stat->kind == requiredkind)
449 20 : return true;
450 : }
451 :
452 0 : return false;
453 : }
454 :
455 : /*
456 : * choose_best_statistics
457 : * Look for and return statistics with the specified 'requiredkind' which
458 : * have keys that match at least two of the given attnums. Return NULL if
459 : * there's no match.
460 : *
461 : * The current selection criteria is very simple - we choose the statistics
462 : * object referencing the most of the requested attributes, breaking ties
463 : * in favor of objects with fewer keys overall.
464 : *
465 : * XXX if multiple statistics objects tie on both criteria, then which object
466 : * is chosen depends on the order that they appear in the stats list. Perhaps
467 : * further tiebreakers are needed.
468 : */
469 : StatisticExtInfo *
470 20 : choose_best_statistics(List *stats, Bitmapset *attnums, char requiredkind)
471 : {
472 : ListCell *lc;
473 20 : StatisticExtInfo *best_match = NULL;
474 20 : int best_num_matched = 2; /* goal #1: maximize */
475 20 : int best_match_keys = (STATS_MAX_DIMENSIONS + 1); /* goal #2: minimize */
476 :
477 40 : foreach(lc, stats)
478 : {
479 20 : StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
480 : int num_matched;
481 : int numkeys;
482 : Bitmapset *matched;
483 :
484 : /* skip statistics that are not of the correct type */
485 20 : if (info->kind != requiredkind)
486 0 : continue;
487 :
488 : /* determine how many attributes of these stats can be matched to */
489 20 : matched = bms_intersect(attnums, info->keys);
490 20 : num_matched = bms_num_members(matched);
491 20 : bms_free(matched);
492 :
493 : /*
494 : * save the actual number of keys in the stats so that we can choose
495 : * the narrowest stats with the most matching keys.
496 : */
497 20 : numkeys = bms_num_members(info->keys);
498 :
499 : /*
500 : * Use this object when it increases the number of matched clauses or
501 : * when it matches the same number of attributes but these stats have
502 : * fewer keys than any previous match.
503 : */
504 20 : if (num_matched > best_num_matched ||
505 11 : (num_matched == best_num_matched && numkeys < best_match_keys))
506 : {
507 20 : best_match = info;
508 20 : best_num_matched = num_matched;
509 20 : best_match_keys = numkeys;
510 : }
511 : }
512 :
513 20 : return best_match;
514 : }
|