Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * statscmds.c
4 : * Commands for creating and altering extended statistics objects
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/commands/statscmds.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
#include "postgres.h"

#include "access/relscan.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_statistic_ext.h"
#include "commands/defrem.h"
#include "miscadmin.h"
#include "statistics/statistics.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
32 :
33 :
34 : /* qsort comparator for the attnums in CreateStatistics */
35 : static int
36 26 : compare_int16(const void *a, const void *b)
37 : {
38 26 : int av = *(const int16 *) a;
39 26 : int bv = *(const int16 *) b;
40 :
41 : /* this can't overflow if int is wider than int16 */
42 26 : return (av - bv);
43 : }
44 :
45 : /*
46 : * CREATE STATISTICS
47 : */
48 : ObjectAddress
49 31 : CreateStatistics(CreateStatsStmt *stmt)
50 : {
51 : int16 attnums[STATS_MAX_DIMENSIONS];
52 31 : int numcols = 0;
53 : char *namestr;
54 : NameData stxname;
55 : Oid statoid;
56 : Oid namespaceId;
57 31 : Oid stxowner = GetUserId();
58 : HeapTuple htup;
59 : Datum values[Natts_pg_statistic_ext];
60 : bool nulls[Natts_pg_statistic_ext];
61 : int2vector *stxkeys;
62 : Relation statrel;
63 31 : Relation rel = NULL;
64 : Oid relid;
65 : ObjectAddress parentobject,
66 : myself;
67 : Datum types[2]; /* one for each possible type of statistic */
68 : int ntypes;
69 : ArrayType *stxkind;
70 : bool build_ndistinct;
71 : bool build_dependencies;
72 31 : bool requested_type = false;
73 : int i;
74 : ListCell *cell;
75 :
76 31 : Assert(IsA(stmt, CreateStatsStmt));
77 :
78 : /* resolve the pieces of the name (namespace etc.) */
79 31 : namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr);
80 31 : namestrcpy(&stxname, namestr);
81 :
82 : /*
83 : * Deal with the possibility that the statistics object already exists.
84 : */
85 31 : if (SearchSysCacheExists2(STATEXTNAMENSP,
86 : NameGetDatum(&stxname),
87 : ObjectIdGetDatum(namespaceId)))
88 : {
89 1 : if (stmt->if_not_exists)
90 : {
91 1 : ereport(NOTICE,
92 : (errcode(ERRCODE_DUPLICATE_OBJECT),
93 : errmsg("statistics object \"%s\" already exists, skipping",
94 : namestr)));
95 1 : return InvalidObjectAddress;
96 : }
97 :
98 0 : ereport(ERROR,
99 : (errcode(ERRCODE_DUPLICATE_OBJECT),
100 : errmsg("statistics object \"%s\" already exists", namestr)));
101 : }
102 :
103 : /*
104 : * Examine the FROM clause. Currently, we only allow it to be a single
105 : * simple table, but later we'll probably allow multiple tables and JOIN
106 : * syntax. The grammar is already prepared for that, so we have to check
107 : * here that what we got is what we can support.
108 : */
109 30 : if (list_length(stmt->relations) != 1)
110 0 : ereport(ERROR,
111 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
112 : errmsg("only a single relation is allowed in CREATE STATISTICS")));
113 :
114 54 : foreach(cell, stmt->relations)
115 : {
116 30 : Node *rln = (Node *) lfirst(cell);
117 :
118 30 : if (!IsA(rln, RangeVar))
119 0 : ereport(ERROR,
120 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
121 : errmsg("only a single relation is allowed in CREATE STATISTICS")));
122 :
123 : /*
124 : * CREATE STATISTICS will influence future execution plans but does
125 : * not interfere with currently executing plans. So it should be
126 : * enough to take only ShareUpdateExclusiveLock on relation,
127 : * conflicting with ANALYZE and other DDL that sets statistical
128 : * information, but not with normal queries.
129 : */
130 30 : rel = relation_openrv((RangeVar *) rln, ShareUpdateExclusiveLock);
131 :
132 : /* Restrict to allowed relation types */
133 37 : if (rel->rd_rel->relkind != RELKIND_RELATION &&
134 15 : rel->rd_rel->relkind != RELKIND_MATVIEW &&
135 13 : rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
136 6 : rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
137 5 : ereport(ERROR,
138 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
139 : errmsg("relation \"%s\" is not a table, foreign table, or materialized view",
140 : RelationGetRelationName(rel))));
141 :
142 : /* You must own the relation to create stats on it */
143 24 : if (!pg_class_ownercheck(RelationGetRelid(rel), stxowner))
144 0 : aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
145 0 : RelationGetRelationName(rel));
146 : }
147 :
148 24 : Assert(rel);
149 24 : relid = RelationGetRelid(rel);
150 :
151 : /*
152 : * Currently, we only allow simple column references in the expression
153 : * list. That will change someday, and again the grammar already supports
154 : * it so we have to enforce restrictions here. For now, we can convert
155 : * the expression list to a simple array of attnums. While at it, enforce
156 : * some constraints.
157 : */
158 71 : foreach(cell, stmt->exprs)
159 : {
160 50 : Node *expr = (Node *) lfirst(cell);
161 : ColumnRef *cref;
162 : char *attname;
163 : HeapTuple atttuple;
164 : Form_pg_attribute attForm;
165 : TypeCacheEntry *type;
166 :
167 50 : if (!IsA(expr, ColumnRef))
168 2 : ereport(ERROR,
169 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
170 : errmsg("only simple column references are allowed in CREATE STATISTICS")));
171 48 : cref = (ColumnRef *) expr;
172 :
173 48 : if (list_length(cref->fields) != 1)
174 0 : ereport(ERROR,
175 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
176 : errmsg("only simple column references are allowed in CREATE STATISTICS")));
177 48 : attname = strVal((Value *) linitial(cref->fields));
178 :
179 48 : atttuple = SearchSysCacheAttName(relid, attname);
180 48 : if (!HeapTupleIsValid(atttuple))
181 1 : ereport(ERROR,
182 : (errcode(ERRCODE_UNDEFINED_COLUMN),
183 : errmsg("column \"%s\" referenced in statistics does not exist",
184 : attname)));
185 47 : attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
186 :
187 : /* Disallow use of system attributes in extended stats */
188 47 : if (attForm->attnum <= 0)
189 0 : ereport(ERROR,
190 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
191 : errmsg("statistics creation on system columns is not supported")));
192 :
193 : /* Disallow data types without a less-than operator */
194 47 : type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
195 47 : if (type->lt_opr == InvalidOid)
196 0 : ereport(ERROR,
197 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
198 : errmsg("column \"%s\" cannot be used in statistics because its type has no default btree operator class",
199 : attname)));
200 :
201 : /* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
202 47 : if (numcols >= STATS_MAX_DIMENSIONS)
203 0 : ereport(ERROR,
204 : (errcode(ERRCODE_TOO_MANY_COLUMNS),
205 : errmsg("cannot have more than %d columns in statistics",
206 : STATS_MAX_DIMENSIONS)));
207 :
208 47 : attnums[numcols] = attForm->attnum;
209 47 : numcols++;
210 47 : ReleaseSysCache(atttuple);
211 : }
212 :
213 : /*
214 : * Check that at least two columns were specified in the statement. The
215 : * upper bound was already checked in the loop above.
216 : */
217 21 : if (numcols < 2)
218 0 : ereport(ERROR,
219 : (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
220 : errmsg("extended statistics require at least 2 columns")));
221 :
222 : /*
223 : * Sort the attnums, which makes detecting duplicates somewhat easier, and
224 : * it does not hurt (it does not affect the efficiency, unlike for
225 : * indexes, for example).
226 : */
227 21 : qsort(attnums, numcols, sizeof(int16), compare_int16);
228 :
229 : /*
230 : * Check for duplicates in the list of columns. The attnums are sorted so
231 : * just check consecutive elements.
232 : */
233 45 : for (i = 1; i < numcols; i++)
234 : {
235 25 : if (attnums[i] == attnums[i - 1])
236 1 : ereport(ERROR,
237 : (errcode(ERRCODE_DUPLICATE_COLUMN),
238 : errmsg("duplicate column name in statistics definition")));
239 : }
240 :
241 : /* Form an int2vector representation of the sorted column list */
242 20 : stxkeys = buildint2vector(attnums, numcols);
243 :
244 : /*
245 : * Parse the statistics types.
246 : */
247 20 : build_ndistinct = false;
248 20 : build_dependencies = false;
249 22 : foreach(cell, stmt->stat_types)
250 : {
251 3 : char *type = strVal((Value *) lfirst(cell));
252 :
253 3 : if (strcmp(type, "ndistinct") == 0)
254 : {
255 0 : build_ndistinct = true;
256 0 : requested_type = true;
257 : }
258 3 : else if (strcmp(type, "dependencies") == 0)
259 : {
260 2 : build_dependencies = true;
261 2 : requested_type = true;
262 : }
263 : else
264 1 : ereport(ERROR,
265 : (errcode(ERRCODE_SYNTAX_ERROR),
266 : errmsg("unrecognized statistic type \"%s\"",
267 : type)));
268 : }
269 : /* If no statistic type was specified, build them all. */
270 19 : if (!requested_type)
271 : {
272 17 : build_ndistinct = true;
273 17 : build_dependencies = true;
274 : }
275 :
276 : /* construct the char array of enabled statistic types */
277 19 : ntypes = 0;
278 19 : if (build_ndistinct)
279 17 : types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT);
280 19 : if (build_dependencies)
281 19 : types[ntypes++] = CharGetDatum(STATS_EXT_DEPENDENCIES);
282 19 : Assert(ntypes > 0 && ntypes <= lengthof(types));
283 19 : stxkind = construct_array(types, ntypes, CHAROID, 1, true, 'c');
284 :
285 : /*
286 : * Everything seems fine, so let's build the pg_statistic_ext tuple.
287 : */
288 19 : memset(values, 0, sizeof(values));
289 19 : memset(nulls, false, sizeof(nulls));
290 19 : values[Anum_pg_statistic_ext_stxrelid - 1] = ObjectIdGetDatum(relid);
291 19 : values[Anum_pg_statistic_ext_stxname - 1] = NameGetDatum(&stxname);
292 19 : values[Anum_pg_statistic_ext_stxnamespace - 1] = ObjectIdGetDatum(namespaceId);
293 19 : values[Anum_pg_statistic_ext_stxowner - 1] = ObjectIdGetDatum(stxowner);
294 19 : values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys);
295 19 : values[Anum_pg_statistic_ext_stxkind - 1] = PointerGetDatum(stxkind);
296 :
297 : /* no statistics built yet */
298 19 : nulls[Anum_pg_statistic_ext_stxndistinct - 1] = true;
299 19 : nulls[Anum_pg_statistic_ext_stxdependencies - 1] = true;
300 :
301 : /* insert it into pg_statistic_ext */
302 19 : statrel = heap_open(StatisticExtRelationId, RowExclusiveLock);
303 19 : htup = heap_form_tuple(statrel->rd_att, values, nulls);
304 19 : statoid = CatalogTupleInsert(statrel, htup);
305 19 : heap_freetuple(htup);
306 19 : relation_close(statrel, RowExclusiveLock);
307 :
308 : /*
309 : * Invalidate relcache so that others see the new statistics object.
310 : */
311 19 : CacheInvalidateRelcache(rel);
312 :
313 19 : relation_close(rel, NoLock);
314 :
315 : /*
316 : * Add an AUTO dependency on each column used in the stats, so that the
317 : * stats object goes away if any or all of them get dropped.
318 : */
319 19 : ObjectAddressSet(myself, StatisticExtRelationId, statoid);
320 :
321 61 : for (i = 0; i < numcols; i++)
322 : {
323 42 : ObjectAddressSubSet(parentobject, RelationRelationId, relid, attnums[i]);
324 42 : recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
325 : }
326 :
327 : /*
328 : * Also add dependencies on namespace and owner. These are required
329 : * because the stats object might have a different namespace and/or owner
330 : * than the underlying table(s).
331 : */
332 19 : ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId);
333 19 : recordDependencyOn(&myself, &parentobject, DEPENDENCY_NORMAL);
334 :
335 19 : recordDependencyOnOwner(StatisticExtRelationId, statoid, stxowner);
336 :
337 : /*
338 : * XXX probably there should be a recordDependencyOnCurrentExtension call
339 : * here too, but we'd have to add support for ALTER EXTENSION ADD/DROP
340 : * STATISTICS, which is more work than it seems worth.
341 : */
342 :
343 : /* Return stats object's address */
344 19 : return myself;
345 : }
346 :
347 : /*
348 : * Guts of statistics object deletion.
349 : */
350 : void
351 18 : RemoveStatisticsById(Oid statsOid)
352 : {
353 : Relation relation;
354 : HeapTuple tup;
355 : Form_pg_statistic_ext statext;
356 : Oid relid;
357 :
358 : /*
359 : * Delete the pg_statistic_ext tuple. Also send out a cache inval on the
360 : * associated table, so that dependent plans will be rebuilt.
361 : */
362 18 : relation = heap_open(StatisticExtRelationId, RowExclusiveLock);
363 :
364 18 : tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid));
365 :
366 18 : if (!HeapTupleIsValid(tup)) /* should not happen */
367 0 : elog(ERROR, "cache lookup failed for statistics object %u", statsOid);
368 :
369 18 : statext = (Form_pg_statistic_ext) GETSTRUCT(tup);
370 18 : relid = statext->stxrelid;
371 :
372 18 : CacheInvalidateRelcacheByRelid(relid);
373 :
374 18 : CatalogTupleDelete(relation, &tup->t_self);
375 :
376 18 : ReleaseSysCache(tup);
377 :
378 18 : heap_close(relation, RowExclusiveLock);
379 18 : }
380 :
381 : /*
382 : * Update a statistics object for ALTER COLUMN TYPE on a source column.
383 : *
384 : * This could throw an error if the type change can't be supported.
385 : * If it can be supported, but the stats must be recomputed, a likely choice
386 : * would be to set the relevant column(s) of the pg_statistic_ext tuple to
387 : * null until the next ANALYZE. (Note that the type change hasn't actually
388 : * happened yet, so one option that's *not* on the table is to recompute
389 : * immediately.)
390 : */
391 : void
392 1 : UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum,
393 : Oid oldColumnType, Oid newColumnType)
394 : {
395 : /*
396 : * Currently, we don't actually need to do anything here. For both
397 : * ndistinct and functional-dependencies stats, the on-disk representation
398 : * is independent of the source column data types, and it is plausible to
399 : * assume that the old statistic values will still be good for the new
400 : * column contents. (Obviously, if the ALTER COLUMN TYPE has a USING
401 : * expression that substantially alters the semantic meaning of the column
402 : * values, this assumption could fail. But that seems like a corner case
403 : * that doesn't justify zapping the stats in common cases.)
404 : *
405 : * Future types of extended stats will likely require us to work harder.
406 : */
407 1 : }
|