Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * planagg.c
4 : * Special planning for aggregate queries.
5 : *
6 : * This module tries to replace MIN/MAX aggregate functions by subqueries
7 : * of the form
8 : * (SELECT col FROM tab
9 : * WHERE col IS NOT NULL AND existing-quals
10 : * ORDER BY col ASC/DESC
11 : * LIMIT 1)
12 : * Given a suitable index on tab.col, this can be much faster than the
13 : * generic scan-all-the-rows aggregation plan. We can handle multiple
14 : * MIN/MAX aggregates by generating multiple subqueries, and their
15 : * orderings can be different. However, if the query contains any
16 : * non-optimizable aggregates, there's no point since we'll have to
17 : * scan all the rows anyway.
18 : *
19 : *
20 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
21 : * Portions Copyright (c) 1994, Regents of the University of California
22 : *
23 : *
24 : * IDENTIFICATION
25 : * src/backend/optimizer/plan/planagg.c
26 : *
27 : *-------------------------------------------------------------------------
28 : */
29 : #include "postgres.h"
30 :
31 : #include "access/htup_details.h"
32 : #include "catalog/pg_aggregate.h"
33 : #include "catalog/pg_type.h"
34 : #include "nodes/makefuncs.h"
35 : #include "nodes/nodeFuncs.h"
36 : #include "optimizer/clauses.h"
37 : #include "optimizer/cost.h"
38 : #include "optimizer/pathnode.h"
39 : #include "optimizer/paths.h"
40 : #include "optimizer/planmain.h"
41 : #include "optimizer/subselect.h"
42 : #include "optimizer/tlist.h"
43 : #include "parser/parsetree.h"
44 : #include "parser/parse_clause.h"
45 : #include "rewrite/rewriteManip.h"
46 : #include "utils/lsyscache.h"
47 : #include "utils/syscache.h"
48 :
49 :
50 : static bool find_minmax_aggs_walker(Node *node, List **context);
51 : static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
52 : Oid eqop, Oid sortop, bool nulls_first);
53 : static void minmax_qp_callback(PlannerInfo *root, void *extra);
54 : static Oid fetch_agg_sort_op(Oid aggfnoid);
55 :
56 :
57 : /*
58 : * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
59 : *
60 : * Check to see whether the query contains MIN/MAX aggregate functions that
61 : * might be optimizable via indexscans. If it does, and all the aggregates
62 : * are potentially optimizable, then create a MinMaxAggPath and add it to
63 : * the (UPPERREL_GROUP_AGG, NULL) upperrel.
64 : *
65 : * This should be called by grouping_planner() just before it's ready to call
66 : * query_planner(), because we generate indexscan paths by cloning the
67 : * planner's state and invoking query_planner() on a modified version of
68 : * the query parsetree. Thus, all preprocessing needed before query_planner()
69 : * must already be done.
70 : *
71 : * Note: we are passed the preprocessed targetlist separately, because it's
72 : * not necessarily equal to root->parse->targetList.
73 : */
74 : void
75 2365 : preprocess_minmax_aggregates(PlannerInfo *root, List *tlist)
76 : {
77 2365 : Query *parse = root->parse;
78 : FromExpr *jtnode;
79 : RangeTblRef *rtr;
80 : RangeTblEntry *rte;
81 : List *aggs_list;
82 : RelOptInfo *grouped_rel;
83 : ListCell *lc;
84 :
85 : /* minmax_aggs list should be empty at this point */
86 2365 : Assert(root->minmax_aggs == NIL);
87 :
88 : /* Nothing to do if query has no aggregates */
89 2365 : if (!parse->hasAggs)
90 2279 : return;
91 :
92 2365 : Assert(!parse->setOperations); /* shouldn't get here if a setop */
93 2365 : Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
94 :
95 : /*
96 : * Reject unoptimizable cases.
97 : *
98 : * We don't handle GROUP BY or windowing, because our current
99 : * implementations of grouping require looking at all the rows anyway, and
100 : * so there's not much point in optimizing MIN/MAX.
101 : */
102 4461 : if (parse->groupClause || list_length(parse->groupingSets) > 1 ||
103 2096 : parse->hasWindowFuncs)
104 270 : return;
105 :
106 : /*
107 : * Reject if query contains any CTEs; there's no way to build an indexscan
108 : * on one so we couldn't succeed here. (If the CTEs are unreferenced,
109 : * that's not true, but it doesn't seem worth expending cycles to check.)
110 : */
111 2095 : if (parse->cteList)
112 2 : return;
113 :
114 : /*
115 : * We also restrict the query to reference exactly one table, since join
116 : * conditions can't be handled reasonably. (We could perhaps handle a
117 : * query containing cartesian-product joins, but it hardly seems worth the
118 : * trouble.) However, the single table could be buried in several levels
119 : * of FromExpr due to subqueries. Note the "single" table could be an
120 : * inheritance parent, too, including the case of a UNION ALL subquery
121 : * that's been flattened to an appendrel.
122 : */
123 2093 : jtnode = parse->jointree;
124 6310 : while (IsA(jtnode, FromExpr))
125 : {
126 2165 : if (list_length(jtnode->fromlist) != 1)
127 41 : return;
128 2124 : jtnode = linitial(jtnode->fromlist);
129 : }
130 2052 : if (!IsA(jtnode, RangeTblRef))
131 191 : return;
132 1861 : rtr = (RangeTblRef *) jtnode;
133 1861 : rte = planner_rt_fetch(rtr->rtindex, root);
134 1861 : if (rte->rtekind == RTE_RELATION)
135 : /* ordinary relation, ok */ ;
136 154 : else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
137 : /* flattened UNION ALL subquery, ok */ ;
138 : else
139 152 : return;
140 :
141 : /*
142 : * Scan the tlist and HAVING qual to find all the aggregates and verify
143 : * all are MIN/MAX aggregates. Stop as soon as we find one that isn't.
144 : */
145 1709 : aggs_list = NIL;
146 1709 : if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
147 1592 : return;
148 117 : if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
149 0 : return;
150 :
151 : /*
152 : * OK, there is at least the possibility of performing the optimization.
153 : * Build an access path for each aggregate. If any of the aggregates
154 : * prove to be non-indexable, give up; there is no point in optimizing
155 : * just some of them.
156 : */
157 209 : foreach(lc, aggs_list)
158 : {
159 123 : MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
160 : Oid eqop;
161 : bool reverse;
162 :
163 : /*
164 : * We'll need the equality operator that goes with the aggregate's
165 : * ordering operator.
166 : */
167 123 : eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
168 123 : if (!OidIsValid(eqop)) /* shouldn't happen */
169 0 : elog(ERROR, "could not find equality operator for ordering operator %u",
170 : mminfo->aggsortop);
171 :
172 : /*
173 : * We can use either an ordering that gives NULLS FIRST or one that
174 : * gives NULLS LAST; furthermore there's unlikely to be much
175 : * performance difference between them, so it doesn't seem worth
176 : * costing out both ways if we get a hit on the first one. NULLS
177 : * FIRST is more likely to be available if the operator is a
178 : * reverse-sort operator, so try that first if reverse.
179 : */
180 123 : if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
181 184 : continue;
182 31 : if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
183 0 : continue;
184 :
185 : /* No indexable path for this aggregate, so fail */
186 31 : return;
187 : }
188 :
189 : /*
190 : * OK, we can do the query this way. Prepare to create a MinMaxAggPath
191 : * node.
192 : *
193 : * First, create an output Param node for each agg. (If we end up not
194 : * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
195 : * which is not worth worrying about. We can't wait till create_plan time
196 : * to decide whether to make the Param, unfortunately.)
197 : */
198 178 : foreach(lc, aggs_list)
199 : {
200 92 : MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
201 :
202 92 : mminfo->param =
203 184 : SS_make_initplan_output_param(root,
204 92 : exprType((Node *) mminfo->target),
205 : -1,
206 92 : exprCollation((Node *) mminfo->target));
207 : }
208 :
209 : /*
210 : * Create a MinMaxAggPath node with the appropriate estimated costs and
211 : * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
212 : * it will compete against the standard aggregate implementation. (It
213 : * will likely always win, but we need not assume that here.)
214 : *
215 : * Note: grouping_planner won't have created this upperrel yet, but it's
216 : * fine for us to create it first. We will not have inserted the correct
217 : * consider_parallel value in it, but MinMaxAggPath paths are currently
218 : * never parallel-safe anyway, so that doesn't matter. Likewise, it
219 : * doesn't matter that we haven't filled FDW-related fields in the rel.
220 : */
221 86 : grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
222 172 : add_path(grouped_rel, (Path *)
223 86 : create_minmaxagg_path(root, grouped_rel,
224 : create_pathtarget(root, tlist),
225 : aggs_list,
226 86 : (List *) parse->havingQual));
227 : }
228 :
229 : /*
230 : * find_minmax_aggs_walker
231 : * Recursively scan the Aggref nodes in an expression tree, and check
232 : * that each one is a MIN/MAX aggregate. If so, build a list of the
233 : * distinct aggregate calls in the tree.
234 : *
235 : * Returns TRUE if a non-MIN/MAX aggregate is found, FALSE otherwise.
236 : * (This seemingly-backward definition is used because expression_tree_walker
237 : * aborts the scan on TRUE return, which is what we want.)
238 : *
239 : * Found aggregates are added to the list at *context; it's up to the caller
240 : * to initialize the list to NIL.
241 : *
242 : * This does not descend into subqueries, and so should be used only after
243 : * reduction of sublinks to subplans. There mustn't be outer-aggregate
244 : * references either.
245 : */
246 : static bool
247 5407 : find_minmax_aggs_walker(Node *node, List **context)
248 : {
249 5407 : if (node == NULL)
250 117 : return false;
251 5290 : if (IsA(node, Aggref))
252 : {
253 1773 : Aggref *aggref = (Aggref *) node;
254 : Oid aggsortop;
255 : TargetEntry *curTarget;
256 : MinMaxAggInfo *mminfo;
257 : ListCell *l;
258 :
259 1773 : Assert(aggref->agglevelsup == 0);
260 1773 : if (list_length(aggref->args) != 1)
261 520 : return true; /* it couldn't be MIN/MAX */
262 :
263 : /*
264 : * ORDER BY is usually irrelevant for MIN/MAX, but it can change the
265 : * outcome if the aggsortop's operator class recognizes non-identical
266 : * values as equal. For example, 4.0 and 4.00 are equal according to
267 : * numeric_ops, yet distinguishable. If MIN() receives more than one
268 : * value equal to 4.0 and no value less than 4.0, it is unspecified
269 : * which of those equal values MIN() returns. An ORDER BY expression
270 : * that differs for each of those equal values of the argument
271 : * expression makes the result predictable once again. This is a
272 : * niche requirement, and we do not implement it with subquery paths.
273 : * In any case, this test lets us reject ordered-set aggregates
274 : * quickly.
275 : */
276 1253 : if (aggref->aggorder != NIL)
277 12 : return true;
278 : /* note: we do not care if DISTINCT is mentioned ... */
279 :
280 : /*
281 : * We might implement the optimization when a FILTER clause is present
282 : * by adding the filter to the quals of the generated subquery. For
283 : * now, just punt.
284 : */
285 1241 : if (aggref->aggfilter != NULL)
286 3 : return true;
287 :
288 1238 : aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
289 1238 : if (!OidIsValid(aggsortop))
290 1057 : return true; /* not a MIN/MAX aggregate */
291 :
292 181 : curTarget = (TargetEntry *) linitial(aggref->args);
293 :
294 181 : if (contain_mutable_functions((Node *) curTarget->expr))
295 0 : return true; /* not potentially indexable */
296 :
297 181 : if (type_is_rowtype(exprType((Node *) curTarget->expr)))
298 0 : return true; /* IS NOT NULL would have weird semantics */
299 :
300 : /*
301 : * Check whether it's already in the list, and add it if not.
302 : */
303 324 : foreach(l, *context)
304 : {
305 149 : mminfo = (MinMaxAggInfo *) lfirst(l);
306 232 : if (mminfo->aggfnoid == aggref->aggfnoid &&
307 83 : equal(mminfo->target, curTarget->expr))
308 6 : return false;
309 : }
310 :
311 175 : mminfo = makeNode(MinMaxAggInfo);
312 175 : mminfo->aggfnoid = aggref->aggfnoid;
313 175 : mminfo->aggsortop = aggsortop;
314 175 : mminfo->target = curTarget->expr;
315 175 : mminfo->subroot = NULL; /* don't compute path yet */
316 175 : mminfo->path = NULL;
317 175 : mminfo->pathcost = 0;
318 175 : mminfo->param = NULL;
319 :
320 175 : *context = lappend(*context, mminfo);
321 :
322 : /*
323 : * We need not recurse into the argument, since it can't contain any
324 : * aggregates.
325 : */
326 175 : return false;
327 : }
328 3517 : Assert(!IsA(node, SubLink));
329 3517 : return expression_tree_walker(node, find_minmax_aggs_walker,
330 : (void *) context);
331 : }
332 :
333 : /*
334 : * build_minmax_path
335 : * Given a MIN/MAX aggregate, try to build an indexscan Path it can be
336 : * optimized with.
337 : *
338 : * If successful, stash the best path in *mminfo and return TRUE.
339 : * Otherwise, return FALSE.
340 : */
341 : static bool
342 154 : build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
343 : Oid eqop, Oid sortop, bool nulls_first)
344 : {
345 : PlannerInfo *subroot;
346 : Query *parse;
347 : TargetEntry *tle;
348 : List *tlist;
349 : NullTest *ntest;
350 : SortGroupClause *sortcl;
351 : RelOptInfo *final_rel;
352 : Path *sorted_path;
353 : Cost path_cost;
354 : double path_fraction;
355 :
356 : /*
357 : * We are going to construct what is effectively a sub-SELECT query, so
358 : * clone the current query level's state and adjust it to make it look
359 : * like a subquery. Any outer references will now be one level higher
360 : * than before. (This means that when we are done, there will be no Vars
361 : * of level 1, which is why the subquery can become an initplan.)
362 : */
363 154 : subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
364 154 : memcpy(subroot, root, sizeof(PlannerInfo));
365 154 : subroot->query_level++;
366 154 : subroot->parent_root = root;
367 : /* reset subplan-related stuff */
368 154 : subroot->plan_params = NIL;
369 154 : subroot->outer_params = NULL;
370 154 : subroot->init_plans = NIL;
371 :
372 154 : subroot->parse = parse = copyObject(root->parse);
373 154 : IncrementVarSublevelsUp((Node *) parse, 1, 1);
374 :
375 : /* append_rel_list might contain outer Vars? */
376 154 : subroot->append_rel_list = copyObject(root->append_rel_list);
377 154 : IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
378 : /* There shouldn't be any OJ info to translate, as yet */
379 154 : Assert(subroot->join_info_list == NIL);
380 : /* and we haven't made equivalence classes, either */
381 154 : Assert(subroot->eq_classes == NIL);
382 : /* and we haven't created PlaceHolderInfos, either */
383 154 : Assert(subroot->placeholder_list == NIL);
384 :
385 : /*----------
386 : * Generate modified query of the form
387 : * (SELECT col FROM tab
388 : * WHERE col IS NOT NULL AND existing-quals
389 : * ORDER BY col ASC/DESC
390 : * LIMIT 1)
391 : *----------
392 : */
393 : /* single tlist entry that is the aggregate target */
394 154 : tle = makeTargetEntry(copyObject(mminfo->target),
395 : (AttrNumber) 1,
396 : pstrdup("agg_target"),
397 : false);
398 154 : tlist = list_make1(tle);
399 154 : subroot->processed_tlist = parse->targetList = tlist;
400 :
401 : /* No HAVING, no DISTINCT, no aggregates anymore */
402 154 : parse->havingQual = NULL;
403 154 : subroot->hasHavingQual = false;
404 154 : parse->distinctClause = NIL;
405 154 : parse->hasDistinctOn = false;
406 154 : parse->hasAggs = false;
407 :
408 : /* Build "target IS NOT NULL" expression */
409 154 : ntest = makeNode(NullTest);
410 154 : ntest->nulltesttype = IS_NOT_NULL;
411 154 : ntest->arg = copyObject(mminfo->target);
412 : /* we checked it wasn't a rowtype in find_minmax_aggs_walker */
413 154 : ntest->argisrow = false;
414 154 : ntest->location = -1;
415 :
416 : /* User might have had that in WHERE already */
417 154 : if (!list_member((List *) parse->jointree->quals, ntest))
418 308 : parse->jointree->quals = (Node *)
419 154 : lcons(ntest, (List *) parse->jointree->quals);
420 :
421 : /* Build suitable ORDER BY clause */
422 154 : sortcl = makeNode(SortGroupClause);
423 154 : sortcl->tleSortGroupRef = assignSortGroupRef(tle, tlist);
424 154 : sortcl->eqop = eqop;
425 154 : sortcl->sortop = sortop;
426 154 : sortcl->nulls_first = nulls_first;
427 154 : sortcl->hashable = false; /* no need to make this accurate */
428 154 : parse->sortClause = list_make1(sortcl);
429 :
430 : /* set up expressions for LIMIT 1 */
431 154 : parse->limitOffset = NULL;
432 154 : parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
433 : sizeof(int64),
434 : Int64GetDatum(1), false,
435 : FLOAT8PASSBYVAL);
436 :
437 : /*
438 : * Generate the best paths for this query, telling query_planner that we
439 : * have LIMIT 1.
440 : */
441 154 : subroot->tuple_fraction = 1.0;
442 154 : subroot->limit_tuples = 1.0;
443 :
444 154 : final_rel = query_planner(subroot, tlist, minmax_qp_callback, NULL);
445 :
446 : /*
447 : * Since we didn't go through subquery_planner() to handle the subquery,
448 : * we have to do some of the same cleanup it would do, in particular cope
449 : * with params and initplans used within this subquery. (This won't
450 : * matter if we end up not using the subplan.)
451 : */
452 154 : SS_identify_outer_params(subroot);
453 154 : SS_charge_for_initplans(subroot, final_rel);
454 :
455 : /*
456 : * Get the best presorted path, that being the one that's cheapest for
457 : * fetching just one row. If there's no such path, fail.
458 : */
459 154 : if (final_rel->rows > 1.0)
460 150 : path_fraction = 1.0 / final_rel->rows;
461 : else
462 4 : path_fraction = 1.0;
463 :
464 154 : sorted_path =
465 154 : get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
466 : subroot->query_pathkeys,
467 : NULL,
468 : path_fraction);
469 154 : if (!sorted_path)
470 62 : return false;
471 :
472 : /*
473 : * The path might not return exactly what we want, so fix that. (We
474 : * assume that this won't change any conclusions about which was the
475 : * cheapest path.)
476 : */
477 92 : sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
478 : create_pathtarget(subroot, tlist));
479 :
480 : /*
481 : * Determine cost to get just the first row of the presorted path.
482 : *
483 : * Note: cost calculation here should match
484 : * compare_fractional_path_costs().
485 : */
486 276 : path_cost = sorted_path->startup_cost +
487 184 : path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
488 :
489 : /* Save state for further processing */
490 92 : mminfo->subroot = subroot;
491 92 : mminfo->path = sorted_path;
492 92 : mminfo->pathcost = path_cost;
493 :
494 92 : return true;
495 : }
496 :
497 : /*
498 : * Compute query_pathkeys and other pathkeys during query_planner()
499 : */
500 : static void
501 154 : minmax_qp_callback(PlannerInfo *root, void *extra)
502 : {
503 154 : root->group_pathkeys = NIL;
504 154 : root->window_pathkeys = NIL;
505 154 : root->distinct_pathkeys = NIL;
506 :
507 154 : root->sort_pathkeys =
508 308 : make_pathkeys_for_sortclauses(root,
509 154 : root->parse->sortClause,
510 154 : root->parse->targetList);
511 :
512 154 : root->query_pathkeys = root->sort_pathkeys;
513 154 : }
514 :
515 : /*
516 : * Get the OID of the sort operator, if any, associated with an aggregate.
517 : * Returns InvalidOid if there is no such operator.
518 : */
519 : static Oid
520 1238 : fetch_agg_sort_op(Oid aggfnoid)
521 : {
522 : HeapTuple aggTuple;
523 : Form_pg_aggregate aggform;
524 : Oid aggsortop;
525 :
526 : /* fetch aggregate entry from pg_aggregate */
527 1238 : aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
528 1238 : if (!HeapTupleIsValid(aggTuple))
529 0 : return InvalidOid;
530 1238 : aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
531 1238 : aggsortop = aggform->aggsortop;
532 1238 : ReleaseSysCache(aggTuple);
533 :
534 1238 : return aggsortop;
535 : }
|