Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * planner.c
4 : * The query optimizer external interface.
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/plan/planner.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include <limits.h>
19 : #include <math.h>
20 :
21 : #include "access/htup_details.h"
22 : #include "access/parallel.h"
23 : #include "access/sysattr.h"
24 : #include "access/xact.h"
25 : #include "catalog/pg_constraint_fn.h"
26 : #include "catalog/pg_proc.h"
27 : #include "catalog/pg_type.h"
28 : #include "executor/executor.h"
29 : #include "executor/nodeAgg.h"
30 : #include "foreign/fdwapi.h"
31 : #include "miscadmin.h"
32 : #include "lib/bipartite_match.h"
33 : #include "lib/knapsack.h"
34 : #include "nodes/makefuncs.h"
35 : #include "nodes/nodeFuncs.h"
36 : #ifdef OPTIMIZER_DEBUG
37 : #include "nodes/print.h"
38 : #endif
39 : #include "optimizer/clauses.h"
40 : #include "optimizer/cost.h"
41 : #include "optimizer/pathnode.h"
42 : #include "optimizer/paths.h"
43 : #include "optimizer/plancat.h"
44 : #include "optimizer/planmain.h"
45 : #include "optimizer/planner.h"
46 : #include "optimizer/prep.h"
47 : #include "optimizer/subselect.h"
48 : #include "optimizer/tlist.h"
49 : #include "optimizer/var.h"
50 : #include "parser/analyze.h"
51 : #include "parser/parsetree.h"
52 : #include "parser/parse_agg.h"
53 : #include "rewrite/rewriteManip.h"
54 : #include "storage/dsm_impl.h"
55 : #include "utils/rel.h"
56 : #include "utils/selfuncs.h"
57 : #include "utils/lsyscache.h"
58 : #include "utils/syscache.h"
59 :
60 :
61 : /* GUC parameters */
62 : double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
63 : int force_parallel_mode = FORCE_PARALLEL_OFF;
64 :
65 : /* Hook for plugins to get control in planner() */
66 : planner_hook_type planner_hook = NULL;
67 :
68 : /* Hook for plugins to get control when grouping_planner() plans upper rels */
69 : create_upper_paths_hook_type create_upper_paths_hook = NULL;
70 :
71 :
72 : /* Expression kind codes for preprocess_expression */
73 : #define EXPRKIND_QUAL 0
74 : #define EXPRKIND_TARGET 1
75 : #define EXPRKIND_RTFUNC 2
76 : #define EXPRKIND_RTFUNC_LATERAL 3
77 : #define EXPRKIND_VALUES 4
78 : #define EXPRKIND_VALUES_LATERAL 5
79 : #define EXPRKIND_LIMIT 6
80 : #define EXPRKIND_APPINFO 7
81 : #define EXPRKIND_PHV 8
82 : #define EXPRKIND_TABLESAMPLE 9
83 : #define EXPRKIND_ARBITER_ELEM 10
84 : #define EXPRKIND_TABLEFUNC 11
85 : #define EXPRKIND_TABLEFUNC_LATERAL 12
86 :
87 : /* Passthrough data for standard_qp_callback */
88 : typedef struct
89 : {
90 : List *tlist; /* preprocessed query targetlist */
91 : List *activeWindows; /* active windows, if any */
92 : List *groupClause; /* overrides parse->groupClause */
93 : } standard_qp_extra;
94 :
95 : /*
96 : * Data specific to grouping sets
97 : */
98 :
99 : typedef struct
100 : {
101 : List *rollups;
102 : List *hash_sets_idx;
103 : double dNumHashGroups;
104 : bool any_hashable;
105 : Bitmapset *unsortable_refs;
106 : Bitmapset *unhashable_refs;
107 : List *unsortable_sets;
108 : int *tleref_to_colnum_map;
109 : } grouping_sets_data;
110 :
111 : /* Local functions */
112 : static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
113 : static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
114 : static void inheritance_planner(PlannerInfo *root);
115 : static void grouping_planner(PlannerInfo *root, bool inheritance_update,
116 : double tuple_fraction);
117 : static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
118 : static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
119 : int *tleref_to_colnum_map);
120 : static void preprocess_rowmarks(PlannerInfo *root);
121 : static double preprocess_limit(PlannerInfo *root,
122 : double tuple_fraction,
123 : int64 *offset_est, int64 *count_est);
124 : static bool limit_needed(Query *parse);
125 : static void remove_useless_groupby_columns(PlannerInfo *root);
126 : static List *preprocess_groupclause(PlannerInfo *root, List *force);
127 : static List *extract_rollup_sets(List *groupingSets);
128 : static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
129 : static void standard_qp_callback(PlannerInfo *root, void *extra);
130 : static double get_number_of_groups(PlannerInfo *root,
131 : double path_rows,
132 : grouping_sets_data *gd);
133 : static Size estimate_hashagg_tablesize(Path *path,
134 : const AggClauseCosts *agg_costs,
135 : double dNumGroups);
136 : static RelOptInfo *create_grouping_paths(PlannerInfo *root,
137 : RelOptInfo *input_rel,
138 : PathTarget *target,
139 : const AggClauseCosts *agg_costs,
140 : grouping_sets_data *gd);
141 : static void consider_groupingsets_paths(PlannerInfo *root,
142 : RelOptInfo *grouped_rel,
143 : Path *path,
144 : bool is_sorted,
145 : bool can_hash,
146 : PathTarget *target,
147 : grouping_sets_data *gd,
148 : const AggClauseCosts *agg_costs,
149 : double dNumGroups);
150 : static RelOptInfo *create_window_paths(PlannerInfo *root,
151 : RelOptInfo *input_rel,
152 : PathTarget *input_target,
153 : PathTarget *output_target,
154 : List *tlist,
155 : WindowFuncLists *wflists,
156 : List *activeWindows);
157 : static void create_one_window_path(PlannerInfo *root,
158 : RelOptInfo *window_rel,
159 : Path *path,
160 : PathTarget *input_target,
161 : PathTarget *output_target,
162 : List *tlist,
163 : WindowFuncLists *wflists,
164 : List *activeWindows);
165 : static RelOptInfo *create_distinct_paths(PlannerInfo *root,
166 : RelOptInfo *input_rel);
167 : static RelOptInfo *create_ordered_paths(PlannerInfo *root,
168 : RelOptInfo *input_rel,
169 : PathTarget *target,
170 : double limit_tuples);
171 : static PathTarget *make_group_input_target(PlannerInfo *root,
172 : PathTarget *final_target);
173 : static PathTarget *make_partial_grouping_target(PlannerInfo *root,
174 : PathTarget *grouping_target);
175 : static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
176 : static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
177 : static PathTarget *make_window_input_target(PlannerInfo *root,
178 : PathTarget *final_target,
179 : List *activeWindows);
180 : static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
181 : List *tlist);
182 : static PathTarget *make_sort_input_target(PlannerInfo *root,
183 : PathTarget *final_target,
184 : bool *have_postponed_srfs);
185 : static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
186 : List *targets, List *targets_contain_srfs);
187 :
188 :
189 : /*****************************************************************************
190 : *
191 : * Query optimizer entry point
192 : *
193 : * To support loadable plugins that monitor or modify planner behavior,
194 : * we provide a hook variable that lets a plugin get control before and
195 : * after the standard planning process. The plugin would normally call
196 : * standard_planner().
197 : *
198 : * Note to plugin authors: standard_planner() scribbles on its Query input,
199 : * so you'd better copy that data structure if you want to plan more than once.
200 : *
201 : *****************************************************************************/
202 : PlannedStmt *
203 22517 : planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
204 : {
205 : PlannedStmt *result;
206 :
207 22517 : if (planner_hook)
208 0 : result = (*planner_hook) (parse, cursorOptions, boundParams);
209 : else
210 22517 : result = standard_planner(parse, cursorOptions, boundParams);
211 22238 : return result;
212 : }
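/*
 * Editorial sketch (not part of planner.c): how a loadable plugin might
 * install planner_hook.  The names my_planner and prev_planner are
 * hypothetical; the hook signature is the one used in the call above.  A
 * real module would also include fmgr.h and declare PG_MODULE_MAGIC.
 */
static planner_hook_type prev_planner = NULL;

static PlannedStmt *
my_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
{
	/*
	 * Inspect or adjust the Query here.  Remember that standard_planner()
	 * scribbles on its input, so copyObject(parse) first if you intend to
	 * plan more than once.
	 */
	if (prev_planner)
		return prev_planner(parse, cursorOptions, boundParams);
	return standard_planner(parse, cursorOptions, boundParams);
}

void
_PG_init(void)
{
	prev_planner = planner_hook;
	planner_hook = my_planner;
}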
213 :
214 : PlannedStmt *
215 22517 : standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
216 : {
217 : PlannedStmt *result;
218 : PlannerGlobal *glob;
219 : double tuple_fraction;
220 : PlannerInfo *root;
221 : RelOptInfo *final_rel;
222 : Path *best_path;
223 : Plan *top_plan;
224 : ListCell *lp,
225 : *lr;
226 :
227 : /*
228 : * Set up global state for this planner invocation. This data is needed
229 : * across all levels of sub-Query that might exist in the given command,
230 : * so we keep it in a separate struct that's linked to by each per-Query
231 : * PlannerInfo.
232 : */
233 22517 : glob = makeNode(PlannerGlobal);
234 :
235 22517 : glob->boundParams = boundParams;
236 22517 : glob->subplans = NIL;
237 22517 : glob->subroots = NIL;
238 22517 : glob->rewindPlanIDs = NULL;
239 22517 : glob->finalrtable = NIL;
240 22517 : glob->finalrowmarks = NIL;
241 22517 : glob->resultRelations = NIL;
242 22517 : glob->nonleafResultRelations = NIL;
243 22517 : glob->rootResultRelations = NIL;
244 22517 : glob->relationOids = NIL;
245 22517 : glob->invalItems = NIL;
246 22517 : glob->nParamExec = 0;
247 22517 : glob->lastPHId = 0;
248 22517 : glob->lastRowMarkId = 0;
249 22517 : glob->lastPlanNodeId = 0;
250 22517 : glob->transientPlan = false;
251 22517 : glob->dependsOnRole = false;
252 :
253 : /*
254 : * Assess whether it's feasible to use parallel mode for this query. We
255 : * can't do this in a standalone backend, or if the command will try to
256 : * modify any data, or if this is a cursor operation, or if GUCs are set
257 : * to values that don't permit parallelism, or if parallel-unsafe
258 : * functions are present in the query tree.
259 : *
260 : * For now, we don't try to use parallel mode if we're running inside a
261 : * parallel worker. We might eventually be able to relax this
262 : * restriction, but for now it seems best not to have parallel workers
263 : * trying to create their own parallel workers.
264 : *
265 : * We can't use parallelism in serializable mode because the predicate
266 : * locking code is not parallel-aware. It's not catastrophic if someone
267 : * tries to run a parallel plan in serializable mode; it just won't get
268 : * any workers and will run serially. But it seems like a good heuristic
269 : * to assume that the same serialization level will be in effect at plan
270 : * time and execution time, so don't generate a parallel plan if we're in
271 : * serializable mode.
272 : */
273 22517 : if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
274 21184 : IsUnderPostmaster &&
275 42368 : dynamic_shared_memory_type != DSM_IMPL_NONE &&
276 38190 : parse->commandType == CMD_SELECT &&
277 33993 : !parse->hasModifyingCTE &&
278 33926 : max_parallel_workers_per_gather > 0 &&
279 33877 : !IsParallelWorker() &&
280 16938 : !IsolationIsSerializable())
281 : {
282 : /* all the cheap tests pass, so scan the query tree */
283 16932 : glob->maxParallelHazard = max_parallel_hazard(parse);
284 16932 : glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
285 : }
286 : else
287 : {
288 : /* skip the query tree scan, just assume it's unsafe */
289 5585 : glob->maxParallelHazard = PROPARALLEL_UNSAFE;
290 5585 : glob->parallelModeOK = false;
291 : }
292 :
293 : /*
294 : * glob->parallelModeNeeded is normally set to false here and changed to
295 : * true during plan creation if a Gather or Gather Merge plan is actually
296 : * created (cf. create_gather_plan, create_gather_merge_plan).
297 : *
298 : * However, if force_parallel_mode = on or force_parallel_mode = regress,
299 : * then we impose parallel mode whenever it's safe to do so, even if the
300 : * final plan doesn't use parallelism. It's not safe to do so if the
301 : * query contains anything parallel-unsafe; parallelModeOK will be false
302 : * in that case. Note that parallelModeOK can't change after this point.
303 : * Otherwise, everything in the query is either parallel-safe or
304 : * parallel-restricted, and in either case it should be OK to impose
305 : * parallel-mode restrictions. If that ends up breaking something, then
306 : * either some function the user included in the query is incorrectly
307 : * labelled as parallel-safe or parallel-restricted when in reality it's
308 : * parallel-unsafe, or else the query planner itself has a bug.
309 : */
310 36005 : glob->parallelModeNeeded = glob->parallelModeOK &&
311 13488 : (force_parallel_mode != FORCE_PARALLEL_OFF);
312 :
313 : /* Determine what fraction of the plan is likely to be scanned */
314 22517 : if (cursorOptions & CURSOR_OPT_FAST_PLAN)
315 : {
316 : /*
317 : * We have no real idea how many tuples the user will ultimately FETCH
318 : * from a cursor, but it is often the case that they don't want them
319 : * all, or would prefer a fast-start plan anyway so that they can
320 : * process some of the tuples sooner. Use a GUC parameter to decide
321 : * what fraction to optimize for.
322 : */
323 120 : tuple_fraction = cursor_tuple_fraction;
324 :
325 : /*
326 : * We document cursor_tuple_fraction as simply being a fraction, which
327 : * means the edge cases 0 and 1 have to be treated specially here. We
328 : * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
329 : */
330 120 : if (tuple_fraction >= 1.0)
331 0 : tuple_fraction = 0.0;
332 120 : else if (tuple_fraction <= 0.0)
333 0 : tuple_fraction = 1e-10;
334 : }
335 : else
336 : {
337 : /* Default assumption is we need all the tuples */
338 22397 : tuple_fraction = 0.0;
339 : }
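/*
 * Illustrative note (editorial addition): with cursor_tuple_fraction at
 * its default of 0.1, a CURSOR_OPT_FAST_PLAN query is therefore planned
 * on the assumption that only about the first 10% of the result will
 * actually be fetched.
 */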
340 :
341 : /* primary planning entry point (may recurse for subqueries) */
342 22517 : root = subquery_planner(glob, parse, NULL,
343 : false, tuple_fraction);
344 :
345 : /* Select best Path and turn it into a Plan */
346 22264 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
347 22264 : best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
348 :
349 22264 : top_plan = create_plan(root, best_path);
350 :
351 : /*
352 : * If creating a plan for a scrollable cursor, make sure it can run
353 : * backwards on demand. Add a Material node at the top if needed.
354 : */
355 22238 : if (cursorOptions & CURSOR_OPT_SCROLL)
356 : {
357 34 : if (!ExecSupportsBackwardScan(top_plan))
358 4 : top_plan = materialize_finished_plan(top_plan);
359 : }
360 :
361 : /*
362 : * Optionally add a Gather node for testing purposes, provided this is
363 : * actually a safe thing to do.
364 : */
365 22238 : if (force_parallel_mode != FORCE_PARALLEL_OFF && top_plan->parallel_safe)
366 : {
367 3 : Gather *gather = makeNode(Gather);
368 :
369 3 : gather->plan.targetlist = top_plan->targetlist;
370 3 : gather->plan.qual = NIL;
371 3 : gather->plan.lefttree = top_plan;
372 3 : gather->plan.righttree = NULL;
373 3 : gather->num_workers = 1;
374 3 : gather->single_copy = true;
375 3 : gather->invisible = (force_parallel_mode == FORCE_PARALLEL_REGRESS);
376 :
377 : /*
378 : * Since this Gather has no parallel-aware descendants to signal to,
379 : * we don't need a rescan Param.
380 : */
381 3 : gather->rescan_param = -1;
382 :
383 : /*
384 : * Ideally we'd use cost_gather here, but setting up dummy path data
385 : * to satisfy it doesn't seem much cleaner than knowing what it does.
386 : */
387 3 : gather->plan.startup_cost = top_plan->startup_cost +
388 : parallel_setup_cost;
389 6 : gather->plan.total_cost = top_plan->total_cost +
390 3 : parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
391 3 : gather->plan.plan_rows = top_plan->plan_rows;
392 3 : gather->plan.plan_width = top_plan->plan_width;
393 3 : gather->plan.parallel_aware = false;
394 3 : gather->plan.parallel_safe = false;
395 :
396 : /* use parallel mode for parallel plans. */
397 3 : root->glob->parallelModeNeeded = true;
398 :
399 3 : top_plan = &gather->plan;
400 : }
401 :
402 : /*
403 : * If any Params were generated, run through the plan tree and compute
404 : * each plan node's extParam/allParam sets. Ideally we'd merge this into
405 : * set_plan_references' tree traversal, but for now it has to be separate
406 : * because we need to visit subplans before, not after, the main plan.
407 : */
408 22238 : if (glob->nParamExec > 0)
409 : {
410 6468 : Assert(list_length(glob->subplans) == list_length(glob->subroots));
411 8395 : forboth(lp, glob->subplans, lr, glob->subroots)
412 : {
413 1927 : Plan *subplan = (Plan *) lfirst(lp);
414 1927 : PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);
415 :
416 1927 : SS_finalize_plan(subroot, subplan);
417 : }
418 6468 : SS_finalize_plan(root, top_plan);
419 : }
420 :
421 : /* final cleanup of the plan */
422 22238 : Assert(glob->finalrtable == NIL);
423 22238 : Assert(glob->finalrowmarks == NIL);
424 22238 : Assert(glob->resultRelations == NIL);
425 22238 : Assert(glob->nonleafResultRelations == NIL);
426 22238 : Assert(glob->rootResultRelations == NIL);
427 22238 : top_plan = set_plan_references(root, top_plan);
428 : /* ... and the subplans (both regular subplans and initplans) */
429 22238 : Assert(list_length(glob->subplans) == list_length(glob->subroots));
430 24165 : forboth(lp, glob->subplans, lr, glob->subroots)
431 : {
432 1927 : Plan *subplan = (Plan *) lfirst(lp);
433 1927 : PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);
434 :
435 1927 : lfirst(lp) = set_plan_references(subroot, subplan);
436 : }
437 :
438 : /* build the PlannedStmt result */
439 22238 : result = makeNode(PlannedStmt);
440 :
441 22238 : result->commandType = parse->commandType;
442 22238 : result->queryId = parse->queryId;
443 22238 : result->hasReturning = (parse->returningList != NIL);
444 22238 : result->hasModifyingCTE = parse->hasModifyingCTE;
445 22238 : result->canSetTag = parse->canSetTag;
446 22238 : result->transientPlan = glob->transientPlan;
447 22238 : result->dependsOnRole = glob->dependsOnRole;
448 22238 : result->parallelModeNeeded = glob->parallelModeNeeded;
449 22238 : result->planTree = top_plan;
450 22238 : result->rtable = glob->finalrtable;
451 22238 : result->resultRelations = glob->resultRelations;
452 22238 : result->nonleafResultRelations = glob->nonleafResultRelations;
453 22238 : result->rootResultRelations = glob->rootResultRelations;
454 22238 : result->subplans = glob->subplans;
455 22238 : result->rewindPlanIDs = glob->rewindPlanIDs;
456 22238 : result->rowMarks = glob->finalrowmarks;
457 22238 : result->relationOids = glob->relationOids;
458 22238 : result->invalItems = glob->invalItems;
459 22238 : result->nParamExec = glob->nParamExec;
460 : /* utilityStmt should be null, but we might as well copy it */
461 22238 : result->utilityStmt = parse->utilityStmt;
462 22238 : result->stmt_location = parse->stmt_location;
463 22238 : result->stmt_len = parse->stmt_len;
464 :
465 22238 : return result;
466 : }
467 :
468 :
469 : /*--------------------
470 : * subquery_planner
471 : * Invokes the planner on a subquery. We recurse to here for each
472 : * sub-SELECT found in the query tree.
473 : *
474 : * glob is the global state for the current planner run.
475 : * parse is the querytree produced by the parser & rewriter.
476 : * parent_root is the immediate parent Query's info (NULL at the top level).
477 : * hasRecursion is true if this is a recursive WITH query.
478 : * tuple_fraction is the fraction of tuples we expect will be retrieved.
479 : * tuple_fraction is interpreted as explained for grouping_planner, below.
480 : *
481 : * Basically, this routine does the stuff that should only be done once
482 : * per Query object. It then calls grouping_planner. At one time,
483 : * grouping_planner could be invoked recursively on the same Query object;
484 : * that's not currently true, but we keep the separation between the two
485 : * routines anyway, in case we need it again someday.
486 : *
487 : * subquery_planner will be called recursively to handle sub-Query nodes
488 : * found within the query's expressions and rangetable.
489 : *
490 : * Returns the PlannerInfo struct ("root") that contains all data generated
491 : * while planning the subquery. In particular, the Path(s) attached to
492 : * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
493 : * cheapest way(s) to implement the query. The top level will select the
494 : * best Path and pass it through createplan.c to produce a finished Plan.
495 : *--------------------
496 : */
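/*
 * Illustrative call pattern (editorial addition): the top level invokes
 * this exactly as standard_planner() does above,
 *
 *		root = subquery_planner(glob, parse, NULL, false, tuple_fraction);
 *		final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
 *		best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
 *
 * while recursive invocations pass the current PlannerInfo as parent_root
 * instead of NULL.
 */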
497 : PlannerInfo *
498 25427 : subquery_planner(PlannerGlobal *glob, Query *parse,
499 : PlannerInfo *parent_root,
500 : bool hasRecursion, double tuple_fraction)
501 : {
502 : PlannerInfo *root;
503 : List *newWithCheckOptions;
504 : List *newHaving;
505 : bool hasOuterJoins;
506 : RelOptInfo *final_rel;
507 : ListCell *l;
508 :
509 : /* Create a PlannerInfo data structure for this subquery */
510 25427 : root = makeNode(PlannerInfo);
511 25427 : root->parse = parse;
512 25427 : root->glob = glob;
513 25427 : root->query_level = parent_root ? parent_root->query_level + 1 : 1;
514 25427 : root->parent_root = parent_root;
515 25427 : root->plan_params = NIL;
516 25427 : root->outer_params = NULL;
517 25427 : root->planner_cxt = CurrentMemoryContext;
518 25427 : root->init_plans = NIL;
519 25427 : root->cte_plan_ids = NIL;
520 25427 : root->multiexpr_params = NIL;
521 25427 : root->eq_classes = NIL;
522 25427 : root->append_rel_list = NIL;
523 25427 : root->pcinfo_list = NIL;
524 25427 : root->rowMarks = NIL;
525 25427 : memset(root->upper_rels, 0, sizeof(root->upper_rels));
526 25427 : memset(root->upper_targets, 0, sizeof(root->upper_targets));
527 25427 : root->processed_tlist = NIL;
528 25427 : root->grouping_map = NULL;
529 25427 : root->minmax_aggs = NIL;
530 25427 : root->qual_security_level = 0;
531 25427 : root->hasInheritedTarget = false;
532 25427 : root->hasRecursion = hasRecursion;
533 25427 : if (hasRecursion)
534 40 : root->wt_param_id = SS_assign_special_param(root);
535 : else
536 25387 : root->wt_param_id = -1;
537 25427 : root->non_recursive_path = NULL;
538 :
539 : /*
540 : * If there is a WITH list, process each WITH query and build an initplan
541 : * SubPlan structure for it.
542 : */
543 25427 : if (parse->cteList)
544 116 : SS_process_ctes(root);
545 :
546 : /*
547 : * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
548 : * to transform them into joins. Note that this step does not descend
549 : * into subqueries; if we pull up any subqueries below, their SubLinks are
550 : * processed just before pulling them up.
551 : */
552 25427 : if (parse->hasSubLinks)
553 1425 : pull_up_sublinks(root);
554 :
555 : /*
556 : * Scan the rangetable for set-returning functions, and inline them if
557 : * possible (producing subqueries that might get pulled up next).
558 : * Recursion issues here are handled in the same way as for SubLinks.
559 : */
560 25427 : inline_set_returning_functions(root);
561 :
562 : /*
563 : * Check to see if any subqueries in the jointree can be merged into this
564 : * query.
565 : */
566 25427 : pull_up_subqueries(root);
567 :
568 : /*
569 : * If this is a simple UNION ALL query, flatten it into an appendrel. We
570 : * do this now because it requires applying pull_up_subqueries to the leaf
571 : * queries of the UNION ALL, which weren't touched above because they
572 : * weren't referenced by the jointree (they will be after we do this).
573 : */
574 25426 : if (parse->setOperations)
575 331 : flatten_simple_union_all(root);
576 :
577 : /*
578 : * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
579 : * avoid the expense of doing flatten_join_alias_vars(). Also check for
580 : * outer joins --- if none, we can skip reduce_outer_joins(). And check
581 : * for LATERAL RTEs, too. This must be done after we have done
582 : * pull_up_subqueries(), of course.
583 : */
584 25426 : root->hasJoinRTEs = false;
585 25426 : root->hasLateralRTEs = false;
586 25426 : hasOuterJoins = false;
587 54321 : foreach(l, parse->rtable)
588 : {
589 28895 : RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
590 :
591 28895 : if (rte->rtekind == RTE_JOIN)
592 : {
593 2377 : root->hasJoinRTEs = true;
594 2377 : if (IS_OUTER_JOIN(rte->jointype))
595 1323 : hasOuterJoins = true;
596 : }
597 28895 : if (rte->lateral)
598 188 : root->hasLateralRTEs = true;
599 : }
600 :
601 : /*
602 : * Preprocess RowMark information. We need to do this after subquery
603 : * pullup (so that all non-inherited RTEs are present) and before
604 : * inheritance expansion (so that the info is available for
605 : * expand_inherited_tables to examine and modify).
606 : */
607 25426 : preprocess_rowmarks(root);
608 :
609 : /*
610 : * Expand any rangetable entries that are inheritance sets into "append
611 : * relations". This can add entries to the rangetable, but they must be
612 : * plain base relations not joins, so it's OK (and marginally more
613 : * efficient) to do it after checking for join RTEs. We must do it after
614 : * pulling up subqueries, else we'd fail to handle inherited tables in
615 : * subqueries.
616 : */
617 25426 : expand_inherited_tables(root);
618 :
619 : /*
620 : * Set hasHavingQual to remember if HAVING clause is present. Needed
621 : * because preprocess_expression will reduce a constant-true condition to
622 : * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
623 : */
624 25426 : root->hasHavingQual = (parse->havingQual != NULL);
625 :
626 : /* Clear this flag; might get set in distribute_qual_to_rels */
627 25426 : root->hasPseudoConstantQuals = false;
628 :
629 : /*
630 : * Do expression preprocessing on targetlist and quals, as well as other
631 : * random expressions in the querytree. Note that we do not need to
632 : * handle sort/group expressions explicitly, because they are actually
633 : * part of the targetlist.
634 : */
635 25178 : parse->targetList = (List *)
636 25426 : preprocess_expression(root, (Node *) parse->targetList,
637 : EXPRKIND_TARGET);
638 :
639 : /* Constant-folding might have removed all set-returning functions */
640 25178 : if (parse->hasTargetSRFs)
641 216 : parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
642 :
643 25178 : newWithCheckOptions = NIL;
644 25388 : foreach(l, parse->withCheckOptions)
645 : {
646 210 : WithCheckOption *wco = (WithCheckOption *) lfirst(l);
647 :
648 210 : wco->qual = preprocess_expression(root, wco->qual,
649 : EXPRKIND_QUAL);
650 210 : if (wco->qual != NULL)
651 177 : newWithCheckOptions = lappend(newWithCheckOptions, wco);
652 : }
653 25178 : parse->withCheckOptions = newWithCheckOptions;
654 :
655 25178 : parse->returningList = (List *)
656 25178 : preprocess_expression(root, (Node *) parse->returningList,
657 : EXPRKIND_TARGET);
658 :
659 25178 : preprocess_qual_conditions(root, (Node *) parse->jointree);
660 :
661 25178 : parse->havingQual = preprocess_expression(root, parse->havingQual,
662 : EXPRKIND_QUAL);
663 :
664 25320 : foreach(l, parse->windowClause)
665 : {
666 142 : WindowClause *wc = (WindowClause *) lfirst(l);
667 :
668 : /* partitionClause/orderClause are sort/group expressions */
669 142 : wc->startOffset = preprocess_expression(root, wc->startOffset,
670 : EXPRKIND_LIMIT);
671 142 : wc->endOffset = preprocess_expression(root, wc->endOffset,
672 : EXPRKIND_LIMIT);
673 : }
674 :
675 25178 : parse->limitOffset = preprocess_expression(root, parse->limitOffset,
676 : EXPRKIND_LIMIT);
677 25178 : parse->limitCount = preprocess_expression(root, parse->limitCount,
678 : EXPRKIND_LIMIT);
679 :
680 25178 : if (parse->onConflict)
681 : {
682 332 : parse->onConflict->arbiterElems = (List *)
683 166 : preprocess_expression(root,
684 166 : (Node *) parse->onConflict->arbiterElems,
685 : EXPRKIND_ARBITER_ELEM);
686 332 : parse->onConflict->arbiterWhere =
687 166 : preprocess_expression(root,
688 166 : parse->onConflict->arbiterWhere,
689 : EXPRKIND_QUAL);
690 332 : parse->onConflict->onConflictSet = (List *)
691 166 : preprocess_expression(root,
692 166 : (Node *) parse->onConflict->onConflictSet,
693 : EXPRKIND_TARGET);
694 332 : parse->onConflict->onConflictWhere =
695 166 : preprocess_expression(root,
696 166 : parse->onConflict->onConflictWhere,
697 : EXPRKIND_QUAL);
698 : /* exclRelTlist contains only Vars, so no preprocessing needed */
699 : }
700 :
701 25178 : root->append_rel_list = (List *)
702 25178 : preprocess_expression(root, (Node *) root->append_rel_list,
703 : EXPRKIND_APPINFO);
704 :
705 : /* Also need to preprocess expressions within RTEs */
706 55230 : foreach(l, parse->rtable)
707 : {
708 30052 : RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
709 : int kind;
710 : ListCell *lcsq;
711 :
712 30052 : if (rte->rtekind == RTE_RELATION)
713 : {
714 22483 : if (rte->tablesample)
715 40 : rte->tablesample = (TableSampleClause *)
716 40 : preprocess_expression(root,
717 40 : (Node *) rte->tablesample,
718 : EXPRKIND_TABLESAMPLE);
719 : }
720 7569 : else if (rte->rtekind == RTE_SUBQUERY)
721 : {
722 : /*
723 : * We don't want to do all preprocessing yet on the subquery's
724 : * expressions, since that will happen when we plan it. But if it
725 : * contains any join aliases of our level, those have to get
726 : * expanded now, because planning of the subquery won't do it.
727 : * That's only possible if the subquery is LATERAL.
728 : */
729 3075 : if (rte->lateral && root->hasJoinRTEs)
730 56 : rte->subquery = (Query *)
731 56 : flatten_join_alias_vars(root, (Node *) rte->subquery);
732 : }
733 4494 : else if (rte->rtekind == RTE_FUNCTION)
734 : {
735 : /* Preprocess the function expression(s) fully */
736 1327 : kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
737 1327 : rte->functions = (List *)
738 1327 : preprocess_expression(root, (Node *) rte->functions, kind);
739 : }
740 3167 : else if (rte->rtekind == RTE_TABLEFUNC)
741 : {
742 : /* Preprocess the function expression(s) fully */
743 22 : kind = rte->lateral ? EXPRKIND_TABLEFUNC_LATERAL : EXPRKIND_TABLEFUNC;
744 22 : rte->tablefunc = (TableFunc *)
745 22 : preprocess_expression(root, (Node *) rte->tablefunc, kind);
746 : }
747 3145 : else if (rte->rtekind == RTE_VALUES)
748 : {
749 : /* Preprocess the values lists fully */
750 534 : kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
751 534 : rte->values_lists = (List *)
752 534 : preprocess_expression(root, (Node *) rte->values_lists, kind);
753 : }
754 :
755 : /*
756 : * Process each element of the securityQuals list as if it were a
757 : * separate qual expression (as indeed it is). We need to do it this
758 : * way to get proper canonicalization of AND/OR structure. Note that
759 : * this converts each element into an implicit-AND sublist.
760 : */
761 30349 : foreach(lcsq, rte->securityQuals)
762 : {
763 297 : lfirst(lcsq) = preprocess_expression(root,
764 297 : (Node *) lfirst(lcsq),
765 : EXPRKIND_QUAL);
766 : }
767 : }
768 :
769 : /*
770 : * In some cases we may want to transfer a HAVING clause into WHERE. We
771 : * cannot do so if the HAVING clause contains aggregates (obviously) or
772 : * volatile functions (since a HAVING clause is supposed to be executed
773 : * only once per group). We also can't do this if there are any nonempty
774 : * grouping sets; moving such a clause into WHERE would potentially change
775 : * the results, if any referenced column isn't present in all the grouping
776 : * sets. (If there are only empty grouping sets, then the HAVING clause
777 : * must be degenerate as discussed below.)
778 : *
779 : * Also, it may be that the clause is so expensive to execute that we're
780 : * better off doing it only once per group, despite the loss of
781 : * selectivity. This is hard to estimate short of doing the entire
782 : * planning process twice, so we use a heuristic: clauses containing
783 : * subplans are left in HAVING. Otherwise, we move or copy the HAVING
784 : * clause into WHERE, in hopes of eliminating tuples before aggregation
785 : * instead of after.
786 : *
787 : * If the query has explicit grouping then we can simply move such a
788 : * clause into WHERE; any group that fails the clause will not be in the
789 : * output because none of its tuples will reach the grouping or
790 : * aggregation stage. Otherwise we must have a degenerate (variable-free)
791 : * HAVING clause, which we put in WHERE so that query_planner() can use it
792 : * in a gating Result node, but also keep in HAVING to ensure that we
793 : * don't emit a bogus aggregated row. (This could be done better, but it
794 : * seems not worth optimizing.)
795 : *
796 : * Note that both havingQual and parse->jointree->quals are in
797 : * implicitly-ANDed-list form at this point, even though they are declared
798 : * as Node *.
799 : */
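/*
 * Hedged example of the transformation (editorial addition; the table and
 * columns are hypothetical):
 *
 *		SELECT a, sum(b) FROM t GROUP BY a HAVING a > 0;
 *			-- aggregate-free, non-volatile, no grouping sets:
 *			-- "a > 0" moves to WHERE and filters rows before aggregation
 *
 *		SELECT a, sum(b) FROM t GROUP BY a HAVING sum(b) > 10;
 *			-- contains an aggregate, so it must stay in HAVING
 */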
800 25178 : newHaving = NIL;
801 25227 : foreach(l, (List *) parse->havingQual)
802 : {
803 49 : Node *havingclause = (Node *) lfirst(l);
804 :
805 92 : if ((parse->groupClause && parse->groupingSets) ||
806 71 : contain_agg_clause(havingclause) ||
807 56 : contain_volatile_functions(havingclause) ||
808 28 : contain_subplans(havingclause))
809 : {
810 : /* keep it in HAVING */
811 21 : newHaving = lappend(newHaving, havingclause);
812 : }
813 28 : else if (parse->groupClause && !parse->groupingSets)
814 : {
815 : /* move it to WHERE */
816 50 : parse->jointree->quals = (Node *)
817 25 : lappend((List *) parse->jointree->quals, havingclause);
818 : }
819 : else
820 : {
821 : /* put a copy in WHERE, keep it in HAVING */
822 6 : parse->jointree->quals = (Node *)
823 3 : lappend((List *) parse->jointree->quals,
824 : copyObject(havingclause));
825 3 : newHaving = lappend(newHaving, havingclause);
826 : }
827 : }
828 25178 : parse->havingQual = (Node *) newHaving;
829 :
830 : /* Remove any redundant GROUP BY columns */
831 25178 : remove_useless_groupby_columns(root);
832 :
833 : /*
834 : * If we have any outer joins, try to reduce them to plain inner joins.
835 : * This step is most easily done after we've done expression
836 : * preprocessing.
837 : */
838 25178 : if (hasOuterJoins)
839 1126 : reduce_outer_joins(root);
840 :
841 : /*
842 : * Do the main planning. If we have an inherited target relation, that
843 : * needs special processing, else go straight to grouping_planner.
844 : */
845 29519 : if (parse->resultRelation &&
846 4341 : rt_fetch(parse->resultRelation, parse->rtable)->inh)
847 67 : inheritance_planner(root);
848 : else
849 25111 : grouping_planner(root, false, tuple_fraction);
850 :
851 : /*
852 : * Capture the set of outer-level param IDs we have access to, for use in
853 : * extParam/allParam calculations later.
854 : */
855 25174 : SS_identify_outer_params(root);
856 :
857 : /*
858 : * If any initPlans were created in this query level, adjust the surviving
859 : * Paths' costs and parallel-safety flags to account for them. The
860 : * initPlans won't actually get attached to the plan tree till
861 : * create_plan() runs, but we must include their effects now.
862 : */
863 25174 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
864 25174 : SS_charge_for_initplans(root, final_rel);
865 :
866 : /*
867 : * Make sure we've identified the cheapest Path for the final rel. (By
868 : * doing this here not in grouping_planner, we include initPlan costs in
869 : * the decision, though it's unlikely that will change anything.)
870 : */
871 25174 : set_cheapest(final_rel);
872 :
873 25174 : return root;
874 : }
875 :
876 : /*
877 : * preprocess_expression
878 : * Do subquery_planner's preprocessing work for an expression,
879 : * which can be a targetlist, a WHERE clause (including JOIN/ON
880 : * conditions), a HAVING clause, or a few other things.
881 : */
882 : static Node *
883 184307 : preprocess_expression(PlannerInfo *root, Node *expr, int kind)
884 : {
885 : /*
886 : * Fall out quickly if expression is empty. This occurs often enough to
887 : * be worth checking. Note that null->null is the correct conversion for
888 : * implicit-AND result format, too.
889 : */
890 184307 : if (expr == NULL)
891 143759 : return NULL;
892 :
893 : /*
894 : * If the query has any join RTEs, replace join alias variables with
895 : * base-relation variables. We must do this before sublink processing,
896 : * else sublinks expanded out from join aliases would not get processed.
897 : * We can skip it in non-lateral RTE functions, VALUES lists, and
898 : * TABLESAMPLE clauses, however, since they can't contain any Vars of the
899 : * current query level.
900 : */
901 40548 : if (root->hasJoinRTEs &&
902 11750 : !(kind == EXPRKIND_RTFUNC ||
903 5849 : kind == EXPRKIND_VALUES ||
904 : kind == EXPRKIND_TABLESAMPLE ||
905 : kind == EXPRKIND_TABLEFUNC))
906 5849 : expr = flatten_join_alias_vars(root, expr);
907 :
908 : /*
909 : * Simplify constant expressions.
910 : *
911 : * Note: an essential effect of this is to convert named-argument function
912 : * calls to positional notation and insert the current actual values of
913 : * any default arguments for functions. To ensure that happens, we *must*
914 : * process all expressions here. Previous PG versions sometimes skipped
915 : * const-simplification if it didn't seem worth the trouble, but we can't
916 : * do that anymore.
917 : *
918 : * Note: this also flattens nested AND and OR expressions into N-argument
919 : * form. All processing of a qual expression after this point must be
920 : * careful to maintain AND/OR flatness --- that is, do not generate a tree
921 : * with AND directly under AND, nor OR directly under OR.
922 : */
923 40548 : expr = eval_const_expressions(root, expr);
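/*
 * Illustrative note (editorial addition): after this call, e.g.
 * "x < 2 + 2" has been folded to "x < 4", and a named-notation call
 * such as f(b := 1) has been rewritten to positional form with any
 * default arguments inserted, per the comment above.
 */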
924 :
925 : /*
926 : * If it's a qual or havingQual, canonicalize it.
927 : */
928 40300 : if (kind == EXPRKIND_QUAL)
929 : {
930 12003 : expr = (Node *) canonicalize_qual((Expr *) expr);
931 :
932 : #ifdef OPTIMIZER_DEBUG
933 : printf("After canonicalize_qual()\n");
934 : pprint(expr);
935 : #endif
936 : }
937 :
938 : /* Expand SubLinks to SubPlans */
939 40300 : if (root->parse->hasSubLinks)
940 3610 : expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
941 :
942 : /*
943 : * XXX do not insert anything here unless you have grokked the comments in
944 : * SS_replace_correlation_vars ...
945 : */
946 :
947 : /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
948 40300 : if (root->query_level > 1)
949 5968 : expr = SS_replace_correlation_vars(root, expr);
950 :
951 : /*
952 : * If it's a qual or havingQual, convert it to implicit-AND format. (We
953 : * don't want to do this before eval_const_expressions, since the latter
954 : * would be unable to simplify a top-level AND correctly. Also,
955 : * SS_process_sublinks expects explicit-AND format.)
956 : */
957 40300 : if (kind == EXPRKIND_QUAL)
958 12003 : expr = (Node *) make_ands_implicit((Expr *) expr);
959 :
960 40300 : return expr;
961 : }
962 :
963 : /*
964 : * preprocess_qual_conditions
965 : * Recursively scan the query's jointree and do subquery_planner's
966 : * preprocessing work on each qual condition found therein.
967 : */
968 : static void
969 47250 : preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
970 : {
971 47250 : if (jtnode == NULL)
972 47250 : return;
973 47250 : if (IsA(jtnode, RangeTblRef))
974 : {
975 : /* nothing to do here */
976 : }
977 29607 : else if (IsA(jtnode, FromExpr))
978 : {
979 26966 : FromExpr *f = (FromExpr *) jtnode;
980 : ListCell *l;
981 :
982 43756 : foreach(l, f->fromlist)
983 16790 : preprocess_qual_conditions(root, lfirst(l));
984 :
985 26966 : f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
986 : }
987 2641 : else if (IsA(jtnode, JoinExpr))
988 : {
989 2641 : JoinExpr *j = (JoinExpr *) jtnode;
990 :
991 2641 : preprocess_qual_conditions(root, j->larg);
992 2641 : preprocess_qual_conditions(root, j->rarg);
993 :
994 2641 : j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
995 : }
996 : else
997 0 : elog(ERROR, "unrecognized node type: %d",
998 : (int) nodeTag(jtnode));
999 : }
1000 :
1001 : /*
1002 : * preprocess_phv_expression
1003 : * Do preprocessing on a PlaceHolderVar expression that's been pulled up.
1004 : *
1005 : * If a LATERAL subquery references an output of another subquery, and that
1006 : * output must be wrapped in a PlaceHolderVar because of an intermediate outer
1007 : * join, then we'll push the PlaceHolderVar expression down into the subquery
1008 : * and later pull it back up during find_lateral_references, which runs after
1009 : * subquery_planner has preprocessed all the expressions that were in the
1010 : * current query level to start with. So we need to preprocess it then.
1011 : */
1012 : Expr *
1013 6 : preprocess_phv_expression(PlannerInfo *root, Expr *expr)
1014 : {
1015 6 : return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
1016 : }
1017 :
1018 : /*
1019 : * inheritance_planner
1020 : * Generate Paths in the case where the result relation is an
1021 : * inheritance set.
1022 : *
1023 : * We have to handle this case differently from cases where a source relation
1024 : * is an inheritance set. Source inheritance is expanded at the bottom of the
1025 : * plan tree (see allpaths.c), but target inheritance has to be expanded at
1026 : * the top. The reason is that for UPDATE, each target relation needs a
1027 : * different targetlist matching its own column set. Fortunately,
1028 : * the UPDATE/DELETE target can never be the nullable side of an outer join,
1029 : * so it's OK to generate the plan this way.
1030 : *
1031 : * Returns nothing; the useful output is in the Paths we attach to
1032 : * the (UPPERREL_FINAL, NULL) upperrel stored in *root.
1033 : *
1034 : * Note that we have not done set_cheapest() on the final rel; it's convenient
1035 : * to leave this to the caller.
1036 : */
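/*
 * Hedged example (editorial addition; table names are hypothetical): given
 *
 *		CREATE TABLE parent (a int, b int);
 *		CREATE TABLE child (extra int) INHERITS (parent);
 *		UPDATE parent SET b = b + 1;
 *
 * this function plans the UPDATE separately for parent and child, since
 * each needs a targetlist matching its own column set, then ties the
 * per-table subpaths together under a single ModifyTable node.
 */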
1037 : static void
1038 67 : inheritance_planner(PlannerInfo *root)
1039 : {
1040 67 : Query *parse = root->parse;
1041 67 : int parentRTindex = parse->resultRelation;
1042 : Bitmapset *subqueryRTindexes;
1043 : Bitmapset *modifiableARIindexes;
1044 67 : int nominalRelation = -1;
1045 67 : List *final_rtable = NIL;
1046 67 : int save_rel_array_size = 0;
1047 67 : RelOptInfo **save_rel_array = NULL;
1048 67 : List *subpaths = NIL;
1049 67 : List *subroots = NIL;
1050 67 : List *resultRelations = NIL;
1051 67 : List *withCheckOptionLists = NIL;
1052 67 : List *returningLists = NIL;
1053 : List *rowMarks;
1054 : RelOptInfo *final_rel;
1055 : ListCell *lc;
1056 : Index rti;
1057 : RangeTblEntry *parent_rte;
1058 67 : List *partitioned_rels = NIL;
1059 :
1060 67 : Assert(parse->commandType != CMD_INSERT);
1061 :
1062 : /*
1063 : * We generate a modified instance of the original Query for each target
1064 : * relation, plan that, and put all the plans into a list that will be
1065 : * controlled by a single ModifyTable node. All the instances share the
1066 : * same rangetable, but each instance must have its own set of subquery
1067 : * RTEs within the finished rangetable because (1) they are likely to get
1068 : * scribbled on during planning, and (2) it's not inconceivable that
1069 : * subqueries could get planned differently in different cases. We need
1070 : * not create duplicate copies of other RTE kinds, in particular not the
1071 : * target relations, because they don't have either of those issues. Not
1072 : * having to duplicate the target relations is important because doing so
1073 : * (1) would result in a rangetable of length O(N^2) for N targets, with
1074 : * at least O(N^3) work expended here; and (2) would greatly complicate
1075 : * management of the rowMarks list.
1076 : *
1077 : * To begin with, generate a bitmapset of the relids of the subquery RTEs.
1078 : */
1079 67 : subqueryRTindexes = NULL;
1080 67 : rti = 1;
1081 404 : foreach(lc, parse->rtable)
1082 : {
1083 337 : RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1084 :
1085 337 : if (rte->rtekind == RTE_SUBQUERY)
1086 13 : subqueryRTindexes = bms_add_member(subqueryRTindexes, rti);
1087 337 : rti++;
1088 : }
1089 :
1090 : /*
1091 : * Next, we want to identify which AppendRelInfo items contain references
1092 : * to any of the aforesaid subquery RTEs. These items will need to be
1093 : * copied and modified to adjust their subquery references; whereas the
1094 : * other ones need not be touched. It's worth being careful about this
1095 : * because we can usually avoid processing most of the AppendRelInfo
1096 : * items, thereby saving O(N^2) space and time when the target is a large
1097 : * inheritance tree. We can identify AppendRelInfo items by their
1098 : * child_relid, since that should be unique within the list.
1099 : */
1100 67 : modifiableARIindexes = NULL;
1101 67 : if (subqueryRTindexes != NULL)
1102 : {
1103 51 : foreach(lc, root->append_rel_list)
1104 : {
1105 44 : AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
1106 :
1107 82 : if (bms_is_member(appinfo->parent_relid, subqueryRTindexes) ||
1108 76 : bms_is_member(appinfo->child_relid, subqueryRTindexes) ||
1109 38 : bms_overlap(pull_varnos((Node *) appinfo->translated_vars),
1110 : subqueryRTindexes))
1111 6 : modifiableARIindexes = bms_add_member(modifiableARIindexes,
1112 6 : appinfo->child_relid);
1113 : }
1114 : }
1115 :
1116 : /*
1117 : * If the parent RTE is a partitioned table, we should use that as the
1118 : * nominal relation, because the RTEs added for partitioned tables
1119 : * (including the root parent) as child members of the inheritance set do
1120 : * not appear anywhere else in the plan. The situation is exactly the
1121 : * opposite in the case of non-partitioned inheritance parent as described
1122 : * below.
1123 : */
1124 67 : parent_rte = rt_fetch(parentRTindex, root->parse->rtable);
1125 67 : if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
1126 11 : nominalRelation = parentRTindex;
1127 :
1128 : /*
1129 : * And now we can get on with generating a plan for each child table.
1130 : */
1131 284 : foreach(lc, root->append_rel_list)
1132 : {
1133 217 : AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
1134 : PlannerInfo *subroot;
1135 : RangeTblEntry *child_rte;
1136 : RelOptInfo *sub_final_rel;
1137 : Path *subpath;
1138 :
1139 : /* append_rel_list contains all append rels; ignore others */
1140 217 : if (appinfo->parent_relid != parentRTindex)
1141 63 : continue;
1142 :
1143 : /*
1144 : * We need a working copy of the PlannerInfo so that we can control
1145 : * propagation of information back to the main copy.
1146 : */
1147 189 : subroot = makeNode(PlannerInfo);
1148 189 : memcpy(subroot, root, sizeof(PlannerInfo));
1149 :
1150 : /*
1151 : * Generate modified query with this rel as target. We first apply
1152 : * adjust_appendrel_attrs, which copies the Query and changes
1153 : * references to the parent RTE to refer to the current child RTE,
1154 : * then fool around with subquery RTEs.
1155 : */
1156 189 : subroot->parse = (Query *)
1157 189 : adjust_appendrel_attrs(root,
1158 : (Node *) parse,
1159 : 1, &appinfo);
1160 :
1161 : /*
1162 : * If there are securityQuals attached to the parent, move them to the
1163 : * child rel (they've already been transformed properly for that).
1164 : */
1165 189 : parent_rte = rt_fetch(parentRTindex, subroot->parse->rtable);
1166 189 : child_rte = rt_fetch(appinfo->child_relid, subroot->parse->rtable);
1167 189 : child_rte->securityQuals = parent_rte->securityQuals;
1168 189 : parent_rte->securityQuals = NIL;
1169 :
1170 : /*
1171 : * The rowMarks list might contain references to subquery RTEs, so
1172 : * make a copy that we can apply ChangeVarNodes to. (Fortunately, the
1173 : * executor doesn't need to see the modified copies --- we can just
1174 : * pass it the original rowMarks list.)
1175 : */
1176 189 : subroot->rowMarks = copyObject(root->rowMarks);
1177 :
1178 : /*
1179 : * The append_rel_list likewise might contain references to subquery
1180 : * RTEs (if any subqueries were flattenable UNION ALLs). So prepare
1181 : * to apply ChangeVarNodes to that, too. As explained above, we only
1182 : * want to copy items that actually contain such references; the rest
1183 : * can just get linked into the subroot's append_rel_list.
1184 : *
1185 : * If we know there are no such references, we can just use the outer
1186 : * append_rel_list unmodified.
1187 : */
1188 189 : if (modifiableARIindexes != NULL)
1189 : {
1190 : ListCell *lc2;
1191 :
1192 8 : subroot->append_rel_list = NIL;
1193 84 : foreach(lc2, root->append_rel_list)
1194 : {
1195 76 : AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2);
1196 :
1197 76 : if (bms_is_member(appinfo2->child_relid, modifiableARIindexes))
1198 16 : appinfo2 = copyObject(appinfo2);
1199 :
1200 76 : subroot->append_rel_list = lappend(subroot->append_rel_list,
1201 : appinfo2);
1202 : }
1203 : }
1204 :
1205 : /*
1206 : * Add placeholders to the child Query's rangetable list to fill the
1207 : * RT indexes already reserved for subqueries in previous children.
1208 : * These won't be referenced, so there's no need to make them very
1209 : * valid-looking.
1210 : */
1211 393 : while (list_length(subroot->parse->rtable) < list_length(final_rtable))
1212 30 : subroot->parse->rtable = lappend(subroot->parse->rtable,
1213 15 : makeNode(RangeTblEntry));
1214 :
1215 : /*
1216 : * If this isn't the first child Query, generate duplicates of all
1217 : * subquery RTEs, and adjust Var numbering to reference the
1218 : * duplicates. To simplify the loop logic, we scan the original rtable
1219 : * not the copy just made by adjust_appendrel_attrs; that should be OK
1220 : * since subquery RTEs couldn't contain any references to the target
1221 : * rel.
1222 : */
1223 189 : if (final_rtable != NIL && subqueryRTindexes != NULL)
1224 : {
1225 : ListCell *lr;
1226 :
1227 15 : rti = 1;
1228 170 : foreach(lr, parse->rtable)
1229 : {
1230 155 : RangeTblEntry *rte = (RangeTblEntry *) lfirst(lr);
1231 :
1232 155 : if (bms_is_member(rti, subqueryRTindexes))
1233 : {
1234 : Index newrti;
1235 :
1236 : /*
1237 : * The RTE can't contain any references to its own RT
1238 : * index, except in its securityQuals, so we can save a
1239 : * few cycles by applying ChangeVarNodes to the rest of
1240 : * the rangetable before we append the RTE to it.
1241 : */
1242 25 : newrti = list_length(subroot->parse->rtable) + 1;
1243 25 : ChangeVarNodes((Node *) subroot->parse, rti, newrti, 0);
1244 25 : ChangeVarNodes((Node *) subroot->rowMarks, rti, newrti, 0);
1245 : /* Skip processing unchanging parts of append_rel_list */
1246 25 : if (modifiableARIindexes != NULL)
1247 : {
1248 : ListCell *lc2;
1249 :
1250 159 : foreach(lc2, subroot->append_rel_list)
1251 : {
1252 144 : AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2);
1253 :
1254 144 : if (bms_is_member(appinfo2->child_relid,
1255 : modifiableARIindexes))
1256 30 : ChangeVarNodes((Node *) appinfo2, rti, newrti, 0);
1257 : }
1258 : }
1259 25 : rte = copyObject(rte);
1260 25 : ChangeVarNodes((Node *) rte->securityQuals, rti, newrti, 0);
1261 25 : subroot->parse->rtable = lappend(subroot->parse->rtable,
1262 : rte);
1263 : }
1264 155 : rti++;
1265 : }
1266 : }
1267 :
1268 : /* There shouldn't be any OJ info to translate, as yet */
1269 189 : Assert(subroot->join_info_list == NIL);
1270 : /* and we haven't created PlaceHolderInfos, either */
1271 189 : Assert(subroot->placeholder_list == NIL);
1272 : /* hack to mark target relation as an inheritance partition */
1273 189 : subroot->hasInheritedTarget = true;
1274 :
1275 : /* Generate Path(s) for accessing this result relation */
1276 189 : grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ );
1277 :
1278 : /*
1279 : * Set the nominal target relation of the ModifyTable node if not
1280 : * already done. We use the inheritance parent RTE as the nominal
1281 : * target relation if it's a partitioned table (see just above this
1282 : * loop). In the non-partitioned parent case, we'll use the first
1283 : * child relation (even if it's excluded) as the nominal target
1284 : * relation. Because of the way expand_inherited_rtentry works, the
1285 : * latter should be the RTE representing the parent table in its role
1286 : * as a simple member of the inheritance set.
1287 : *
1288 : * It would be logically cleaner to *always* use the inheritance
1289 : * parent RTE as the nominal relation; but that RTE is not otherwise
1290 : * referenced in the plan in the non-partitioned inheritance case.
1291 : * Instead the duplicate child RTE created by expand_inherited_rtentry
1292 : * is used elsewhere in the plan, so using the original parent RTE
1293 : * would give rise to confusing use of multiple aliases in EXPLAIN
1294 : * output for what the user will think is the "same" table. OTOH,
1295 : * it's not a problem in the partitioned inheritance case, because the
1296 : * duplicate child RTE added for the parent does not appear anywhere
1297 : * else in the plan tree.
1298 : */
1299 189 : if (nominalRelation < 0)
1300 56 : nominalRelation = appinfo->child_relid;
1301 :
1302 : /*
1303 : * Select cheapest path in case there's more than one. We always run
1304 : * modification queries to conclusion, so we care only for the
1305 : * cheapest-total path.
1306 : */
1307 189 : sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
1308 189 : set_cheapest(sub_final_rel);
1309 189 : subpath = sub_final_rel->cheapest_total_path;
1310 :
1311 : /*
1312 : * If this child rel was excluded by constraint exclusion, exclude it
1313 : * from the result plan.
1314 : */
1315 189 : if (IS_DUMMY_PATH(subpath))
1316 7 : continue;
1317 :
1318 : /*
1319 : * If this is the first non-excluded child, its post-planning rtable
1320 : * becomes the initial contents of final_rtable; otherwise, append
1321 : * just its modified subquery RTEs to final_rtable.
1322 : */
1323 182 : if (final_rtable == NIL)
1324 66 : final_rtable = subroot->parse->rtable;
1325 : else
1326 232 : final_rtable = list_concat(final_rtable,
1327 116 : list_copy_tail(subroot->parse->rtable,
1328 : list_length(final_rtable)));
1329 :
1330 : /*
1331 : * We need to collect all the RelOptInfos from all child plans into
1332 : * the main PlannerInfo, since setrefs.c will need them. We use the
1333 : * last child's simple_rel_array (previous ones are too short), so we
1334 : * have to propagate forward the RelOptInfos that were already built
1335 : * in previous children.
1336 : */
1337 182 : Assert(subroot->simple_rel_array_size >= save_rel_array_size);
1338 834 : for (rti = 1; rti < save_rel_array_size; rti++)
1339 : {
1340 652 : RelOptInfo *brel = save_rel_array[rti];
1341 :
1342 652 : if (brel)
1343 278 : subroot->simple_rel_array[rti] = brel;
1344 : }
1345 182 : save_rel_array_size = subroot->simple_rel_array_size;
1346 182 : save_rel_array = subroot->simple_rel_array;
1347 :
1348 : /* Make sure any initplans from this rel get into the outer list */
1349 182 : root->init_plans = subroot->init_plans;
1350 :
1351 : /* Build list of sub-paths */
1352 182 : subpaths = lappend(subpaths, subpath);
1353 :
1354 : /* Build list of modified subroots, too */
1355 182 : subroots = lappend(subroots, subroot);
1356 :
1357 : /* Build list of target-relation RT indexes */
1358 182 : resultRelations = lappend_int(resultRelations, appinfo->child_relid);
1359 :
1360 : /* Build lists of per-relation WCO and RETURNING targetlists */
1361 182 : if (parse->withCheckOptions)
1362 24 : withCheckOptionLists = lappend(withCheckOptionLists,
1363 24 : subroot->parse->withCheckOptions);
1364 182 : if (parse->returningList)
1365 21 : returningLists = lappend(returningLists,
1366 21 : subroot->parse->returningList);
1367 :
1368 182 : Assert(!parse->onConflict);
1369 : }
1370 :
1371 67 : if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
1372 : {
1373 11 : partitioned_rels = get_partitioned_child_rels(root, parentRTindex);
1374 : /* The root partitioned table is included as a child rel */
1375 11 : Assert(list_length(partitioned_rels) >= 1);
1376 : }
1377 :
1378 : /* Result path must go into outer query's FINAL upperrel */
1379 67 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1380 :
1381 : /*
1382 : * We don't currently worry about setting final_rel's consider_parallel
1383 : * flag in this case, nor about allowing FDWs or create_upper_paths_hook
1384 : * to get control here.
1385 : */
1386 :
1387 : /*
1388 : * If we managed to exclude every child rel, return a dummy plan; it
1389 : * doesn't even need a ModifyTable node.
1390 : */
1391 67 : if (subpaths == NIL)
1392 : {
1393 1 : set_dummy_rel_pathlist(final_rel);
1394 68 : return;
1395 : }
1396 :
1397 : /*
1398 : * Put back the final adjusted rtable into the master copy of the Query.
1399 : * (We mustn't do this if we found no non-excluded children.)
1400 : */
1401 66 : parse->rtable = final_rtable;
1402 66 : root->simple_rel_array_size = save_rel_array_size;
1403 66 : root->simple_rel_array = save_rel_array;
1404 : /* Must reconstruct master's simple_rte_array, too */
1405 66 : root->simple_rte_array = (RangeTblEntry **)
1406 66 : palloc0((list_length(final_rtable) + 1) * sizeof(RangeTblEntry *));
1407 66 : rti = 1;
1408 424 : foreach(lc, final_rtable)
1409 : {
1410 358 : RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1411 :
1412 358 : root->simple_rte_array[rti++] = rte;
1413 : }
1414 :
1415 : /*
1416 : * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node will
1417 : * have dealt with fetching non-locked marked rows, else we need to have
1418 : * ModifyTable do that.
1419 : */
1420 66 : if (parse->rowMarks)
1421 0 : rowMarks = NIL;
1422 : else
1423 66 : rowMarks = root->rowMarks;
1424 :
1425 : /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */
1426 66 : add_path(final_rel, (Path *)
1427 132 : create_modifytable_path(root, final_rel,
1428 : parse->commandType,
1429 66 : parse->canSetTag,
1430 : nominalRelation,
1431 : partitioned_rels,
1432 : resultRelations,
1433 : subpaths,
1434 : subroots,
1435 : withCheckOptionLists,
1436 : returningLists,
1437 : rowMarks,
1438 : NULL,
1439 : SS_assign_special_param(root)));
1440 : }
1441 :
1442 : /*--------------------
1443 : * grouping_planner
1444 : * Perform planning steps related to grouping, aggregation, etc.
1445 : *
1446 : * This function adds all required top-level processing to the scan/join
1447 : * Path(s) produced by query_planner.
1448 : *
1449 : * If inheritance_update is true, we're being called from inheritance_planner
1450 : * and should not include a ModifyTable step in the resulting Path(s).
1451 : * (inheritance_planner will create a single ModifyTable node covering all the
1452 : * target tables.)
1453 : *
1454 : * tuple_fraction is the fraction of tuples we expect will be retrieved;
1455 : * it is interpreted as follows:
1456 : * 0: expect all tuples to be retrieved (normal case)
1457 : * 0 < tuple_fraction < 1: expect the given fraction of tuples available
1458 : * from the plan to be retrieved
1459 : * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1460 : * expected to be retrieved (ie, a LIMIT specification)
1461 : *
1462 : * Returns nothing; the useful output is in the Paths we attach to the
1463 : * (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
1464 : * root->processed_tlist contains the final processed targetlist.
1465 : *
1466 : * Note that we have not done set_cheapest() on the final rel; it's convenient
1467 : * to leave this to the caller.
1468 : *--------------------
1469 : */
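     : /*
     :  * For illustration (hypothetical queries): a plain "SELECT * FROM t"
     :  * is planned with tuple_fraction = 0 (fetch all); "SELECT * FROM t
     :  * LIMIT 10" causes preprocess_limit below to raise tuple_fraction to
     :  * 10 (an absolute count); and a cursor planned under the default
     :  * cursor_tuple_fraction GUC arrives here with tuple_fraction = 0.1.
     :  */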
1470 : static void
1471 25300 : grouping_planner(PlannerInfo *root, bool inheritance_update,
1472 : double tuple_fraction)
1473 : {
1474 25300 : Query *parse = root->parse;
1475 25300 : List *tlist = parse->targetList;
1476 25300 : int64 offset_est = 0;
1477 25300 : int64 count_est = 0;
1478 25300 : double limit_tuples = -1.0;
1479 25300 : bool have_postponed_srfs = false;
1480 : PathTarget *final_target;
1481 : List *final_targets;
1482 : List *final_targets_contain_srfs;
1483 : RelOptInfo *current_rel;
1484 : RelOptInfo *final_rel;
1485 : ListCell *lc;
1486 :
1487 : /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1488 25300 : if (parse->limitCount || parse->limitOffset)
1489 : {
1490 218 : tuple_fraction = preprocess_limit(root, tuple_fraction,
1491 : &offset_est, &count_est);
1492 :
1493 : /*
1494 : * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1495 : * estimate the effects of using a bounded sort.
1496 : */
1497 218 : if (count_est > 0 && offset_est >= 0)
1498 173 : limit_tuples = (double) count_est + (double) offset_est;
1499 : }
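     : /*
     :  * For illustration (hypothetical clause): LIMIT 5 OFFSET 2 gives
     :  * limit_tuples = 7, letting sort costing assume a bounded (top-N)
     :  * sort that need only retain 7 rows.
     :  */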
1500 :
1501 : /* Make tuple_fraction accessible to lower-level routines */
1502 25300 : root->tuple_fraction = tuple_fraction;
1503 :
1504 25300 : if (parse->setOperations)
1505 : {
1506 : /*
1507 : * If there's a top-level ORDER BY, assume we have to fetch all the
1508 : * tuples. This might be too simplistic given all the hackery below
1509 : * to possibly avoid the sort; but the odds of accurate estimates here
1510 : * are pretty low anyway. XXX try to get rid of this in favor of
1511 : * letting plan_set_operations generate both fast-start and
1512 : * cheapest-total paths.
1513 : */
1514 127 : if (parse->sortClause)
1515 49 : root->tuple_fraction = 0.0;
1516 :
1517 : /*
1518 : * Construct Paths for set operations. The results will not need any
1519 : * work except perhaps a top-level sort and/or LIMIT. Note that any
1520 : * special work for recursive unions is the responsibility of
1521 : * plan_set_operations.
1522 : */
1523 127 : current_rel = plan_set_operations(root);
1524 :
1525 : /*
1526 : * We should not need to call preprocess_targetlist, since we must be
1527 : * in a SELECT query node. Instead, use the targetlist returned by
1528 : * plan_set_operations (since this tells whether it returned any
1529 : * resjunk columns!), and transfer any sort key information from the
1530 : * original tlist.
1531 : */
1532 127 : Assert(parse->commandType == CMD_SELECT);
1533 :
1534 127 : tlist = root->processed_tlist; /* from plan_set_operations */
1535 :
1536 : /* for safety, copy processed_tlist instead of modifying in-place */
1537 127 : tlist = postprocess_setop_tlist(copyObject(tlist), parse->targetList);
1538 :
1539 : /* Save aside the final decorated tlist */
1540 127 : root->processed_tlist = tlist;
1541 :
1542 : /* Also extract the PathTarget form of the setop result tlist */
1543 127 : final_target = current_rel->cheapest_total_path->pathtarget;
1544 :
1545 : /* The setop result tlist couldn't contain any SRFs */
1546 127 : Assert(!parse->hasTargetSRFs);
1547 127 : final_targets = final_targets_contain_srfs = NIL;
1548 :
1549 : /*
1550 : * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1551 : * checked already, but let's make sure).
1552 : */
1553 127 : if (parse->rowMarks)
1554 0 : ereport(ERROR,
1555 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1556 : /*------
1557 : translator: %s is a SQL row locking clause such as FOR UPDATE */
1558 : errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1559 : LCS_asString(((RowMarkClause *)
1560 : linitial(parse->rowMarks))->strength))));
1561 :
1562 : /*
1563 : * Calculate pathkeys that represent result ordering requirements
1564 : */
1565 127 : Assert(parse->distinctClause == NIL);
1566 127 : root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1567 : parse->sortClause,
1568 : tlist);
1569 : }
1570 : else
1571 : {
1572 : /* No set operations, do regular planning */
1573 : PathTarget *sort_input_target;
1574 : List *sort_input_targets;
1575 : List *sort_input_targets_contain_srfs;
1576 : PathTarget *grouping_target;
1577 : List *grouping_targets;
1578 : List *grouping_targets_contain_srfs;
1579 : PathTarget *scanjoin_target;
1580 : List *scanjoin_targets;
1581 : List *scanjoin_targets_contain_srfs;
1582 : bool have_grouping;
1583 : AggClauseCosts agg_costs;
1584 25173 : WindowFuncLists *wflists = NULL;
1585 25173 : List *activeWindows = NIL;
1586 25173 : grouping_sets_data *gset_data = NULL;
1587 : standard_qp_extra qp_extra;
1588 :
1589 : /* A recursive query should always have setOperations */
1590 25173 : Assert(!root->hasRecursion);
1591 :
1592 : /* Preprocess grouping sets and GROUP BY clause, if any */
1593 25173 : if (parse->groupingSets)
1594 : {
1595 91 : gset_data = preprocess_grouping_sets(root);
1596 : }
1597 : else
1598 : {
1599 : /* Preprocess regular GROUP BY clause, if any */
1600 25082 : if (parse->groupClause)
1601 217 : parse->groupClause = preprocess_groupclause(root, NIL);
1602 : }
1603 :
1604 : /* Preprocess targetlist */
1605 25172 : tlist = preprocess_targetlist(root, tlist);
1606 :
1607 25172 : if (parse->onConflict)
1608 332 : parse->onConflict->onConflictSet =
1609 166 : preprocess_onconflict_targetlist(parse->onConflict->onConflictSet,
1610 : parse->resultRelation,
1611 : parse->rtable);
1612 :
1613 : /*
1614 : * We are now done hacking up the query's targetlist. Most of the
1615 : * remaining planning work will be done with the PathTarget
1616 : * representation of tlists, but save aside the full representation so
1617 : * that we can transfer its decoration (resnames etc) to the topmost
1618 : * tlist of the finished Plan.
1619 : */
1620 25172 : root->processed_tlist = tlist;
1621 :
1622 : /*
1623 : * Collect statistics about aggregates for estimating costs, and mark
1624 : * all the aggregates with resolved aggtranstypes. We must do this
1625 : * before slicing and dicing the tlist into various pathtargets, else
1626 : * some copies of the Aggref nodes might escape being marked with the
1627 : * correct transtypes.
1628 : *
1629 : * Note: currently, we do not detect duplicate aggregates here. This
1630 : * may result in somewhat-overestimated cost, which is fine for our
1631 : * purposes since all Paths will get charged the same. But at some
1632 : * point we might wish to do that detection in the planner, rather
1633 : * than during executor startup.
1634 : */
1635 25172 : MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
1636 25172 : if (parse->hasAggs)
1637 : {
1638 2367 : get_agg_clause_costs(root, (Node *) tlist, AGGSPLIT_SIMPLE,
1639 : &agg_costs);
1640 2367 : get_agg_clause_costs(root, parse->havingQual, AGGSPLIT_SIMPLE,
1641 : &agg_costs);
1642 : }
1643 :
1644 : /*
1645 : * Locate any window functions in the tlist. (We don't need to look
1646 : * anywhere else, since expressions used in ORDER BY will be in there
1647 : * too.) Note that they could all have been eliminated by constant
1648 : * folding, in which case we don't need to do any more work.
1649 : */
1650 25172 : if (parse->hasWindowFuncs)
1651 : {
1652 133 : wflists = find_window_functions((Node *) tlist,
1653 133 : list_length(parse->windowClause));
1654 133 : if (wflists->numWindowFuncs > 0)
1655 133 : activeWindows = select_active_windows(root, wflists);
1656 : else
1657 0 : parse->hasWindowFuncs = false;
1658 : }
1659 :
1660 : /*
1661 : * Preprocess MIN/MAX aggregates, if any. Note: be careful about
1662 : * adding logic between here and the query_planner() call. Anything
1663 : * that is needed in MIN/MAX-optimizable cases will have to be
1664 : * duplicated in planagg.c.
1665 : */
1666 25172 : if (parse->hasAggs)
1667 2367 : preprocess_minmax_aggregates(root, tlist);
1668 :
1669 : /*
1670 : * Figure out whether there's a hard limit on the number of rows that
1671 : * query_planner's result subplan needs to return. Even if we know a
1672 : * hard limit overall, it doesn't apply if the query has any
1673 : * grouping/aggregation operations, or SRFs in the tlist.
1674 : */
1675 50044 : if (parse->groupClause ||
1676 49737 : parse->groupingSets ||
1677 49670 : parse->distinctClause ||
1678 47519 : parse->hasAggs ||
1679 45312 : parse->hasWindowFuncs ||
1680 45006 : parse->hasTargetSRFs ||
1681 22408 : root->hasHavingQual)
1682 2767 : root->limit_tuples = -1.0;
1683 : else
1684 22405 : root->limit_tuples = limit_tuples;
1685 :
1686 : /* Set up data needed by standard_qp_callback */
1687 25172 : qp_extra.tlist = tlist;
1688 25172 : qp_extra.activeWindows = activeWindows;
1689 25172 : qp_extra.groupClause = (gset_data
1690 90 : ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL)
1691 25262 : : parse->groupClause);
1692 :
1693 : /*
1694 : * Generate the best unsorted and presorted paths for the scan/join
1695 : * portion of this Query, ie the processing represented by the
1696 : * FROM/WHERE clauses. (Note there may not be any presorted paths.)
1697 : * We also generate (in standard_qp_callback) pathkey representations
1698 : * of the query's sort clause, distinct clause, etc.
1699 : */
1700 25172 : current_rel = query_planner(root, tlist,
1701 : standard_qp_callback, &qp_extra);
1702 :
1703 : /*
1704 : * Convert the query's result tlist into PathTarget format.
1705 : *
1706 : * Note: it's desirable to not do this till after query_planner(),
1707 : * because the target width estimates can use per-Var width numbers
1708 : * that were obtained within query_planner().
1709 : */
1710 25170 : final_target = create_pathtarget(root, tlist);
1711 :
1712 : /*
1713 : * If ORDER BY was given, consider whether we should use a post-sort
1714 : * projection, and compute the adjusted target for preceding steps if
1715 : * so.
1716 : */
1717 25170 : if (parse->sortClause)
1718 2767 : sort_input_target = make_sort_input_target(root,
1719 : final_target,
1720 : &have_postponed_srfs);
1721 : else
1722 22403 : sort_input_target = final_target;
1723 :
1724 : /*
1725 : * If we have window functions to deal with, the output from any
1726 : * grouping step needs to be what the window functions want;
1727 : * otherwise, it should be sort_input_target.
1728 : */
1729 25170 : if (activeWindows)
1730 133 : grouping_target = make_window_input_target(root,
1731 : final_target,
1732 : activeWindows);
1733 : else
1734 25037 : grouping_target = sort_input_target;
1735 :
1736 : /*
1737 : * If we have grouping or aggregation to do, the topmost scan/join
1738 : * plan node must emit what the grouping step wants; otherwise, it
1739 : * should emit grouping_target.
1740 : */
1741 100073 : have_grouping = (parse->groupClause || parse->groupingSets ||
1742 72801 : parse->hasAggs || root->hasHavingQual);
1743 25170 : if (have_grouping)
1744 2405 : scanjoin_target = make_group_input_target(root, final_target);
1745 : else
1746 22765 : scanjoin_target = grouping_target;
1747 :
1748 : /*
1749 : * If there are any SRFs in the targetlist, we must separate each of
1750 : * these PathTargets into SRF-computing and SRF-free targets. Replace
1751 : * each of the named targets with a SRF-free version, and remember the
1752 : * list of additional projection steps we need to add afterwards.
1753 : */
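     : /*
     :  * For illustration (hypothetical query): in
     :  *   SELECT x, generate_series(1, 3) FROM t;
     :  * the scan/join target is reduced to just x, and the set-returning
     :  * function is evaluated by a separate ProjectSet projection step
     :  * added later by adjust_paths_for_srfs.
     :  */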
1754 25170 : if (parse->hasTargetSRFs)
1755 : {
1756 : /* final_target doesn't recompute any SRFs in sort_input_target */
1757 216 : split_pathtarget_at_srfs(root, final_target, sort_input_target,
1758 : &final_targets,
1759 : &final_targets_contain_srfs);
1760 216 : final_target = (PathTarget *) linitial(final_targets);
1761 216 : Assert(!linitial_int(final_targets_contain_srfs));
1762 : /* likewise for sort_input_target vs. grouping_target */
1763 216 : split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
1764 : &sort_input_targets,
1765 : &sort_input_targets_contain_srfs);
1766 216 : sort_input_target = (PathTarget *) linitial(sort_input_targets);
1767 216 : Assert(!linitial_int(sort_input_targets_contain_srfs));
1768 : /* likewise for grouping_target vs. scanjoin_target */
1769 216 : split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
1770 : &grouping_targets,
1771 : &grouping_targets_contain_srfs);
1772 216 : grouping_target = (PathTarget *) linitial(grouping_targets);
1773 216 : Assert(!linitial_int(grouping_targets_contain_srfs));
1774 : /* scanjoin_target will not have any SRFs precomputed for it */
1775 216 : split_pathtarget_at_srfs(root, scanjoin_target, NULL,
1776 : &scanjoin_targets,
1777 : &scanjoin_targets_contain_srfs);
1778 216 : scanjoin_target = (PathTarget *) linitial(scanjoin_targets);
1779 216 : Assert(!linitial_int(scanjoin_targets_contain_srfs));
1780 : }
1781 : else
1782 : {
1783 : /* initialize lists, just to keep compiler quiet */
1784 24954 : final_targets = final_targets_contain_srfs = NIL;
1785 24954 : sort_input_targets = sort_input_targets_contain_srfs = NIL;
1786 24954 : grouping_targets = grouping_targets_contain_srfs = NIL;
1787 24954 : scanjoin_targets = scanjoin_targets_contain_srfs = NIL;
1788 : }
1789 :
1790 : /*
1791 : * Forcibly apply SRF-free scan/join target to all the Paths for the
1792 : * scan/join rel.
1793 : *
1794 : * In principle we should re-run set_cheapest() here to identify the
1795 : * cheapest path, but it seems unlikely that adding the same tlist
1796 : * eval costs to all the paths would change that, so we don't bother.
1797 : * Instead, just assume that the cheapest-startup and cheapest-total
1798 : * paths remain so. (There should be no parameterized paths anymore,
1799 : * so we needn't worry about updating cheapest_parameterized_paths.)
1800 : */
1801 51427 : foreach(lc, current_rel->pathlist)
1802 : {
1803 26257 : Path *subpath = (Path *) lfirst(lc);
1804 : Path *path;
1805 :
1806 26257 : Assert(subpath->param_info == NULL);
1807 26257 : path = apply_projection_to_path(root, current_rel,
1808 : subpath, scanjoin_target);
1809 : /* If we had to add a Result, path is different from subpath */
1810 26257 : if (path != subpath)
1811 : {
1812 463 : lfirst(lc) = path;
1813 463 : if (subpath == current_rel->cheapest_startup_path)
1814 443 : current_rel->cheapest_startup_path = path;
1815 463 : if (subpath == current_rel->cheapest_total_path)
1816 443 : current_rel->cheapest_total_path = path;
1817 : }
1818 : }
1819 :
1820 : /*
1821 : * Upper planning steps which make use of the top scan/join rel's
1822 : * partial pathlist will expect partial paths for that rel to produce
1823 : * the same output as complete paths ... and we just changed the
1824 : * output for the complete paths, so we'll need to do the same thing
1825 : * for partial paths. But only parallel-safe expressions can be
1826 : * computed by partial paths.
1827 : */
1828 25577 : if (current_rel->partial_pathlist &&
1829 206 : is_parallel_safe(root, (Node *) scanjoin_target->exprs))
1830 : {
1831 : /* Apply the scan/join target to each partial path */
1832 403 : foreach(lc, current_rel->partial_pathlist)
1833 : {
1834 202 : Path *subpath = (Path *) lfirst(lc);
1835 : Path *newpath;
1836 :
1837 : /* Shouldn't have any parameterized paths anymore */
1838 202 : Assert(subpath->param_info == NULL);
1839 :
1840 : /*
1841 : * Don't use apply_projection_to_path() here, because there
1842 : * could be other pointers to these paths, and therefore we
1843 : * mustn't modify them in place.
1844 : */
1845 202 : newpath = (Path *) create_projection_path(root,
1846 : current_rel,
1847 : subpath,
1848 : scanjoin_target);
1849 202 : lfirst(lc) = newpath;
1850 : }
1851 : }
1852 : else
1853 : {
1854 : /*
1855 : * In the unfortunate event that scanjoin_target is not
1856 : * parallel-safe, we can't apply it to the partial paths; in that
1857 : * case, we'll need to forget about the partial paths, which
1858 : * aren't valid input for upper planning steps.
1859 : */
1860 24969 : current_rel->partial_pathlist = NIL;
1861 : }
1862 :
1863 : /* Now fix things up if scan/join target contains SRFs */
1864 25170 : if (parse->hasTargetSRFs)
1865 216 : adjust_paths_for_srfs(root, current_rel,
1866 : scanjoin_targets,
1867 : scanjoin_targets_contain_srfs);
1868 :
1869 : /*
1870 : * Save the various upper-rel PathTargets we just computed into
1871 : * root->upper_targets[]. The core code doesn't use this, but it
1872 : * provides a convenient place for extensions to get at the info. For
1873 : * consistency, we save all the intermediate targets, even though some
1874 : * of the corresponding upperrels might not be needed for this query.
1875 : */
1876 25170 : root->upper_targets[UPPERREL_FINAL] = final_target;
1877 25170 : root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1878 25170 : root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1879 :
1880 : /*
1881 : * If we have grouping and/or aggregation, consider ways to implement
1882 : * that. We build a new upperrel representing the output of this
1883 : * phase.
1884 : */
1885 25170 : if (have_grouping)
1886 : {
1887 2405 : current_rel = create_grouping_paths(root,
1888 : current_rel,
1889 : grouping_target,
1890 : &agg_costs,
1891 : gset_data);
1892 : /* Fix things up if grouping_target contains SRFs */
1893 2404 : if (parse->hasTargetSRFs)
1894 19 : adjust_paths_for_srfs(root, current_rel,
1895 : grouping_targets,
1896 : grouping_targets_contain_srfs);
1897 : }
1898 :
1899 : /*
1900 : * If we have window functions, consider ways to implement those. We
1901 : * build a new upperrel representing the output of this phase.
1902 : */
1903 25169 : if (activeWindows)
1904 : {
1905 133 : current_rel = create_window_paths(root,
1906 : current_rel,
1907 : grouping_target,
1908 : sort_input_target,
1909 : tlist,
1910 : wflists,
1911 : activeWindows);
1912 : /* Fix things up if sort_input_target contains SRFs */
1913 133 : if (parse->hasTargetSRFs)
1914 2 : adjust_paths_for_srfs(root, current_rel,
1915 : sort_input_targets,
1916 : sort_input_targets_contain_srfs);
1917 : }
1918 :
1919 : /*
1920 : * If there is a DISTINCT clause, consider ways to implement that. We
1921 : * build a new upperrel representing the output of this phase.
1922 : */
1923 25169 : if (parse->distinctClause)
1924 : {
1925 60 : current_rel = create_distinct_paths(root,
1926 : current_rel);
1927 : }
1928 : } /* end of if (setOperations) */
1929 :
1930 : /*
1931 : * If ORDER BY was given, consider ways to implement that, and generate a
1932 : * new upperrel containing only paths that emit the correct ordering and
1933 : * project the correct final_target. We can apply the original
1934 : * limit_tuples limit in sort costing here, but only if there are no
1935 : * postponed SRFs.
1936 : */
1937 25296 : if (parse->sortClause)
1938 : {
1939 2816 : current_rel = create_ordered_paths(root,
1940 : current_rel,
1941 : final_target,
1942 2816 : have_postponed_srfs ? -1.0 :
1943 : limit_tuples);
1944 : /* Fix things up if final_target contains SRFs */
1945 2816 : if (parse->hasTargetSRFs)
1946 26 : adjust_paths_for_srfs(root, current_rel,
1947 : final_targets,
1948 : final_targets_contain_srfs);
1949 : }
1950 :
1951 : /*
1952 : * Now we are prepared to build the final-output upperrel.
1953 : */
1954 25296 : final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1955 :
1956 : /*
1957 : * If the input rel is marked consider_parallel and there's nothing that's
1958 : * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1959 : * consider_parallel as well. Note that if the query has rowMarks or is
1960 : * not a SELECT, consider_parallel will be false for every relation in the
1961 : * query.
1962 : */
1963 37782 : if (current_rel->consider_parallel &&
1964 24968 : is_parallel_safe(root, parse->limitOffset) &&
1965 12482 : is_parallel_safe(root, parse->limitCount))
1966 12481 : final_rel->consider_parallel = true;
1967 :
1968 : /*
1969 : * If the current_rel belongs to a single FDW, so does the final_rel.
1970 : */
1971 25296 : final_rel->serverid = current_rel->serverid;
1972 25296 : final_rel->userid = current_rel->userid;
1973 25296 : final_rel->useridiscurrent = current_rel->useridiscurrent;
1974 25296 : final_rel->fdwroutine = current_rel->fdwroutine;
1975 :
1976 : /*
1977 : * Generate paths for the final_rel. Insert all surviving paths, with
1978 : * LockRows, Limit, and/or ModifyTable steps added if needed.
1979 : */
1980 50884 : foreach(lc, current_rel->pathlist)
1981 : {
1982 25588 : Path *path = (Path *) lfirst(lc);
1983 :
1984 : /*
1985 : * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1986 : * (Note: we intentionally test parse->rowMarks not root->rowMarks
1987 : * here. If there are only non-locking rowmarks, they should be
1988 : * handled by the ModifyTable node instead. However, root->rowMarks
1989 : * is what goes into the LockRows node.)
1990 : */
1991 25588 : if (parse->rowMarks)
1992 : {
1993 332 : path = (Path *) create_lockrows_path(root, final_rel, path,
1994 : root->rowMarks,
1995 : SS_assign_special_param(root));
1996 : }
1997 :
1998 : /*
1999 : * If there is a LIMIT/OFFSET clause, add the LIMIT node.
2000 : */
2001 25588 : if (limit_needed(parse))
2002 : {
2003 205 : path = (Path *) create_limit_path(root, final_rel, path,
2004 : parse->limitOffset,
2005 : parse->limitCount,
2006 : offset_est, count_est);
2007 : }
2008 :
2009 : /*
2010 : * If this is an INSERT/UPDATE/DELETE, and we're not being called from
2011 : * inheritance_planner, add the ModifyTable node.
2012 : */
2013 25588 : if (parse->commandType != CMD_SELECT && !inheritance_update)
2014 : {
2015 : List *withCheckOptionLists;
2016 : List *returningLists;
2017 : List *rowMarks;
2018 :
2019 : /*
2020 : * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if
2021 : * needed.
2022 : */
2023 4274 : if (parse->withCheckOptions)
2024 106 : withCheckOptionLists = list_make1(parse->withCheckOptions);
2025 : else
2026 4168 : withCheckOptionLists = NIL;
2027 :
2028 4274 : if (parse->returningList)
2029 200 : returningLists = list_make1(parse->returningList);
2030 : else
2031 4074 : returningLists = NIL;
2032 :
2033 : /*
2034 : * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
2035 : * will have dealt with fetching non-locked marked rows, else we
2036 : * need to have ModifyTable do that.
2037 : */
2038 4274 : if (parse->rowMarks)
2039 0 : rowMarks = NIL;
2040 : else
2041 4274 : rowMarks = root->rowMarks;
2042 :
2043 4274 : path = (Path *)
2044 12822 : create_modifytable_path(root, final_rel,
2045 : parse->commandType,
2046 4274 : parse->canSetTag,
2047 4274 : parse->resultRelation,
2048 : NIL,
2049 : list_make1_int(parse->resultRelation),
2050 : list_make1(path),
2051 : list_make1(root),
2052 : withCheckOptionLists,
2053 : returningLists,
2054 : rowMarks,
2055 : parse->onConflict,
2056 : SS_assign_special_param(root));
2057 : }
2058 :
2059 : /* And shove it into final_rel */
2060 25588 : add_path(final_rel, path);
2061 : }
2062 :
2063 : /*
2064 : * If there is an FDW that's responsible for all baserels of the query,
2065 : * let it consider adding ForeignPaths.
2066 : */
2067 25296 : if (final_rel->fdwroutine &&
2068 0 : final_rel->fdwroutine->GetForeignUpperPaths)
2069 0 : final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
2070 : current_rel, final_rel);
2071 :
2072 : /* Let extensions possibly add some more paths */
2073 25296 : if (create_upper_paths_hook)
2074 0 : (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2075 : current_rel, final_rel);
2076 :
2077 : /* Note: currently, we leave it to callers to do set_cheapest() */
2078 25296 : }
2079 :
2080 : /*
2081 : * Do preprocessing for groupingSets clause and related data. This handles the
2082 : * preliminary steps of expanding the grouping sets, organizing them into lists
2083 : * of rollups, and preparing annotations which will later be filled in with
2084 : * size estimates.
2085 : */
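     : /*
     :  * For illustration (hypothetical clause): GROUP BY CUBE(a, b)
     :  * expands here into the four sets (a, b), (a), (b) and (), which
     :  * are then organized into rollup lists and annotated for later
     :  * size estimation.
     :  */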
2086 : static grouping_sets_data *
2087 91 : preprocess_grouping_sets(PlannerInfo *root)
2088 : {
2089 91 : Query *parse = root->parse;
2090 : List *sets;
2091 91 : int maxref = 0;
2092 : ListCell *lc;
2093 : ListCell *lc_set;
2094 91 : grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data));
2095 :
2096 91 : parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
2097 :
2098 91 : gd->any_hashable = false;
2099 91 : gd->unhashable_refs = NULL;
2100 91 : gd->unsortable_refs = NULL;
2101 91 : gd->unsortable_sets = NIL;
2102 :
2103 91 : if (parse->groupClause)
2104 : {
2105 : ListCell *lc;
2106 :
2107 277 : foreach(lc, parse->groupClause)
2108 : {
2109 193 : SortGroupClause *gc = lfirst(lc);
2110 193 : Index ref = gc->tleSortGroupRef;
2111 :
2112 193 : if (ref > maxref)
2113 191 : maxref = ref;
2114 :
2115 193 : if (!gc->hashable)
2116 5 : gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref);
2117 :
2118 193 : if (!OidIsValid(gc->sortop))
2119 6 : gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref);
2120 : }
2121 : }
2122 :
2123 : /* Allocate workspace array for remapping */
2124 91 : gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
2125 :
2126 : /*
2127 : * If we have any unsortable sets, we must extract them before trying to
2128 : * prepare rollups. Unsortable sets don't go through
2129 : * reorder_grouping_sets, so we must apply the GroupingSetData annotation
2130 : * here.
2131 : */
2132 91 : if (!bms_is_empty(gd->unsortable_refs))
2133 : {
2134 6 : List *sortable_sets = NIL;
2135 :
2136 18 : foreach(lc, parse->groupingSets)
2137 : {
2138 13 : List *gset = lfirst(lc);
2139 :
2140 13 : if (bms_overlap_list(gd->unsortable_refs, gset))
2141 : {
2142 6 : GroupingSetData *gs = makeNode(GroupingSetData);
2143 :
2144 6 : gs->set = gset;
2145 6 : gd->unsortable_sets = lappend(gd->unsortable_sets, gs);
2146 :
2147 : /*
2148 : * We must enforce here that an unsortable set is hashable;
2149 : * later code assumes this. Parse analysis only checks that
2150 : * every individual column is either hashable or sortable.
2151 : *
2152 : * Note that passing this test doesn't guarantee we can
2153 : * generate a plan; there might be other showstoppers.
2154 : */
2155 6 : if (bms_overlap_list(gd->unhashable_refs, gset))
2156 1 : ereport(ERROR,
2157 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2158 : errmsg("could not implement GROUP BY"),
2159 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2160 : }
2161 : else
2162 7 : sortable_sets = lappend(sortable_sets, gset);
2163 : }
2164 :
2165 5 : if (sortable_sets)
2166 5 : sets = extract_rollup_sets(sortable_sets);
2167 : else
2168 0 : sets = NIL;
2169 : }
2170 : else
2171 85 : sets = extract_rollup_sets(parse->groupingSets);
2172 :
2173 237 : foreach(lc_set, sets)
2174 : {
2175 147 : List *current_sets = (List *) lfirst(lc_set);
2176 147 : RollupData *rollup = makeNode(RollupData);
2177 : GroupingSetData *gs;
2178 :
2179 : /*
2180 : * Reorder the current list of grouping sets into correct prefix
2181 : * order. If only one aggregation pass is needed, try to make the
2182 : * list match the ORDER BY clause; if more than one pass is needed, we
2183 : * don't bother with that.
2184 : *
2185 : * Note that this reorders the sets from smallest-member-first to
2186 : * largest-member-first, and applies the GroupingSetData annotations,
2187 : * though the data will be filled in later.
2188 : */
2189 147 : current_sets = reorder_grouping_sets(current_sets,
2190 147 : (list_length(sets) == 1
2191 : ? parse->sortClause
2192 : : NIL));
2193 :
2194 : /*
2195 : * Get the initial (and therefore largest) grouping set.
2196 : */
2197 147 : gs = linitial(current_sets);
2198 :
2199 : /*
2200 : * Order the groupClause appropriately. If the first grouping set is
2201 : * empty, then the groupClause must also be empty; otherwise we have
2202 : * to force the groupClause to match that grouping set's order.
2203 : *
2204 : * (The first grouping set can be empty even though parse->groupClause
2205 : * is not empty only if all non-empty grouping sets are unsortable.
2206 : * The groupClauses for hashed grouping sets are built later on.)
2207 : */
2208 147 : if (gs->set)
2209 140 : rollup->groupClause = preprocess_groupclause(root, gs->set);
2210 : else
2211 7 : rollup->groupClause = NIL;
2212 :
2213 : /*
2214 : * Is it hashable? We pretend empty sets are hashable even though we
2215 : * actually force them not to be hashed later. But don't bother if
2216 : * there's nothing but empty sets (since in that case we can't hash
2217 : * anything).
2218 : */
2219 287 : if (gs->set &&
2220 140 : !bms_overlap_list(gd->unhashable_refs, gs->set))
2221 : {
2222 136 : rollup->hashable = true;
2223 136 : gd->any_hashable = true;
2224 : }
2225 :
2226 : /*
2227 : * Now that we've pinned down an order for the groupClause for this
2228 : * list of grouping sets, we need to remap the entries in the grouping
2229 : * sets from sortgrouprefs to plain indices (0-based) into the
2230 : * groupClause for this collection of grouping sets. We keep the
2231 : * original form for later use, though.
2232 : */
2233 147 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
2234 : current_sets,
2235 : gd->tleref_to_colnum_map);
2236 147 : rollup->gsets_data = current_sets;
2237 :
2238 147 : gd->rollups = lappend(gd->rollups, rollup);
2239 : }
2240 :
2241 90 : if (gd->unsortable_sets)
2242 : {
2243 : /*
2244 : * We have not yet pinned down a groupclause for this, but we will
2245 : * need index-based lists for estimation purposes. Construct
2246 : * hash_sets_idx based on the entire original groupclause for now.
2247 : */
2248 5 : gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause,
2249 : gd->unsortable_sets,
2250 : gd->tleref_to_colnum_map);
2251 5 : gd->any_hashable = true;
2252 : }
2253 :
2254 90 : return gd;
2255 : }
2256 :
2257 : /*
2258 : * Given a groupclause and a list of GroupingSetData, return equivalent sets
2259 : * (without annotation) mapped to indexes into the given groupclause.
2260 : */
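     : /*
     :  * For illustration (hypothetical values): if the groupClause
     :  * entries carry tleSortGroupRefs (3, 1, 4), the map becomes
     :  * 3->0, 1->1, 4->2, so a grouping set stored as refs (1, 4) is
     :  * returned as the index list (1, 2).
     :  */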
2261 : static List *
2262 393 : remap_to_groupclause_idx(List *groupClause,
2263 : List *gsets,
2264 : int *tleref_to_colnum_map)
2265 : {
2266 393 : int ref = 0;
2267 393 : List *result = NIL;
2268 : ListCell *lc;
2269 :
2270 962 : foreach(lc, groupClause)
2271 : {
2272 569 : SortGroupClause *gc = lfirst(lc);
2273 :
2274 569 : tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
2275 : }
2276 :
2277 920 : foreach(lc, gsets)
2278 : {
2279 527 : List *set = NIL;
2280 : ListCell *lc2;
2281 527 : GroupingSetData *gs = lfirst(lc);
2282 :
2283 1164 : foreach(lc2, gs->set)
2284 : {
2285 637 : set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]);
2286 : }
2287 :
2288 527 : result = lappend(result, set);
2289 : }
2290 :
2291 393 : return result;
2292 : }
2293 :
2296 : /*
2297 : * Detect whether a plan node is a "dummy" plan created when a relation
2298 : * is deemed not to need scanning due to constraint exclusion.
2299 : *
2300 : * Currently, such dummy plans are Result nodes with constant FALSE
2301 : * filter quals (see set_dummy_rel_pathlist and create_append_plan).
2302 : *
2303 : * XXX this probably ought to be somewhere else, but not clear where.
2304 : */
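     : /*
     :  * For illustration: EXPLAIN of a fully-excluded query such as
     :  * "SELECT * FROM t WHERE false" shows this shape as
     :  *   Result
     :  *     One-Time Filter: false
     :  * i.e. a Result whose resconstantqual is a constant FALSE.
     :  */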
2305 : bool
2306 0 : is_dummy_plan(Plan *plan)
2307 : {
2308 0 : if (IsA(plan, Result))
2309 : {
2310 0 : List *rcqual = (List *) ((Result *) plan)->resconstantqual;
2311 :
2312 0 : if (list_length(rcqual) == 1)
2313 : {
2314 0 : Const *constqual = (Const *) linitial(rcqual);
2315 :
2316 0 : if (constqual && IsA(constqual, Const))
2317 : {
2318 0 : if (!constqual->constisnull &&
2319 0 : !DatumGetBool(constqual->constvalue))
2320 0 : return true;
2321 : }
2322 : }
2323 : }
2324 0 : return false;
2325 : }
2326 :
2327 : /*
2328 : * preprocess_rowmarks - set up PlanRowMarks if needed
2329 : */
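     : /*
     :  * For illustration (hypothetical query): in
     :  *   UPDATE t1 SET ... FROM t2 WHERE ...;
     :  * t2 is neither the target nor explicitly locked, so it gets a
     :  * non-locking rowmark (ROW_MARK_REFERENCE for a regular table)
     :  * allowing EvalPlanQual to re-fetch its row during
     :  * concurrent-update rechecks.
     :  */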
2330 : static void
2331 25426 : preprocess_rowmarks(PlannerInfo *root)
2332 : {
2333 25426 : Query *parse = root->parse;
2334 : Bitmapset *rels;
2335 : List *prowmarks;
2336 : ListCell *l;
2337 : int i;
2338 :
2339 25426 : if (parse->rowMarks)
2340 : {
2341 : /*
2342 : * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2343 : * grouping, since grouping renders a reference to individual tuple
2344 : * CTIDs invalid. This is also checked at parse time, but that's
2345 : * insufficient because of rule substitution, query pullup, etc.
2346 : */
2347 328 : CheckSelectLocking(parse, ((RowMarkClause *)
2348 328 : linitial(parse->rowMarks))->strength);
2349 : }
2350 : else
2351 : {
2352 : /*
2353 : * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
2354 : * UPDATE/SHARE.
2355 : */
2356 49635 : if (parse->commandType != CMD_UPDATE &&
2357 24537 : parse->commandType != CMD_DELETE)
2358 49659 : return;
2359 : }
2360 :
2361 : /*
2362 : * We need to have rowmarks for all base relations except the target. We
2363 : * make a bitmapset of all base rels and then remove the items we don't
2364 : * need or have FOR [KEY] UPDATE/SHARE marks for.
2365 : */
2366 1193 : rels = get_relids_in_jointree((Node *) parse->jointree, false);
2367 1193 : if (parse->resultRelation)
2368 865 : rels = bms_del_member(rels, parse->resultRelation);
2369 :
2370 : /*
2371 : * Convert RowMarkClauses to PlanRowMark representation.
2372 : */
2373 1193 : prowmarks = NIL;
2374 1522 : foreach(l, parse->rowMarks)
2375 : {
2376 329 : RowMarkClause *rc = (RowMarkClause *) lfirst(l);
2377 329 : RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2378 : PlanRowMark *newrc;
2379 :
2380 : /*
2381 : * Currently, it is syntactically impossible to have FOR UPDATE et al
2382 : * applied to an update/delete target rel. If that ever becomes
2383 : * possible, we should drop the target from the PlanRowMark list.
2384 : */
2385 329 : Assert(rc->rti != parse->resultRelation);
2386 :
2387 : /*
2388 : * Ignore RowMarkClauses for subqueries; they aren't real tables and
2389 : * can't support true locking. Subqueries that got flattened into the
2390 : * main query should be ignored completely. Any that didn't will get
2391 : * ROW_MARK_COPY items in the next loop.
2392 : */
2393 329 : if (rte->rtekind != RTE_RELATION)
2394 0 : continue;
2395 :
2396 329 : rels = bms_del_member(rels, rc->rti);
2397 :
2398 329 : newrc = makeNode(PlanRowMark);
2399 329 : newrc->rti = newrc->prti = rc->rti;
2400 329 : newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2401 329 : newrc->markType = select_rowmark_type(rte, rc->strength);
2402 329 : newrc->allMarkTypes = (1 << newrc->markType);
2403 329 : newrc->strength = rc->strength;
2404 329 : newrc->waitPolicy = rc->waitPolicy;
2405 329 : newrc->isParent = false;
2406 :
2407 329 : prowmarks = lappend(prowmarks, newrc);
2408 : }
2409 :
2410 : /*
2411 : * Now, add rowmarks for any non-target, non-locked base relations.
2412 : */
2413 1193 : i = 0;
2414 3008 : foreach(l, parse->rtable)
2415 : {
2416 1815 : RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
2417 : PlanRowMark *newrc;
2418 :
2419 1815 : i++;
2420 1815 : if (!bms_is_member(i, rels))
2421 1650 : continue;
2422 :
2423 165 : newrc = makeNode(PlanRowMark);
2424 165 : newrc->rti = newrc->prti = i;
2425 165 : newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2426 165 : newrc->markType = select_rowmark_type(rte, LCS_NONE);
2427 165 : newrc->allMarkTypes = (1 << newrc->markType);
2428 165 : newrc->strength = LCS_NONE;
2429 165 : newrc->waitPolicy = LockWaitBlock; /* doesn't matter */
2430 165 : newrc->isParent = false;
2431 :
2432 165 : prowmarks = lappend(prowmarks, newrc);
2433 : }
2434 :
2435 1193 : root->rowMarks = prowmarks;
2436 : }
2437 :
2438 : /*
2439 : * Select RowMarkType to use for a given table
2440 : */
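     : /*
     :  * For illustration: on a regular table, FOR UPDATE maps to
     :  * ROW_MARK_EXCLUSIVE and FOR SHARE to ROW_MARK_SHARE, while a
     :  * non-table RTE (e.g. an unflattened subquery) gets ROW_MARK_COPY,
     :  * which carries the whole row in the plan output rather than
     :  * re-fetching it.
     :  */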
2441 : RowMarkType
2442 536 : select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2443 : {
2444 536 : if (rte->rtekind != RTE_RELATION)
2445 : {
2446 : /* If it's not a table at all, use ROW_MARK_COPY */
2447 22 : return ROW_MARK_COPY;
2448 : }
2449 514 : else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2450 : {
2451 : /* Let the FDW select the rowmark type, if it wants to */
2452 0 : FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2453 :
2454 0 : if (fdwroutine->GetForeignRowMarkType != NULL)
2455 0 : return fdwroutine->GetForeignRowMarkType(rte, strength);
2456 : /* Otherwise, use ROW_MARK_COPY by default */
2457 0 : return ROW_MARK_COPY;
2458 : }
2459 : else
2460 : {
2461 : /* Regular table, apply the appropriate lock type */
2462 514 : switch (strength)
2463 : {
2464 : case LCS_NONE:
2465 :
2466 : /*
2467 : * We don't need a tuple lock, only the ability to re-fetch
2468 : * the row.
2469 : */
2470 165 : return ROW_MARK_REFERENCE;
2471 : break;
2472 : case LCS_FORKEYSHARE:
2473 306 : return ROW_MARK_KEYSHARE;
2474 : break;
2475 : case LCS_FORSHARE:
2476 21 : return ROW_MARK_SHARE;
2477 : break;
2478 : case LCS_FORNOKEYUPDATE:
2479 0 : return ROW_MARK_NOKEYEXCLUSIVE;
2480 : break;
2481 : case LCS_FORUPDATE:
2482 22 : return ROW_MARK_EXCLUSIVE;
2483 : break;
2484 : }
2485 0 : elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2486 : return ROW_MARK_EXCLUSIVE; /* keep compiler quiet */
2487 : }
2488 : }
2489 :
2490 : /*
2491 : * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2492 : *
2493 : * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2494 : * results back in *count_est and *offset_est. These variables are set to
2495 : * 0 if the corresponding clause is not present, and -1 if it's present
2496 : * but we couldn't estimate the value for it. (The "0" convention is OK
2497 : * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2498 : * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's
2499 : * usual practice of never estimating less than one row.) These values will
2500 : * be passed to create_limit_path, which see if you change this code.
2501 : *
2502 : * The return value is the suitably adjusted tuple_fraction to use for
2503 : * planning the query. This adjustment is not overridable, since it reflects
2504 : * plan actions that grouping_planner() will certainly take, not assumptions
2505 : * about context.
2506 : */
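     : /*
     :  * For illustration (hypothetical clauses): LIMIT 10 OFFSET 5
     :  * yields *count_est = 10, *offset_est = 5 and, with no limit from
     :  * the caller, a tuple_fraction of 15; LIMIT $1 yields
     :  * *count_est = -1 and the 10% fallback fraction, unless
     :  * estimate_expression_value can resolve the parameter.
     :  */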
2507 : static double
2508 218 : preprocess_limit(PlannerInfo *root, double tuple_fraction,
2509 : int64 *offset_est, int64 *count_est)
2510 : {
2511 218 : Query *parse = root->parse;
2512 : Node *est;
2513 : double limit_fraction;
2514 :
2515 : /* Should not be called unless LIMIT or OFFSET */
2516 218 : Assert(parse->limitCount || parse->limitOffset);
2517 :
2518 : /*
2519 : * Try to obtain the clause values. We use estimate_expression_value
2520 : * primarily because it can sometimes do something useful with Params.
2521 : */
2522 218 : if (parse->limitCount)
2523 : {
2524 177 : est = estimate_expression_value(root, parse->limitCount);
2525 177 : if (est && IsA(est, Const))
2526 : {
2527 352 : if (((Const *) est)->constisnull)
2528 : {
2529 : /* NULL indicates LIMIT ALL, ie, no limit */
2530 0 : *count_est = 0; /* treat as not present */
2531 : }
2532 : else
2533 : {
2534 176 : *count_est = DatumGetInt64(((Const *) est)->constvalue);
2535 176 : if (*count_est <= 0)
2536 24 : *count_est = 1; /* force to at least 1 */
2537 : }
2538 : }
2539 : else
2540 1 : *count_est = -1; /* can't estimate */
2541 : }
2542 : else
2543 41 : *count_est = 0; /* not present */
2544 :
2545 218 : if (parse->limitOffset)
2546 : {
2547 51 : est = estimate_expression_value(root, parse->limitOffset);
2548 51 : if (est && IsA(est, Const))
2549 : {
2550 94 : if (((Const *) est)->constisnull)
2551 : {
2552 : /* Treat NULL as no offset; the executor will too */
2553 0 : *offset_est = 0; /* treat as not present */
2554 : }
2555 : else
2556 : {
2557 47 : *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2558 47 : if (*offset_est < 0)
2559 0 : *offset_est = 0; /* treat as not present */
2560 : }
2561 : }
2562 : else
2563 4 : *offset_est = -1; /* can't estimate */
2564 : }
2565 : else
2566 167 : *offset_est = 0; /* not present */
2567 :
2568 218 : if (*count_est != 0)
2569 : {
2570 : /*
2571 : * A LIMIT clause limits the absolute number of tuples returned.
2572 : * However, if it's not a constant LIMIT then we have to guess; for
2573 : * lack of a better idea, assume 10% of the plan's result is wanted.
2574 : */
2575 177 : if (*count_est < 0 || *offset_est < 0)
2576 : {
2577 : /* LIMIT or OFFSET is an expression ... punt ... */
2578 4 : limit_fraction = 0.10;
2579 : }
2580 : else
2581 : {
2582 : /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2583 173 : limit_fraction = (double) *count_est + (double) *offset_est;
2584 : }
2585 :
2586 : /*
2587 : * If we have absolute limits from both caller and LIMIT, use the
2588 : * smaller value; likewise if they are both fractional. If one is
2589 : * fractional and the other absolute, we can't easily determine which
2590 : * is smaller, but we use the heuristic that the absolute will usually
2591 : * be smaller.
2592 : */
2593 177 : if (tuple_fraction >= 1.0)
2594 : {
2595 1 : if (limit_fraction >= 1.0)
2596 : {
2597 : /* both absolute */
2598 1 : tuple_fraction = Min(tuple_fraction, limit_fraction);
2599 : }
2600 : else
2601 : {
2602 : /* caller absolute, limit fractional; use caller's value */
2603 : }
2604 : }
2605 176 : else if (tuple_fraction > 0.0)
2606 : {
2607 3 : if (limit_fraction >= 1.0)
2608 : {
2609 : /* caller fractional, limit absolute; use limit */
2610 3 : tuple_fraction = limit_fraction;
2611 : }
2612 : else
2613 : {
2614 : /* both fractional */
2615 0 : tuple_fraction = Min(tuple_fraction, limit_fraction);
2616 : }
2617 : }
2618 : else
2619 : {
2620 : /* no info from caller, just use limit */
2621 173 : tuple_fraction = limit_fraction;
2622 : }
2623 : }
2624 41 : else if (*offset_est != 0 && tuple_fraction > 0.0)
2625 : {
2626 : /*
2627 : * We have an OFFSET but no LIMIT. This acts entirely differently
2628 : * from the LIMIT case: here, we need to increase rather than decrease
2629 : * the caller's tuple_fraction, because the OFFSET causes more
2630 : * tuples to be fetched instead of fewer. This only matters if we got
2631 : * a tuple_fraction > 0, however.
2632 : *
2633 : * As above, use 10% if OFFSET is present but unestimatable.
2634 : */
2635 2 : if (*offset_est < 0)
2636 0 : limit_fraction = 0.10;
2637 : else
2638 2 : limit_fraction = (double) *offset_est;
2639 :
2640 : /*
2641 : * If we have absolute counts from both caller and OFFSET, add them
2642 : * together; likewise if they are both fractional. If one is
2643 : * fractional and the other absolute, we want to take the larger, and
2644 : * we heuristically assume that's the fractional one.
2645 : */
2646 2 : if (tuple_fraction >= 1.0)
2647 : {
2648 0 : if (limit_fraction >= 1.0)
2649 : {
2650 : /* both absolute, so add them together */
2651 0 : tuple_fraction += limit_fraction;
2652 : }
2653 : else
2654 : {
2655 : /* caller absolute, limit fractional; use limit */
2656 0 : tuple_fraction = limit_fraction;
2657 : }
2658 : }
2659 : else
2660 : {
2661 2 : if (limit_fraction >= 1.0)
2662 : {
2663 : /* caller fractional, limit absolute; use caller's value */
2664 : }
2665 : else
2666 : {
2667 : /* both fractional, so add them together */
2668 0 : tuple_fraction += limit_fraction;
2669 0 : if (tuple_fraction >= 1.0)
2670 0 : tuple_fraction = 0.0; /* assume fetch all */
2671 : }
2672 : }
2673 : }
2674 :
2675 218 : return tuple_fraction;
2676 : }
2677 :
2678 : /*
2679 : * limit_needed - do we actually need a Limit plan node?
2680 : *
2681 : * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2682 : * a Limit node. This is worth checking for because "OFFSET 0" is a common
2683 : * locution for an optimization fence. (Because other places in the planner
2684 : * merely check whether parse->limitOffset isn't NULL, it will still work as
2685 : * an optimization fence --- we're just suppressing unnecessary run-time
2686 : * overhead.)
2687 : *
2688 : * This might look like it could be merged into preprocess_limit, but there's
2689 : * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2690 : * in preprocess_limit it's good enough to consider estimated values.
2691 : */
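     : /*
     :  * For illustration: "OFFSET 0" alone returns false (no Limit node,
     :  * though the clause still acts as an optimization fence), while
     :  * "OFFSET 1" or a non-null constant LIMIT returns true.
     :  */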
2692 : static bool
2693 25588 : limit_needed(Query *parse)
2694 : {
2695 : Node *node;
2696 :
2697 25588 : node = parse->limitCount;
2698 25588 : if (node)
2699 : {
2700 201 : if (IsA(node, Const))
2701 : {
2702 : /* NULL indicates LIMIT ALL, ie, no limit */
2703 199 : if (!((Const *) node)->constisnull)
2704 199 : return true; /* LIMIT with a constant value */
2705 : }
2706 : else
2707 2 : return true; /* non-constant LIMIT */
2708 : }
2709 :
2710 25387 : node = parse->limitOffset;
2711 25387 : if (node)
2712 : {
2713 41 : if (IsA(node, Const))
2714 : {
2715 : /* Treat NULL as no offset; the executor would too */
2716 40 : if (!((Const *) node)->constisnull)
2717 : {
2718 40 : int64 offset = DatumGetInt64(((Const *) node)->constvalue);
2719 :
2720 40 : if (offset != 0)
2721 3 : return true; /* OFFSET with a nonzero value */
2722 : }
2723 : }
2724 : else
2725 1 : return true; /* non-constant OFFSET */
2726 : }
2727 :
2728 25383 : return false; /* don't need a Limit plan node */
2729 : }
2730 :
2731 :
2732 : /*
2733 : * remove_useless_groupby_columns
2734 : * Remove any columns in the GROUP BY clause that are redundant due to
2735 : * being functionally dependent on other GROUP BY columns.
2736 : *
2737 : * Since some other DBMSes do not allow references to ungrouped columns, it's
2738 : * not unusual to find all columns listed in GROUP BY even though listing the
2739 : * primary-key columns would be sufficient. Deleting such excess columns
2740 : * avoids redundant sorting work, so it's worth doing. When we do this, we
2741 : * must mark the plan as dependent on the pkey constraint (compare the
2742 : * parser's check_ungrouped_columns() and check_functional_grouping()).
2743 : *
2744 : * In principle, we could treat any NOT-NULL columns appearing in a UNIQUE
2745 : * index as the determining columns. But as with check_functional_grouping(),
2746 : * there's currently no way to represent dependency on a NOT NULL constraint,
2747 : * so we consider only the pkey for now.
2748 : */
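     : /*
     :  * For illustration (hypothetical table): if t's primary key is
     :  * (id), then "SELECT id, name, count(*) FROM t GROUP BY id, name"
     :  * can group on id alone, since name is functionally dependent on
     :  * the pkey; the dependency recorded in constraintDeps forces
     :  * replanning if the constraint is later dropped.
     :  */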
2749 : static void
2750 25178 : remove_useless_groupby_columns(PlannerInfo *root)
2751 : {
2752 25178 : Query *parse = root->parse;
2753 : Bitmapset **groupbyattnos;
2754 : Bitmapset **surplusvars;
2755 : ListCell *lc;
2756 : int relid;
2757 :
2758 : /* No chance to do anything if there are fewer than two GROUP BY items */
2759 25178 : if (list_length(parse->groupClause) < 2)
2760 25033 : return;
2761 :
2762 : /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2763 145 : if (parse->groupingSets)
2764 74 : return;
2765 :
2766 : /*
2767 : * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2768 : * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2769 : * that are GROUP BY items.
2770 : */
2771 71 : groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2772 71 : (list_length(parse->rtable) + 1));
2773 258 : foreach(lc, parse->groupClause)
2774 : {
2775 187 : SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
2776 187 : TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2777 187 : Var *var = (Var *) tle->expr;
2778 :
2779 : /*
2780 : * Ignore non-Vars and Vars from other query levels.
2781 : *
2782 : * XXX in principle, stable expressions containing Vars could also be
2783 : * removed, if all the Vars are functionally dependent on other GROUP
2784 : * BY items. But it's not clear that such cases occur often enough to
2785 : * be worth troubling over.
2786 : */
2787 352 : if (!IsA(var, Var) ||
2788 165 : var->varlevelsup > 0)
2789 22 : continue;
2790 :
2791 : /* OK, remember we have this Var */
2792 165 : relid = var->varno;
2793 165 : Assert(relid <= list_length(parse->rtable));
2794 330 : groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2795 165 : var->varattno - FirstLowInvalidHeapAttributeNumber);
2796 : }
2797 :
2798 : /*
2799 : * Consider each relation and see if it is possible to remove some of its
2800 : * Vars from GROUP BY. For simplicity and speed, we do the actual removal
2801 : * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
2802 : * of the column attnos of RTE k that are removable GROUP BY items.
2803 : */
2804 71 : surplusvars = NULL; /* don't allocate array unless required */
2805 71 : relid = 0;
2806 235 : foreach(lc, parse->rtable)
2807 : {
2808 164 : RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
2809 : Bitmapset *relattnos;
2810 : Bitmapset *pkattnos;
2811 : Oid constraintOid;
2812 :
2813 164 : relid++;
2814 :
2815 : /* Only plain relations could have primary-key constraints */
2816 164 : if (rte->rtekind != RTE_RELATION)
2817 194 : continue;
2818 :
2819 : /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2820 125 : relattnos = groupbyattnos[relid];
2821 125 : if (bms_membership(relattnos) != BMS_MULTIPLE)
2822 69 : continue;
2823 :
2824 : /*
2825 : * Can't remove any columns for this rel if there is no suitable
2826 : * (i.e., nondeferrable) primary key constraint.
2827 : */
2828 56 : pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2829 56 : if (pkattnos == NULL)
2830 47 : continue;
2831 :
2832 : /*
2833 : * If the primary key is a proper subset of relattnos then we have
2834 : * some items in the GROUP BY that can be removed.
2835 : */
2836 9 : if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2837 : {
2838 : /*
2839 : * To easily remember whether we've found anything to do, we don't
2840 : * allocate the surplusvars[] array until we find something.
2841 : */
2842 6 : if (surplusvars == NULL)
2843 5 : surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2844 5 : (list_length(parse->rtable) + 1));
2845 :
2846 : /* Remember the attnos of the removable columns */
2847 6 : surplusvars[relid] = bms_difference(relattnos, pkattnos);
2848 :
2849 : /* Also, mark the resulting plan as dependent on this constraint */
2850 6 : parse->constraintDeps = lappend_oid(parse->constraintDeps,
2851 : constraintOid);
2852 : }
2853 : }
2854 :
2855 : /*
2856 : * If we found any surplus Vars, build a new GROUP BY clause without them.
2857 : * (Note: this may leave some TLEs with unreferenced ressortgroupref
2858 : * markings, but that's harmless.)
2859 : */
2860 71 : if (surplusvars != NULL)
2861 : {
2862 5 : List *new_groupby = NIL;
2863 :
2864 26 : foreach(lc, parse->groupClause)
2865 : {
2866 21 : SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
2867 21 : TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2868 21 : Var *var = (Var *) tle->expr;
2869 :
2870 : /*
2871 : * New list must include non-Vars, outer Vars, and anything not
2872 : * marked as surplus.
2873 : */
2874 42 : if (!IsA(var, Var) ||
2875 42 : var->varlevelsup > 0 ||
2876 21 : !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
2877 21 : surplusvars[var->varno]))
2878 12 : new_groupby = lappend(new_groupby, sgc);
2879 : }
2880 :
2881 5 : parse->groupClause = new_groupby;
2882 : }
2883 : }
2884 :
2885 : /*
2886 : * preprocess_groupclause - do preparatory work on GROUP BY clause
2887 : *
2888 : * The idea here is to adjust the ordering of the GROUP BY elements
2889 : * (which in itself is semantically insignificant) to match ORDER BY,
2890 : * thereby allowing a single sort operation to both implement the ORDER BY
2891 : * requirement and set up for a Unique step that implements GROUP BY.
2892 : *
2893 : * In principle it might be interesting to consider other orderings of the
2894 : * GROUP BY elements, which could match the sort ordering of other
2895 : * possible plans (eg an indexscan) and thereby reduce cost. We don't
2896 : * bother with that, though. Hashed grouping will frequently win anyway.
2897 : *
2898 : * Note: we need no comparable processing of the distinctClause because
2899 : * the parser already enforced that it matches ORDER BY.
2900 : *
2901 : * For grouping sets, the order of items is instead forced to agree with that
2902 : * of the grouping set (and items not in the grouping set are skipped). The
2903 : * work of sorting the order of grouping set elements to match the ORDER BY if
2904 : * possible is done elsewhere.
2905 : */
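     : /*
     :  * For illustration (hypothetical query): given
     :  *   SELECT a, b FROM t GROUP BY a, b ORDER BY b, a;
     :  * the GROUP BY list is reordered to (b, a) so one sort serves both
     :  * steps; but with ORDER BY b, c there is no complete prefix match,
     :  * and the original GROUP BY order is kept.
     :  */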
2906 : static List *
2907 598 : preprocess_groupclause(PlannerInfo *root, List *force)
2908 : {
2909 598 : Query *parse = root->parse;
2910 598 : List *new_groupclause = NIL;
2911 : bool partial_match;
2912 : ListCell *sl;
2913 : ListCell *gl;
2914 :
2915 : /* For grouping sets, we need to force the ordering */
2916 598 : if (force)
2917 : {
2918 938 : foreach(sl, force)
2919 : {
2920 557 : Index ref = lfirst_int(sl);
2921 557 : SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2922 :
2923 557 : new_groupclause = lappend(new_groupclause, cl);
2924 : }
2925 :
2926 381 : return new_groupclause;
2927 : }
2928 :
2929 : /* If no ORDER BY, nothing useful to do here */
2930 217 : if (parse->sortClause == NIL)
2931 123 : return parse->groupClause;
2932 :
2933 : /*
2934 : * Scan the ORDER BY clause and construct a list of matching GROUP BY
2935 : * items, but only as far as we can make a matching prefix.
2936 : *
2937 : * This code assumes that the sortClause contains no duplicate items.
2938 : */
2939 187 : foreach(sl, parse->sortClause)
2940 : {
2941 111 : SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
2942 :
2943 147 : foreach(gl, parse->groupClause)
2944 : {
2945 129 : SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
2946 :
2947 129 : if (equal(gc, sc))
2948 : {
2949 93 : new_groupclause = lappend(new_groupclause, gc);
2950 93 : break;
2951 : }
2952 : }
2953 111 : if (gl == NULL)
2954 18 : break; /* no match, so stop scanning */
2955 : }
2956 :
2957 : /* Did we match all of the ORDER BY list, or just some of it? */
2958 94 : partial_match = (sl != NULL);
2959 :
2960 : /* If no match at all, no point in reordering GROUP BY */
2961 94 : if (new_groupclause == NIL)
2962 10 : return parse->groupClause;
2963 :
2964 : /*
2965 : * Add any remaining GROUP BY items to the new list, but only if we were
2966 : * able to make a complete match. In other words, we only rearrange the
2967 : * GROUP BY list if the result is that one list is a prefix of the other
2968 : * --- otherwise there's no possibility of a common sort. Also, give up
2969 : * if there are any non-sortable GROUP BY items, since then there's no
2970 : * hope anyway.
2971 : */
2972 183 : foreach(gl, parse->groupClause)
2973 : {
2974 102 : SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
2975 :
2976 102 : if (list_member_ptr(new_groupclause, gc))
2977 93 : continue; /* it matched an ORDER BY item */
2978 9 : if (partial_match)
2979 3 : return parse->groupClause; /* give up, no common sort possible */
2980 6 : if (!OidIsValid(gc->sortop))
2981 0 : return parse->groupClause; /* give up, GROUP BY can't be sorted */
2982 6 : new_groupclause = lappend(new_groupclause, gc);
2983 : }
2984 :
2985 : /* Success --- install the rearranged GROUP BY list */
2986 81 : Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2987 81 : return new_groupclause;
2988 : }
2989 :
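 : /*
 :  * Illustrative sketch (not planner code; the names, the fixed-size
 :  * buffer, and the omission of the sortability check on leftover items
 :  * are all simplifying assumptions): the same prefix-matching rule as
 :  * above, restated over bare integer sort-group refs.  Returns false
 :  * where the planner would keep the original GROUP BY list unchanged.
 :  */
 : #include <stdbool.h>
 :
 : static bool
 : reorder_group_refs(int *groupby, int ng, const int *orderby, int no)
 : {
 : 	int		out[64];	/* assumes ng <= 64 for brevity */
 : 	int		nout = 0;
 :
 : 	/* collect the longest ORDER BY prefix made up of GROUP BY items */
 : 	for (int s = 0; s < no; s++)
 : 	{
 : 		int		g;
 :
 : 		for (g = 0; g < ng; g++)
 : 			if (groupby[g] == orderby[s])
 : 				break;
 : 		if (g == ng)
 : 			break;		/* this ORDER BY item isn't grouped; stop */
 : 		out[nout++] = orderby[s];
 : 	}
 : 	if (nout == 0)
 : 		return false;	/* no common prefix, nothing to gain */
 :
 : 	/* append leftovers only if the whole ORDER BY list was matched */
 : 	for (int g = 0; g < ng; g++)
 : 	{
 : 		bool	seen = false;
 :
 : 		for (int k = 0; k < nout; k++)
 : 			if (out[k] == groupby[g])
 : 				seen = true;
 : 		if (!seen)
 : 		{
 : 			if (nout < no)
 : 				return false;	/* partial match: no common sort */
 : 			out[nout++] = groupby[g];
 : 		}
 : 	}
 :
 : 	for (int k = 0; k < ng; k++)
 : 		groupby[k] = out[k];
 : 	return true;
 : }
 :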
2990 : /*
2991 : * Extract lists of grouping sets that can be implemented using a single
2992 : * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2993 : *
2994 : * Input must be sorted with smallest sets first. Result has each sublist
2995 : * sorted with smallest sets first.
2996 : *
2997 : * We want to produce the absolute minimum possible number of lists here to
2998 : * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2999 : * of finding the minimal partition of a partially-ordered set into chains
3000 : * (which is what we need, taking the list of grouping sets as a poset ordered
3001 : * by set inclusion) can be mapped to the problem of finding the maximum
3002 : * cardinality matching on a bipartite graph, which is solvable in polynomial
3003 : * time with a worst case of no worse than O(n^2.5) and usually much
3004 : * better. Since our N is at most 4096, we don't need to consider fallbacks to
3005 : * heuristic or approximate methods. (Planning time for a 12-d cube is under
3006 : * half a second on my modest system even with optimization off and assertions
3007 : * on.)
3008 : */
3009 : static List *
3010 90 : extract_rollup_sets(List *groupingSets)
3011 : {
3012 90 : int num_sets_raw = list_length(groupingSets);
3013 90 : int num_empty = 0;
3014 90 : int num_sets = 0; /* distinct sets */
3015 90 : int num_chains = 0;
3016 90 : List *result = NIL;
3017 : List **results;
3018 : List **orig_sets;
3019 : Bitmapset **set_masks;
3020 : int *chains;
3021 : short **adjacency;
3022 : short *adjacency_buf;
3023 : BipartiteMatchState *state;
3024 : int i;
3025 : int j;
3026 : int j_size;
3027 90 : ListCell *lc1 = list_head(groupingSets);
3028 : ListCell *lc;
3029 :
3030 : /*
3031 : * Start by stripping out empty sets. The algorithm doesn't require this,
3032 : * but the planner currently needs all empty sets to be returned in the
3033 : * first list, so we strip them here and add them back after.
3034 : */
3035 255 : while (lc1 && lfirst(lc1) == NIL)
3036 : {
3037 75 : ++num_empty;
3038 75 : lc1 = lnext(lc1);
3039 : }
3040 :
3041 : /* bail out now if it turns out that all we had were empty sets. */
3042 90 : if (!lc1)
3043 7 : return list_make1(groupingSets);
3044 :
3045 : /*----------
3046 : * We don't strictly need to remove duplicate sets here, but if we don't,
3047 : * they tend to become scattered through the result, which is a bit
3048 : * confusing (and irritating if we ever decide to optimize them out).
3049 : * So we remove them here and add them back after.
3050 : *
3051 : * For each non-duplicate set, we fill in the following:
3052 : *
3053 : * orig_sets[i] = list of the original set lists
3054 : * set_masks[i] = bitmapset for testing inclusion
3055 : * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
3056 : *
3057 : * chains[i] will be the result group this set is assigned to.
3058 : *
3059 : * We index all of these from 1 rather than 0 because it is convenient
3060 : * to leave 0 free for the NIL node in the graph algorithm.
3061 : *----------
3062 : */
3063 83 : orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
3064 83 : set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
3065 83 : adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
3066 83 : adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
3067 :
3068 83 : j_size = 0;
3069 83 : j = 0;
3070 83 : i = 1;
3071 :
3072 289 : for_each_cell(lc, lc1)
3073 : {
3074 206 : List *candidate = lfirst(lc);
3075 206 : Bitmapset *candidate_set = NULL;
3076 : ListCell *lc2;
3077 206 : int dup_of = 0;
3078 :
3079 498 : foreach(lc2, candidate)
3080 : {
3081 292 : candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
3082 : }
3083 :
3084 : /* we can only be a dup if we're the same length as a previous set */
3085 206 : if (j_size == list_length(candidate))
3086 : {
3087 : int k;
3088 :
3089 188 : for (k = j; k < i; ++k)
3090 : {
3091 123 : if (bms_equal(set_masks[k], candidate_set))
3092 : {
3093 13 : dup_of = k;
3094 13 : break;
3095 : }
3096 : }
3097 : }
3098 128 : else if (j_size < list_length(candidate))
3099 : {
3100 128 : j_size = list_length(candidate);
3101 128 : j = i;
3102 : }
3103 :
3104 206 : if (dup_of > 0)
3105 : {
3106 13 : orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
3107 13 : bms_free(candidate_set);
3108 : }
3109 : else
3110 : {
3111 : int k;
3112 193 : int n_adj = 0;
3113 :
3114 193 : orig_sets[i] = list_make1(candidate);
3115 193 : set_masks[i] = candidate_set;
3116 :
3117 : /* fill in adjacency list; no need to compare equal-size sets */
3118 :
3119 304 : for (k = j - 1; k > 0; --k)
3120 : {
3121 111 : if (bms_is_subset(set_masks[k], candidate_set))
3122 96 : adjacency_buf[++n_adj] = k;
3123 : }
3124 :
3125 193 : if (n_adj > 0)
3126 : {
3127 53 : adjacency_buf[0] = n_adj;
3128 53 : adjacency[i] = palloc((n_adj + 1) * sizeof(short));
3129 53 : memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
3130 : }
3131 : else
3132 140 : adjacency[i] = NULL;
3133 :
3134 193 : ++i;
3135 : }
3136 : }
3137 :
3138 83 : num_sets = i - 1;
3139 :
3140 : /*
3141 : * Apply the graph matching algorithm to do the work.
3142 : */
3143 83 : state = BipartiteMatch(num_sets, num_sets, adjacency);
3144 :
3145 : /*
3146 : * Now, the state->pair* fields have the info we need to assign sets to
3147 : * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
3148 : * pair_vu[v] = u (both will be true, but we check both so that we can do
3149 : * it in one pass).
3150 : */
3151 83 : chains = palloc0((num_sets + 1) * sizeof(int));
3152 :
3153 276 : for (i = 1; i <= num_sets; ++i)
3154 : {
3155 193 : int u = state->pair_vu[i];
3156 193 : int v = state->pair_uv[i];
3157 :
3158 193 : if (u > 0 && u < i)
3159 0 : chains[i] = chains[u];
3160 193 : else if (v > 0 && v < i)
3161 53 : chains[i] = chains[v];
3162 : else
3163 140 : chains[i] = ++num_chains;
3164 : }
3165 :
3166 : /* build result lists. */
3167 83 : results = palloc0((num_chains + 1) * sizeof(List *));
3168 :
3169 276 : for (i = 1; i <= num_sets; ++i)
3170 : {
3171 193 : int c = chains[i];
3172 :
3173 193 : Assert(c > 0);
3174 :
3175 193 : results[c] = list_concat(results[c], orig_sets[i]);
3176 : }
3177 :
3178 : /* push any empty sets back on the first list. */
3179 226 : while (num_empty-- > 0)
3180 60 : results[1] = lcons(NIL, results[1]);
3181 :
3182 : /* make result list */
3183 223 : for (i = 1; i <= num_chains; ++i)
3184 140 : result = lappend(result, results[i]);
3185 :
3186 : /*
3187 : * Free all the things.
3188 : *
3189 : * (This is over-fussy for small sets but for large sets we could have
3190 : * tied up a nontrivial amount of memory.)
3191 : */
3192 83 : BipartiteMatchFree(state);
3193 83 : pfree(results);
3194 83 : pfree(chains);
3195 276 : for (i = 1; i <= num_sets; ++i)
3196 193 : if (adjacency[i])
3197 53 : pfree(adjacency[i]);
3198 83 : pfree(adjacency);
3199 83 : pfree(adjacency_buf);
3200 83 : pfree(orig_sets);
3201 276 : for (i = 1; i <= num_sets; ++i)
3202 193 : bms_free(set_masks[i]);
3203 83 : pfree(set_masks);
3204 :
3205 83 : return result;
3206 : }
3207 :
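 : /*
 :  * Worked example for extract_rollup_sets (illustrative): given
 :  * GROUPING SETS ((a), (c), (a,b), (a,b,c)), sorted smallest first, the
 :  * poset under set inclusion contains the chain (a) < (a,b) < (a,b,c)
 :  * plus the lone set (c), so the minimal chain partition -- and hence
 :  * the result -- is two lists:
 :  *
 :  *   [ [(a), (a,b), (a,b,c)], [(c)] ]
 :  *
 :  * In the bipartite graph, each matched edge links a set to one proper
 :  * subset, merging their chains; the number of chains is num_sets minus
 :  * the size of the matching, which is why a maximum matching yields the
 :  * minimum number of sorts.
 :  */
 :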
3208 : /*
3209 : * Reorder the elements of a list of grouping sets such that they have correct
3210 : * prefix relationships. Also inserts the GroupingSetData annotations.
3211 : *
3212 : * The input must be ordered with smallest sets first; the result is returned
3213 : * with largest sets first. Note that the result shares no list substructure
3214 : * with the input, so it's safe for the caller to modify it later.
3215 : *
3216 : * If we're passed in a sortclause, we follow its order of columns to the
3217 : * extent possible, to minimize the chance that we add unnecessary sorts.
3218 : * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3219 : * gets implemented in one pass.)
3220 : */
3221 : static List *
3222 147 : reorder_grouping_sets(List *groupingsets, List *sortclause)
3223 : {
3224 : ListCell *lc;
3225 : ListCell *lc2;
3226 147 : List *previous = NIL;
3227 147 : List *result = NIL;
3228 :
3229 428 : foreach(lc, groupingsets)
3230 : {
3231 281 : List *candidate = lfirst(lc);
3232 281 : List *new_elems = list_difference_int(candidate, previous);
3233 281 : GroupingSetData *gs = makeNode(GroupingSetData);
3234 :
3235 281 : if (list_length(new_elems) > 0)
3236 : {
3237 392 : while (list_length(sortclause) > list_length(previous) &&
 : new_elems != NIL)
3238 : {
3239 24 : SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3240 24 : int ref = sc->tleSortGroupRef;
3241 :
3242 24 : if (list_member_int(new_elems, ref))
3243 : {
3244 6 : previous = lappend_int(previous, ref);
3245 6 : new_elems = list_delete_int(new_elems, ref);
3246 : }
3247 : else
3248 : {
3249 : /* diverged from the sortclause; give up on it */
3250 18 : sortclause = NIL;
3251 18 : break;
3252 : }
3253 : }
3254 :
3255 407 : foreach(lc2, new_elems)
3256 : {
3257 214 : previous = lappend_int(previous, lfirst_int(lc2));
3258 : }
3259 : }
3260 :
3261 281 : gs->set = list_copy(previous);
3262 281 : result = lcons(gs, result);
3263 281 : list_free(new_elems);
3264 : }
3265 :
3266 147 : list_free(previous);
3267 :
3268 147 : return result;
3269 : }
3270 :
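 : /*
 :  * Illustrative trace of the above: for GROUPING SETS ((a,b,c), (c))
 :  * with ORDER BY c, b, a, the input arrives smallest first as
 :  * ((c), (a,b,c)).  The first iteration consumes "c" from the
 :  * sortclause, making the prefix (c); the second follows the sortclause
 :  * through "b" and "a", extending the prefix to (c,b,a).  The result,
 :  * largest first, is ((c,b,a), (c)), so a single sort on (c,b,a) serves
 :  * both the rollup and the ORDER BY.
 :  */
 :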
3271 : /*
3272 : * Compute query_pathkeys and other pathkeys during plan generation
3273 : */
3274 : static void
3275 25170 : standard_qp_callback(PlannerInfo *root, void *extra)
3276 : {
3277 25170 : Query *parse = root->parse;
3278 25170 : standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3279 25170 : List *tlist = qp_extra->tlist;
3280 25170 : List *activeWindows = qp_extra->activeWindows;
3281 :
3282 : /*
3283 : * Calculate pathkeys that represent grouping/ordering requirements. The
3284 : * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
3285 : * be, in which case we just leave their pathkeys empty.
3286 : */
3287 25470 : if (qp_extra->groupClause &&
3288 300 : grouping_is_sortable(qp_extra->groupClause))
3289 300 : root->group_pathkeys =
3290 300 : make_pathkeys_for_sortclauses(root,
3291 : qp_extra->groupClause,
3292 : tlist);
3293 : else
3294 24870 : root->group_pathkeys = NIL;
3295 :
3296 : /* We consider only the first (bottom) window in pathkeys logic */
3297 25170 : if (activeWindows != NIL)
3298 : {
3299 133 : WindowClause *wc = (WindowClause *) linitial(activeWindows);
3300 :
3301 133 : root->window_pathkeys = make_pathkeys_for_window(root,
3302 : wc,
3303 : tlist);
3304 : }
3305 : else
3306 25037 : root->window_pathkeys = NIL;
3307 :
3308 25230 : if (parse->distinctClause &&
3309 60 : grouping_is_sortable(parse->distinctClause))
3310 59 : root->distinct_pathkeys =
3311 59 : make_pathkeys_for_sortclauses(root,
3312 : parse->distinctClause,
3313 : tlist);
3314 : else
3315 25111 : root->distinct_pathkeys = NIL;
3316 :
3317 25170 : root->sort_pathkeys =
3318 25170 : make_pathkeys_for_sortclauses(root,
3319 : parse->sortClause,
3320 : tlist);
3321 :
3322 : /*
3323 : * Figure out whether we want a sorted result from query_planner.
3324 : *
3325 : * If we have a sortable GROUP BY clause, then we want a result sorted
3326 : * properly for grouping. Otherwise, if we have window functions to
3327 : * evaluate, we try to sort for the first window. Otherwise, if there's a
3328 : * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3329 : * we try to produce output that's sufficiently well sorted for the
3330 : * DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
3331 : * by the ORDER BY clause.
3332 : *
3333 : * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3334 : * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3335 : * that might just leave us failing to exploit an available sort order at
3336 : * all. Needs more thought. The choice for DISTINCT versus ORDER BY is
3337 : * much easier, since we know that the parser ensured that one is a
3338 : * superset of the other.
3339 : */
3340 25170 : if (root->group_pathkeys)
3341 285 : root->query_pathkeys = root->group_pathkeys;
3342 24885 : else if (root->window_pathkeys)
3343 103 : root->query_pathkeys = root->window_pathkeys;
3344 49564 : else if (list_length(root->distinct_pathkeys) >
3345 24782 : list_length(root->sort_pathkeys))
3346 30 : root->query_pathkeys = root->distinct_pathkeys;
3347 24752 : else if (root->sort_pathkeys)
3348 2598 : root->query_pathkeys = root->sort_pathkeys;
3349 : else
3350 22154 : root->query_pathkeys = NIL;
3351 25170 : }
3352 :
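 : /*
 :  * Example of the precedence rules above (illustrative): for
 :  *
 :  *   SELECT DISTINCT a, b FROM t ORDER BY a;
 :  *
 :  * there is no GROUP BY or window clause, and the distinct pathkeys
 :  * (a, b) are longer than the sort pathkeys (a), so query_pathkeys
 :  * becomes the distinct pathkeys; input sorted by (a, b) then satisfies
 :  * both the DISTINCT and the ORDER BY without a second sort.
 :  */
 :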
3353 : /*
3354 : * Estimate number of groups produced by grouping clauses (1 if not grouping)
3355 : *
3356 : * path_rows: number of output rows from scan/join step
3357 : * gd: grouping sets data, or NULL if not doing grouping sets
3358 : *
3359 : * If doing grouping sets, we also annotate the gsets data with the estimates
3360 : * for each set and each individual rollup list, with a view to later
3361 : * determining whether some combination of them could be hashed instead.
3362 : */
3363 : static double
3364 2447 : get_number_of_groups(PlannerInfo *root,
3365 : double path_rows,
3366 : grouping_sets_data *gd)
3367 : {
3368 2447 : Query *parse = root->parse;
3369 : double dNumGroups;
3370 :
3371 2447 : if (parse->groupClause)
3372 : {
3373 : List *groupExprs;
3374 :
3375 311 : if (parse->groupingSets)
3376 : {
3377 : /* Add up the estimates for each grouping set */
3378 : ListCell *lc;
3379 : ListCell *lc2;
3380 :
3381 83 : Assert(gd); /* keep Coverity happy */
3382 :
3383 83 : dNumGroups = 0;
3384 :
3385 223 : foreach(lc, gd->rollups)
3386 : {
3387 140 : RollupData *rollup = lfirst(lc);
3388 : ListCell *lc;
3389 :
3390 140 : groupExprs = get_sortgrouplist_exprs(rollup->groupClause,
3391 : parse->targetList);
3392 :
3393 140 : rollup->numGroups = 0.0;
3394 :
3395 406 : forboth(lc, rollup->gsets, lc2, rollup->gsets_data)
3396 : {
3397 266 : List *gset = (List *) lfirst(lc);
3398 266 : GroupingSetData *gs = lfirst(lc2);
3399 266 : double numGroups = estimate_num_groups(root,
3400 : groupExprs,
3401 : path_rows,
3402 : &gset);
3403 :
3404 266 : gs->numGroups = numGroups;
3405 266 : rollup->numGroups += numGroups;
3406 : }
3407 :
3408 140 : dNumGroups += rollup->numGroups;
3409 : }
3410 :
3411 83 : if (gd->hash_sets_idx)
3412 : {
3413 : ListCell *lc;
3414 :
3415 5 : gd->dNumHashGroups = 0;
3416 :
3417 5 : groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3418 : parse->targetList);
3419 :
3420 10 : forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets)
3421 : {
3422 5 : List *gset = (List *) lfirst(lc);
3423 5 : GroupingSetData *gs = lfirst(lc2);
3424 5 : double numGroups = estimate_num_groups(root,
3425 : groupExprs,
3426 : path_rows,
3427 : &gset);
3428 :
3429 5 : gs->numGroups = numGroups;
3430 5 : gd->dNumHashGroups += numGroups;
3431 : }
3432 :
3433 5 : dNumGroups += gd->dNumHashGroups;
3434 : }
3435 : }
3436 : else
3437 : {
3438 : /* Plain GROUP BY */
3439 228 : groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3440 : parse->targetList);
3441 :
3442 228 : dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3443 : NULL);
3444 : }
3445 : }
3446 2136 : else if (parse->groupingSets)
3447 : {
3448 : /* Empty grouping sets ... one result row for each one */
3449 7 : dNumGroups = list_length(parse->groupingSets);
3450 : }
3451 2129 : else if (parse->hasAggs || root->hasHavingQual)
3452 : {
3453 : /* Plain aggregation, one result row */
3454 2129 : dNumGroups = 1;
3455 : }
3456 : else
3457 : {
3458 : /* Not grouping */
3459 0 : dNumGroups = 1;
3460 : }
3461 :
3462 2447 : return dNumGroups;
3463 : }
3464 :
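 : /*
 :  * Illustrative arithmetic for the grouping-sets branch above: for
 :  * ROLLUP (a, b), the single rollup's sets are (a,b), (a), and ().
 :  * If estimate_num_groups returns, say, 100, 10, and 1 for those sets,
 :  * then rollup->numGroups = 111 and dNumGroups grows by that amount.
 :  */
 :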
3465 : /*
3466 : * estimate_hashagg_tablesize
3467 : * estimate the number of bytes that a hash aggregate hashtable will
3468 : * require based on the agg_costs, path width and dNumGroups.
3469 : *
3470 : * XXX this may be over-estimating the size now that hashagg knows to omit
3471 : * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
3472 : * grouping columns not in the hashed set are counted here even though hashagg
3473 : * won't store them. Is this a problem?
3474 : */
3475 : static Size
3476 462 : estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
3477 : double dNumGroups)
3478 : {
3479 : Size hashentrysize;
3480 :
3481 : /* Estimate per-hash-entry space at tuple width... */
3482 462 : hashentrysize = MAXALIGN(path->pathtarget->width) +
3483 : MAXALIGN(SizeofMinimalTupleHeader);
3484 :
3485 : /* plus space for pass-by-ref transition values... */
3486 462 : hashentrysize += agg_costs->transitionSpace;
3487 : /* plus the per-hash-entry overhead */
3488 462 : hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
3489 :
3490 : /*
3491 : * Note that this disregards the effect of fill-factor and growth policy
3492 : * of the hash-table. That's probably ok, given that the default
3493 : * fill-factor is relatively high. It'd be hard to meaningfully factor in
3494 : * "double-in-size" growth policies here.
3495 : */
3496 462 : return hashentrysize * dNumGroups;
3497 : }
3498 :
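 : /*
 :  * Usage sketch (illustrative; mirrors the call sites below): the result
 :  * is compared directly against the work_mem budget in bytes, e.g.
 :  *
 :  *   if (estimate_hashagg_tablesize(path, agg_costs, dNumGroups) <
 :  *       work_mem * 1024L)
 :  *       ... consider an AGG_HASHED path ...
 :  *
 :  * so the estimate only has to be accurate relative to that threshold,
 :  * not in absolute terms.
 :  */
 :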
3499 : /*
3500 : * create_grouping_paths
3501 : *
3502 : * Build a new upperrel containing Paths for grouping and/or aggregation.
3503 : *
3504 : * input_rel: contains the source-data Paths
3505 : * target: the pathtarget for the result Paths to compute
3506 : * agg_costs: cost info about all aggregates in query (in AGGSPLIT_SIMPLE mode)
3507 : * gd: grouping sets data (rollups, hash sets, and group-count estimates),
3508 : * or NULL if not doing grouping sets
3510 : *
3511 : * Note: all Paths in input_rel are expected to return the target computed
3512 : * by make_group_input_target.
3513 : *
3514 : * We need to consider sorted and hashed aggregation in the same function,
3515 : * because otherwise (1) it would be harder to throw an appropriate error
3516 : * message if neither way works, and (2) we should not allow hashtable size
3517 : * considerations to dissuade us from using hashing if sorting is not possible.
3518 : */
3519 : static RelOptInfo *
3520 2405 : create_grouping_paths(PlannerInfo *root,
3521 : RelOptInfo *input_rel,
3522 : PathTarget *target,
3523 : const AggClauseCosts *agg_costs,
3524 : grouping_sets_data *gd)
3525 : {
3526 2405 : Query *parse = root->parse;
3527 2405 : Path *cheapest_path = input_rel->cheapest_total_path;
3528 : RelOptInfo *grouped_rel;
3529 2405 : PathTarget *partial_grouping_target = NULL;
3530 : AggClauseCosts agg_partial_costs; /* parallel only */
3531 : AggClauseCosts agg_final_costs; /* parallel only */
3532 : Size hashaggtablesize;
3533 : double dNumGroups;
3534 2405 : double dNumPartialGroups = 0;
3535 : bool can_hash;
3536 : bool can_sort;
3537 : bool try_parallel_aggregation;
3538 :
3539 : ListCell *lc;
3540 :
3541 : /* For now, do all work in the (GROUP_AGG, NULL) upperrel */
3542 2405 : grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3543 :
3544 : /*
3545 : * If the input relation is not parallel-safe, then the grouped relation
3546 : * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
3547 : * target list and HAVING quals are parallel-safe.
3548 : */
3549 4312 : if (input_rel->consider_parallel &&
3550 3785 : is_parallel_safe(root, (Node *) target->exprs) &&
3551 1878 : is_parallel_safe(root, (Node *) parse->havingQual))
3552 1875 : grouped_rel->consider_parallel = true;
3553 :
3554 : /*
3555 : * If the input rel belongs to a single FDW, so does the grouped rel.
3556 : */
3557 2405 : grouped_rel->serverid = input_rel->serverid;
3558 2405 : grouped_rel->userid = input_rel->userid;
3559 2405 : grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3560 2405 : grouped_rel->fdwroutine = input_rel->fdwroutine;
3561 :
3562 : /*
3563 : * Check for degenerate grouping.
3564 : */
3565 2522 : if ((root->hasHavingQual || parse->groupingSets) &&
3566 126 : !parse->hasAggs && parse->groupClause == NIL)
3567 : {
3568 : /*
3569 : * We have a HAVING qual and/or grouping sets, but no aggregates and
3570 : * no GROUP BY (which implies that the grouping sets are all empty).
3571 : *
3572 : * This is a degenerate case in which we are supposed to emit either
3573 : * zero or one row for each grouping set depending on whether HAVING
3574 : * succeeds. Furthermore, there cannot be any variables in either
3575 : * HAVING or the targetlist, so we actually do not need the FROM table
3576 : * at all! We can just throw away the plan-so-far and generate a
3577 : * Result node. This is a sufficiently unusual corner case that it's
3578 : * not worth contorting the structure of this module to avoid having
3579 : * to generate the earlier paths in the first place.
3580 : */
3581 3 : int nrows = list_length(parse->groupingSets);
3582 : Path *path;
3583 :
3584 3 : if (nrows > 1)
3585 : {
3586 : /*
3587 : * Doesn't seem worthwhile writing code to cons up a
3588 : * generate_series or a values scan to emit multiple rows. Instead
3589 : * just make N clones and append them. (With a volatile HAVING
3590 : * clause, this means you might get between 0 and N output rows.
3591 : * Offhand I think that's desired.)
3592 : */
3593 0 : List *paths = NIL;
3594 :
3595 0 : while (--nrows >= 0)
3596 : {
3597 0 : path = (Path *)
3598 : create_result_path(root, grouped_rel,
3599 : target,
3600 0 : (List *) parse->havingQual);
3601 0 : paths = lappend(paths, path);
3602 : }
3603 0 : path = (Path *)
3604 : create_append_path(grouped_rel,
3605 : paths,
3606 : NULL,
3607 : 0,
3608 : NIL);
3609 0 : path->pathtarget = target;
3610 : }
3611 : else
3612 : {
3613 : /* No grouping sets, or just one, so one output row */
3614 3 : path = (Path *)
3615 : create_result_path(root, grouped_rel,
3616 : target,
3617 3 : (List *) parse->havingQual);
3618 : }
3619 :
3620 3 : add_path(grouped_rel, path);
3621 :
3622 : /* No need to consider any other alternatives. */
3623 3 : set_cheapest(grouped_rel);
3624 :
3625 3 : return grouped_rel;
3626 : }
3627 :
3628 : /*
3629 : * Estimate number of groups.
3630 : */
3631 2402 : dNumGroups = get_number_of_groups(root,
3632 : cheapest_path->rows,
3633 : gd);
3634 :
3635 : /*
3636 : * Determine whether it's possible to perform sort-based implementations
3637 : * of grouping. (Note that if groupClause is empty,
3638 : * grouping_is_sortable() is trivially true, and all the
3639 : * pathkeys_contained_in() tests will succeed too, so that we'll consider
3640 : * every surviving input path.)
3641 : *
3642 : * If we have grouping sets, we might be able to sort some but not all of
3643 : * them; in this case, we need can_sort to be true as long as we must
3644 : * consider any sorted-input plan.
3645 : */
3646 2492 : can_sort = (gd && gd->rollups != NIL)
3647 4714 : || grouping_is_sortable(parse->groupClause);
3648 :
3649 : /*
3650 : * Determine whether we should consider hash-based implementations of
3651 : * grouping.
3652 : *
3653 : * Hashed aggregation only applies if we're grouping. If we have grouping
3654 : * sets, some groups might be hashable but others not; in this case we set
3655 : * can_hash true as long as there is nothing globally preventing us from
3656 : * hashing (and we should therefore consider plans with hashes).
3657 : *
3658 : * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
3659 : * aggregates. (Doing so would imply storing *all* the input values in
3660 : * the hash table, and/or running many sorts in parallel, either of which
3661 : * seems like a certain loser.) We similarly don't support ordered-set
3662 : * aggregates in hashed aggregation, but that case is also included in the
3663 : * numOrderedAggs count.
3664 : *
3665 : * Note: grouping_is_hashable() is much more expensive to check than the
3666 : * other gating conditions, so we want to do it last.
3667 : */
3668 5104 : can_hash = (parse->groupClause != NIL &&
3669 2976 : agg_costs->numOrderedAggs == 0 &&
3670 287 : (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause)));
3671 :
3672 : /*
3673 : * If grouped_rel->consider_parallel is true, then paths that we generate
3674 : * for this grouping relation could be run inside of a worker, but that
3675 : * doesn't mean we can actually use the PartialAggregate/FinalizeAggregate
3676 : * execution strategy. Figure that out.
3677 : */
3678 2402 : if (!grouped_rel->consider_parallel)
3679 : {
3680 : /* Not even parallel-safe. */
3681 530 : try_parallel_aggregation = false;
3682 : }
3683 1872 : else if (input_rel->partial_pathlist == NIL)
3684 : {
3685 : /* Nothing to use as input for partial aggregate. */
3686 1827 : try_parallel_aggregation = false;
3687 : }
3688 45 : else if (!parse->hasAggs && parse->groupClause == NIL)
3689 : {
3690 : /*
3691 : * We don't know how to do parallel aggregation unless we have either
3692 : * some aggregates or a grouping clause.
3693 : */
3694 0 : try_parallel_aggregation = false;
3695 : }
3696 45 : else if (parse->groupingSets)
3697 : {
3698 : /* We don't know how to do grouping sets in parallel. */
3699 0 : try_parallel_aggregation = false;
3700 : }
3701 45 : else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
3702 : {
3703 : /* Insufficient support for partial mode. */
3704 0 : try_parallel_aggregation = false;
3705 : }
3706 : else
3707 : {
3708 : /* Everything looks good. */
3709 45 : try_parallel_aggregation = true;
3710 : }
3711 :
3712 : /*
3713 : * Before generating paths for grouped_rel, we first generate any possible
3714 : * partial paths; that way, later code can easily consider both parallel
3715 : * and non-parallel approaches to grouping. Note that the partial paths
3716 : * we generate here are also partially aggregated, so simply pushing a
3717 : * Gather node on top is insufficient to create a final path, as would be
3718 : * the case for a scan/join rel.
3719 : */
3720 2402 : if (try_parallel_aggregation)
3721 : {
3722 45 : Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
3723 :
3724 : /*
3725 : * Build target list for partial aggregate paths. These paths cannot
3726 : * just emit the same tlist as regular aggregate paths, because (1) we
3727 : * must include Vars and Aggrefs needed in HAVING, which might not
3728 : * appear in the result tlist, and (2) the Aggrefs must be set in
3729 : * partial mode.
3730 : */
3731 45 : partial_grouping_target = make_partial_grouping_target(root, target);
3732 :
3733 : /* Estimate number of partial groups. */
3734 45 : dNumPartialGroups = get_number_of_groups(root,
3735 : cheapest_partial_path->rows,
3736 : gd);
3737 :
3738 : /*
3739 : * Collect statistics about aggregates for estimating costs of
3740 : * performing aggregation in parallel.
3741 : */
3742 45 : MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
3743 45 : MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
3744 45 : if (parse->hasAggs)
3745 : {
3746 : /* partial phase */
3747 41 : get_agg_clause_costs(root, (Node *) partial_grouping_target->exprs,
3748 : AGGSPLIT_INITIAL_SERIAL,
3749 : &agg_partial_costs);
3750 :
3751 : /* final phase */
3752 41 : get_agg_clause_costs(root, (Node *) target->exprs,
3753 : AGGSPLIT_FINAL_DESERIAL,
3754 : &agg_final_costs);
3755 41 : get_agg_clause_costs(root, parse->havingQual,
3756 : AGGSPLIT_FINAL_DESERIAL,
3757 : &agg_final_costs);
3758 : }
3759 :
3760 45 : if (can_sort)
3761 : {
3762 : /* This was checked before setting try_parallel_aggregation */
3763 45 : Assert(parse->hasAggs || parse->groupClause);
3764 :
3765 : /*
3766 : * Use any available suitably-sorted path as input, and also
3767 : * consider sorting the cheapest partial path.
3768 : */
3769 90 : foreach(lc, input_rel->partial_pathlist)
3770 : {
3771 45 : Path *path = (Path *) lfirst(lc);
3772 : bool is_sorted;
3773 :
3774 45 : is_sorted = pathkeys_contained_in(root->group_pathkeys,
3775 : path->pathkeys);
3776 45 : if (path == cheapest_partial_path || is_sorted)
3777 : {
3778 : /* Sort the cheapest partial path, if it isn't already */
3779 45 : if (!is_sorted)
3780 11 : path = (Path *) create_sort_path(root,
3781 : grouped_rel,
3782 : path,
3783 : root->group_pathkeys,
3784 : -1.0);
3785 :
3786 45 : if (parse->hasAggs)
3787 41 : add_partial_path(grouped_rel, (Path *)
3788 82 : create_agg_path(root,
3789 : grouped_rel,
3790 : path,
3791 : partial_grouping_target,
3792 41 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3793 : AGGSPLIT_INITIAL_SERIAL,
3794 : parse->groupClause,
3795 : NIL,
3796 : &agg_partial_costs,
3797 : dNumPartialGroups));
3798 : else
3799 4 : add_partial_path(grouped_rel, (Path *)
3800 4 : create_group_path(root,
3801 : grouped_rel,
3802 : path,
3803 : partial_grouping_target,
3804 : parse->groupClause,
3805 : NIL,
3806 : dNumPartialGroups));
3807 : }
3808 : }
3809 : }
3810 :
3811 45 : if (can_hash)
3812 : {
3813 : /* Checked above */
3814 11 : Assert(parse->hasAggs || parse->groupClause);
3815 :
3816 11 : hashaggtablesize =
3817 : estimate_hashagg_tablesize(cheapest_partial_path,
3818 : &agg_partial_costs,
3819 : dNumPartialGroups);
3820 :
3821 : /*
3822 : * Tentatively produce a partial HashAgg Path, but only if it looks
3823 : * as though the hash table will fit in work_mem.
3824 : */
3825 11 : if (hashaggtablesize < work_mem * 1024L)
3826 : {
3827 11 : add_partial_path(grouped_rel, (Path *)
3828 11 : create_agg_path(root,
3829 : grouped_rel,
3830 : cheapest_partial_path,
3831 : partial_grouping_target,
3832 : AGG_HASHED,
3833 : AGGSPLIT_INITIAL_SERIAL,
3834 : parse->groupClause,
3835 : NIL,
3836 : &agg_partial_costs,
3837 : dNumPartialGroups));
3838 : }
3839 : }
3840 : }
3841 :
3842 : /* Build final grouping paths */
3843 2402 : if (can_sort)
3844 : {
3845 : /*
3846 : * Use any available suitably-sorted path as input, and also consider
3847 : * sorting the cheapest-total path.
3848 : */
3849 4870 : foreach(lc, input_rel->pathlist)
3850 : {
3851 2468 : Path *path = (Path *) lfirst(lc);
3852 : bool is_sorted;
3853 :
3854 2468 : is_sorted = pathkeys_contained_in(root->group_pathkeys,
3855 : path->pathkeys);
3856 2468 : if (path == cheapest_path || is_sorted)
3857 : {
3858 : /* Sort the cheapest-total path if it isn't already sorted */
3859 2458 : if (!is_sorted)
3860 282 : path = (Path *) create_sort_path(root,
3861 : grouped_rel,
3862 : path,
3863 : root->group_pathkeys,
3864 : -1.0);
3865 :
3866 : /* Now decide what to stick atop it */
3867 2458 : if (parse->groupingSets)
3868 : {
3869 94 : consider_groupingsets_paths(root, grouped_rel,
3870 : path, true, can_hash, target,
3871 : gd, agg_costs, dNumGroups);
3872 : }
3873 2364 : else if (parse->hasAggs)
3874 : {
3875 : /*
3876 : * We have aggregation, possibly with plain GROUP BY. Make
3877 : * an AggPath.
3878 : */
3879 2325 : add_path(grouped_rel, (Path *)
3880 4650 : create_agg_path(root,
3881 : grouped_rel,
3882 : path,
3883 : target,
3884 2325 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3885 : AGGSPLIT_SIMPLE,
3886 : parse->groupClause,
3887 2325 : (List *) parse->havingQual,
3888 : agg_costs,
3889 : dNumGroups));
3890 : }
3891 39 : else if (parse->groupClause)
3892 : {
3893 : /*
3894 : * We have GROUP BY without aggregation or grouping sets.
3895 : * Make a GroupPath.
3896 : */
3897 39 : add_path(grouped_rel, (Path *)
3898 39 : create_group_path(root,
3899 : grouped_rel,
3900 : path,
3901 : target,
3902 : parse->groupClause,
3903 39 : (List *) parse->havingQual,
3904 : dNumGroups));
3905 : }
3906 : else
3907 : {
3908 : /* Other cases should have been handled above */
3909 0 : Assert(false);
3910 : }
3911 : }
3912 : }
3913 :
3914 : /*
3915 : * Now generate a complete GroupAgg Path atop the cheapest partial
3916 : * path. We can do this using either Gather or Gather Merge.
3917 : */
3918 2402 : if (grouped_rel->partial_pathlist)
3919 : {
3920 45 : Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
3921 45 : double total_groups = path->rows * path->parallel_workers;
3922 :
3923 45 : path = (Path *) create_gather_path(root,
3924 : grouped_rel,
3925 : path,
3926 : partial_grouping_target,
3927 : NULL,
3928 : &total_groups);
3929 :
3930 : /*
3931 : * Since Gather's output is always unsorted, we'll need to sort,
3932 : * unless there's no GROUP BY clause or a degenerate (constant)
3933 : * one, in which case there will only be a single group.
3934 : */
3935 45 : if (root->group_pathkeys)
3936 11 : path = (Path *) create_sort_path(root,
3937 : grouped_rel,
3938 : path,
3939 : root->group_pathkeys,
3940 : -1.0);
3941 :
3942 45 : if (parse->hasAggs)
3943 41 : add_path(grouped_rel, (Path *)
3944 82 : create_agg_path(root,
3945 : grouped_rel,
3946 : path,
3947 : target,
3948 41 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3949 : AGGSPLIT_FINAL_DESERIAL,
3950 : parse->groupClause,
3951 41 : (List *) parse->havingQual,
3952 : &agg_final_costs,
3953 : dNumGroups));
3954 : else
3955 4 : add_path(grouped_rel, (Path *)
3956 4 : create_group_path(root,
3957 : grouped_rel,
3958 : path,
3959 : target,
3960 : parse->groupClause,
3961 4 : (List *) parse->havingQual,
3962 : dNumGroups));
3963 :
3964 : /*
3965 : * The point of using Gather Merge rather than Gather is that it
3966 : * can preserve the ordering of the input path, so there's no
3967 : * reason to try it unless (1) it's possible to produce more than
3968 : * one output row and (2) we want the output path to be ordered.
3969 : */
3970 45 : if (parse->groupClause != NIL && root->group_pathkeys != NIL)
3971 : {
3972 28 : foreach(lc, grouped_rel->partial_pathlist)
3973 : {
3974 17 : Path *subpath = (Path *) lfirst(lc);
3975 : Path *gmpath;
3976 : double total_groups;
3977 :
3978 : /*
3979 : * It's useful to consider paths that are already properly
3980 : * ordered for Gather Merge, because those don't need a
3981 : * sort. It's also useful to consider the cheapest path,
3982 : * because sorting it in parallel and then doing Gather
3983 : * Merge may be better than doing an unordered Gather
3984 : * followed by a sort. But there's no point in
3985 : * considering non-cheapest paths that aren't already
3986 : * sorted correctly.
3987 : */
3988 34 : if (path != subpath &&
3989 17 : !pathkeys_contained_in(root->group_pathkeys,
3990 : subpath->pathkeys))
3991 6 : continue;
3992 :
3993 11 : total_groups = subpath->rows * subpath->parallel_workers;
3994 :
3995 11 : gmpath = (Path *)
3996 11 : create_gather_merge_path(root,
3997 : grouped_rel,
3998 : subpath,
3999 : partial_grouping_target,
4000 : root->group_pathkeys,
4001 : NULL,
4002 : &total_groups);
4003 :
4004 11 : if (parse->hasAggs)
4005 7 : add_path(grouped_rel, (Path *)
4006 14 : create_agg_path(root,
4007 : grouped_rel,
4008 : gmpath,
4009 : target,
4010 7 : parse->groupClause ? AGG_SORTED : AGG_PLAIN,
4011 : AGGSPLIT_FINAL_DESERIAL,
4012 : parse->groupClause,
4013 7 : (List *) parse->havingQual,
4014 : &agg_final_costs,
4015 : dNumGroups));
4016 : else
4017 4 : add_path(grouped_rel, (Path *)
4018 4 : create_group_path(root,
4019 : grouped_rel,
4020 : gmpath,
4021 : target,
4022 : parse->groupClause,
4023 4 : (List *) parse->havingQual,
4024 : dNumGroups));
4025 : }
4026 : }
4027 : }
4028 : }
4029 :
4030 2402 : if (can_hash)
4031 : {
4032 287 : if (parse->groupingSets)
4033 : {
4034 : /*
4035 : * Try for a hash-only groupingsets path over unsorted input.
4036 : */
4037 77 : consider_groupingsets_paths(root, grouped_rel,
4038 : cheapest_path, false, true, target,
4039 : gd, agg_costs, dNumGroups);
4040 : }
4041 : else
4042 : {
4043 210 : hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
4044 : agg_costs,
4045 : dNumGroups);
4046 :
4047 : /*
4048 : * Provided that the estimated size of the hashtable does not
4049 : * exceed work_mem, we'll generate a HashAgg Path. If we were
4050 : * unable to sort above, we generate one regardless, so that we
4051 : * have at least one Path.
4052 : */
4053 220 : if (hashaggtablesize < work_mem * 1024L ||
4054 10 : grouped_rel->pathlist == NIL)
4055 : {
4056 : /*
4057 : * We just need an Agg over the cheapest-total input path,
4058 : * since input order won't matter.
4059 : */
4060 200 : add_path(grouped_rel, (Path *)
4061 200 : create_agg_path(root, grouped_rel,
4062 : cheapest_path,
4063 : target,
4064 : AGG_HASHED,
4065 : AGGSPLIT_SIMPLE,
4066 : parse->groupClause,
4067 200 : (List *) parse->havingQual,
4068 : agg_costs,
4069 : dNumGroups));
4070 : }
4071 : }
4072 :
4073 : /*
4074 : * Generate a HashAgg Path atop the cheapest partial path. Once
4075 : * again, we'll only do this if it looks as though the hash table
4076 : * won't exceed work_mem.
4077 : */
4078 287 : if (grouped_rel->partial_pathlist)
4079 : {
4080 11 : Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
4081 :
4082 11 : hashaggtablesize = estimate_hashagg_tablesize(path,
4083 : &agg_final_costs,
4084 : dNumGroups);
4085 :
4086 11 : if (hashaggtablesize < work_mem * 1024L)
4087 : {
4088 11 : double total_groups = path->rows * path->parallel_workers;
4089 :
4090 11 : path = (Path *) create_gather_path(root,
4091 : grouped_rel,
4092 : path,
4093 : partial_grouping_target,
4094 : NULL,
4095 : &total_groups);
4096 :
4097 11 : add_path(grouped_rel, (Path *)
4098 11 : create_agg_path(root,
4099 : grouped_rel,
4100 : path,
4101 : target,
4102 : AGG_HASHED,
4103 : AGGSPLIT_FINAL_DESERIAL,
4104 : parse->groupClause,
4105 11 : (List *) parse->havingQual,
4106 : &agg_final_costs,
4107 : dNumGroups));
4108 : }
4109 : }
4110 : }
4111 :
4112 : /* Give a helpful error if we failed to find any implementation */
4113 2402 : if (grouped_rel->pathlist == NIL)
4114 1 : ereport(ERROR,
4115 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4116 : errmsg("could not implement GROUP BY"),
4117 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4118 :
4119 : /*
4120 : * If there is an FDW that's responsible for all baserels of the query,
4121 : * let it consider adding ForeignPaths.
4122 : */
4123 2401 : if (grouped_rel->fdwroutine &&
4124 0 : grouped_rel->fdwroutine->GetForeignUpperPaths)
4125 0 : grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
4126 : input_rel, grouped_rel);
4127 :
4128 : /* Let extensions possibly add some more paths */
4129 2401 : if (create_upper_paths_hook)
4130 0 : (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
4131 : input_rel, grouped_rel);
4132 :
4133 : /* Now choose the best path(s) */
4134 2401 : set_cheapest(grouped_rel);
4135 :
4136 : /*
4137 : * We've been using the partial pathlist for the grouped relation to hold
4138 : * partially aggregated paths, but that's actually a little bit bogus
4139 : * because it's unsafe for later planning stages -- like ordered_rel --
4140 : * to get the idea that they can use these partial paths as if they didn't
4141 : * need a FinalizeAggregate step. Zap the partial pathlist at this stage
4142 : * so we don't get confused.
4143 : */
4144 2401 : grouped_rel->partial_pathlist = NIL;
4145 :
4146 2401 : return grouped_rel;
4147 : }
4148 :
4149 :
4150 : /*
4151 : * For a given input path, consider the possible ways of doing grouping sets on
4152 : * it, by combinations of hashing and sorting. This can be called multiple
4153 : * times, so it's important that it not scribble on input. No result is
4154 : * returned, but any generated paths are added to grouped_rel.
4155 : */
4156 : static void
4157 171 : consider_groupingsets_paths(PlannerInfo *root,
4158 : RelOptInfo *grouped_rel,
4159 : Path *path,
4160 : bool is_sorted,
4161 : bool can_hash,
4162 : PathTarget *target,
4163 : grouping_sets_data *gd,
4164 : const AggClauseCosts *agg_costs,
4165 : double dNumGroups)
4166 : {
4167 171 : Query *parse = root->parse;
4168 :
4169 : /*
4170 : * If we're not being offered sorted input, then only consider plans that
4171 : * can be done entirely by hashing.
4172 : *
4173 : * We can hash everything if it looks like it'll fit in work_mem. But if
4174 : * the input is actually sorted despite not being advertised as such, we
4175 : * prefer to make use of that in order to use less memory.
4176 : *
4177 : * If none of the grouping sets are sortable, then ignore the work_mem
4178 : * limit and generate a path anyway, since otherwise we'll just fail.
4179 : */
4180 171 : if (!is_sorted)
4181 : {
4182 77 : List *new_rollups = NIL;
4183 77 : RollupData *unhashed_rollup = NULL;
4184 : List *sets_data;
4185 77 : List *empty_sets_data = NIL;
4186 77 : List *empty_sets = NIL;
4187 : ListCell *lc;
4188 77 : ListCell *l_start = list_head(gd->rollups);
4189 77 : AggStrategy strat = AGG_HASHED;
4190 : Size hashsize;
4191 77 : double exclude_groups = 0.0;
4192 :
4193 77 : Assert(can_hash);
4194 :
4195 77 : if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys))
4196 : {
4197 5 : unhashed_rollup = lfirst(l_start);
4198 5 : exclude_groups = unhashed_rollup->numGroups;
4199 5 : l_start = lnext(l_start);
4200 : }
4201 :
4202 77 : hashsize = estimate_hashagg_tablesize(path,
4203 : agg_costs,
4204 : dNumGroups - exclude_groups);
4205 :
4206 : /*
4207 : * gd->rollups is empty if we have only unsortable columns to work
4208 : * with. Override work_mem in that case; otherwise, we'll rely on the
4209 : * sorted-input case to generate usable mixed paths.
4210 : */
4211 77 : if (hashsize > work_mem * 1024L && gd->rollups)
4212 3 : return; /* nope, won't fit */
4213 :
4214 : /*
4215 : * We need to burst the existing rollups list into individual grouping
4216 : * sets and recompute a groupClause for each set.
4217 : */
4218 74 : sets_data = list_copy(gd->unsortable_sets);
4219 :
4220 179 : for_each_cell(lc, l_start)
4221 : {
4222 109 : RollupData *rollup = lfirst(lc);
4223 :
4224 : /*
4225 : * If we find an unhashable rollup that's not been skipped by the
4226 : * "actually sorted" check above, we can't cope; we'd need sorted
4227 : * input (with a different sort order) but we can't get that here.
4228 : * So bail out; we'll get a valid path from the is_sorted case
4229 : * instead.
4230 : *
4231 : * The mere presence of empty grouping sets doesn't make a rollup
4232 : * unhashable (see preprocess_grouping_sets); we handle those
4233 : * specially below.
4234 : */
4235 109 : if (!rollup->hashable)
4236 4 : return;
4237 : else
4238 105 : sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
4239 : }
4240 284 : foreach(lc, sets_data)
4241 : {
4242 214 : GroupingSetData *gs = lfirst(lc);
4243 214 : List *gset = gs->set;
4244 : RollupData *rollup;
4245 :
4246 214 : if (gset == NIL)
4247 : {
4248 : /* Empty grouping sets can't be hashed. */
4249 51 : empty_sets_data = lappend(empty_sets_data, gs);
4250 51 : empty_sets = lappend(empty_sets, NIL);
4251 : }
4252 : else
4253 : {
4254 163 : rollup = makeNode(RollupData);
4255 :
4256 163 : rollup->groupClause = preprocess_groupclause(root, gset);
4257 163 : rollup->gsets_data = list_make1(gs);
4258 163 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4259 : rollup->gsets_data,
4260 : gd->tleref_to_colnum_map);
4261 163 : rollup->numGroups = gs->numGroups;
4262 163 : rollup->hashable = true;
4263 163 : rollup->is_hashed = true;
4264 163 : new_rollups = lappend(new_rollups, rollup);
4265 : }
4266 : }
4267 :
4268 : /*
4269 : * If we didn't find anything nonempty to hash, then bail. We'll
4270 : * generate a path from the is_sorted case.
4271 : */
4272 70 : if (new_rollups == NIL)
4273 4 : return;
4274 :
4275 : /*
4276 : * If there were empty grouping sets they should have been in the
4277 : * first rollup.
4278 : */
4279 66 : Assert(!unhashed_rollup || !empty_sets);
4280 :
4281 66 : if (unhashed_rollup)
4282 : {
4283 1 : new_rollups = lappend(new_rollups, unhashed_rollup);
4284 1 : strat = AGG_MIXED;
4285 : }
4286 65 : else if (empty_sets)
4287 : {
4288 43 : RollupData *rollup = makeNode(RollupData);
4289 :
4290 43 : rollup->groupClause = NIL;
4291 43 : rollup->gsets_data = empty_sets_data;
4292 43 : rollup->gsets = empty_sets;
4293 43 : rollup->numGroups = list_length(empty_sets);
4294 43 : rollup->hashable = false;
4295 43 : rollup->is_hashed = false;
4296 43 : new_rollups = lappend(new_rollups, rollup);
4297 43 : strat = AGG_MIXED;
4298 : }
4299 :
4300 66 : add_path(grouped_rel, (Path *)
4301 66 : create_groupingsets_path(root,
4302 : grouped_rel,
4303 : path,
4304 : target,
4305 66 : (List *) parse->havingQual,
4306 : strat,
4307 : new_rollups,
4308 : agg_costs,
4309 : dNumGroups));
4310 66 : return;
4311 : }
4312 :
4313 : /*
4314 : * If we have sorted input but nothing we can do with it, bail.
4315 : */
4316 94 : if (list_length(gd->rollups) == 0)
4317 0 : return;
4318 :
4319 : /*
4320 : * Given sorted input, we try to make two paths: one sorted and one mixed
4321 : * sort/hash. (We need to try both because hashagg might be disabled, or
4322 : * some columns might not be sortable.)
4323 : *
4324 : * can_hash is passed in as false if some obstacle elsewhere (such as
4325 : * ordered aggs) means that we shouldn't consider hashing at all.
4326 : */
4327 94 : if (can_hash && gd->any_hashable)
4328 : {
4329 81 : List *rollups = NIL;
4330 81 : List *hash_sets = list_copy(gd->unsortable_sets);
4331 81 : double availspace = (work_mem * 1024.0);
4332 : ListCell *lc;
4333 :
4334 : /*
4335 : * Account first for space needed for groups we can't sort at all.
4336 : */
4337 81 : availspace -= (double) estimate_hashagg_tablesize(path,
4338 : agg_costs,
4339 : gd->dNumHashGroups);
4340 :
4341 81 : if (availspace > 0 && list_length(gd->rollups) > 1)
4342 : {
4343 : double scale;
4344 40 : int num_rollups = list_length(gd->rollups);
4345 : int k_capacity;
4346 40 : int *k_weights = palloc(num_rollups * sizeof(int));
4347 40 : Bitmapset *hash_items = NULL;
4348 : int i;
4349 :
4350 : /*
4351 : * We treat this as a knapsack problem: the knapsack capacity
4352 : * represents work_mem, the item weights are the estimated memory
4353 : * usage of the hashtables needed to implement a single rollup,
4354 : * and we really ought to use the cost saving as the item value;
4355 : * however, currently the costs assigned to sort nodes don't
4356 : * reflect the comparison costs well, and so we treat all items as
4357 : * of equal value (each rollup we hash instead saves us one sort).
4358 : *
4359 : * To use the discrete knapsack, we need to scale the values to a
4360 : * reasonably small bounded range. We choose to allow a 5% error
4361 : * margin; we have no more than 4096 rollups in the worst possible
4362 : * case, which with a 5% error margin will require a bit over 42MB
4363 : * of workspace. (Anyone wanting to plan queries that complex had
4364 : * better have the memory for it. In more reasonable cases, with
4365 : * no more than a couple of dozen rollups, the memory usage will
4366 : * be negligible.)
4367 : *
4368 : * k_capacity is naturally bounded, but we clamp the values for
4369 : * scale and weight (below) to avoid overflows or underflows (or
4370 : * uselessly trying to use a scale factor less than 1 byte).
4371 : */
4372 40 : scale = Max(availspace / (20.0 * num_rollups), 1.0);
4373 40 : k_capacity = (int) floor(availspace / scale);
4374 :
4375 : /*
4376 : * We leave the first rollup out of consideration since it's the
4377 : * one that matches the input sort order. We assign indexes "i"
4378 : * to only those entries considered for hashing; the second loop,
4379 : * below, must use the same condition.
4380 : */
4381 40 : i = 0;
4382 112 : for_each_cell(lc, lnext(list_head(gd->rollups)))
4383 : {
4384 72 : RollupData *rollup = lfirst(lc);
4385 :
4386 72 : if (rollup->hashable)
4387 : {
4388 72 : double sz = estimate_hashagg_tablesize(path,
4389 : agg_costs,
4390 : rollup->numGroups);
4391 :
4392 : /*
4393 : * If sz is enormous, but work_mem (and hence scale) is
4394 : * small, avoid integer overflow here.
4395 : */
4396 72 : k_weights[i] = (int) Min(floor(sz / scale),
4397 : k_capacity + 1.0);
4398 72 : ++i;
4399 : }
4400 : }
4401 :
4402 : /*
4403 : * Apply knapsack algorithm; compute the set of items which
4404 : * maximizes the value stored (in this case the number of sorts
4405 : * saved) while keeping the total size (approximately) within
4406 : * capacity.
4407 : */
4408 40 : if (i > 0)
4409 40 : hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL);
4410 :
4411 40 : if (!bms_is_empty(hash_items))
4412 : {
4413 40 : rollups = list_make1(linitial(gd->rollups));
4414 :
4415 40 : i = 0;
4416 112 : for_each_cell(lc, lnext(list_head(gd->rollups)))
4417 : {
4418 72 : RollupData *rollup = lfirst(lc);
4419 :
4420 72 : if (rollup->hashable)
4421 : {
4422 72 : if (bms_is_member(i, hash_items))
4423 66 : hash_sets = list_concat(hash_sets,
4424 66 : list_copy(rollup->gsets_data));
4425 : else
4426 6 : rollups = lappend(rollups, rollup);
4427 72 : ++i;
4428 : }
4429 : else
4430 0 : rollups = lappend(rollups, rollup);
4431 : }
4432 : }
4433 : }
4434 :
4435 81 : if (!rollups && hash_sets)
4436 4 : rollups = list_copy(gd->rollups);
4437 :
4438 159 : foreach(lc, hash_sets)
4439 : {
4440 78 : GroupingSetData *gs = lfirst(lc);
4441 78 : RollupData *rollup = makeNode(RollupData);
4442 :
4443 78 : Assert(gs->set != NIL);
4444 :
4445 78 : rollup->groupClause = preprocess_groupclause(root, gs->set);
4446 78 : rollup->gsets_data = list_make1(gs);
4447 78 : rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
4448 : rollup->gsets_data,
4449 : gd->tleref_to_colnum_map);
4450 78 : rollup->numGroups = gs->numGroups;
4451 78 : rollup->hashable = true;
4452 78 : rollup->is_hashed = true;
4453 78 : rollups = lcons(rollup, rollups);
4454 : }
4455 :
4456 81 : if (rollups)
4457 : {
4458 44 : add_path(grouped_rel, (Path *)
4459 44 : create_groupingsets_path(root,
4460 : grouped_rel,
4461 : path,
4462 : target,
4463 44 : (List *) parse->havingQual,
4464 : AGG_MIXED,
4465 : rollups,
4466 : agg_costs,
4467 : dNumGroups));
4468 : }
4469 : }
4470 :
4471 : /*
4472 : * Now try the simple sorted case.
4473 : */
4474 94 : if (!gd->unsortable_sets)
4475 89 : add_path(grouped_rel, (Path *)
4476 178 : create_groupingsets_path(root,
4477 : grouped_rel,
4478 : path,
4479 : target,
4480 89 : (List *) parse->havingQual,
4481 : AGG_SORTED,
4482 : gd->rollups,
4483 : agg_costs,
4484 : dNumGroups));
4485 : }
4486 :
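 : /*
 :  * Minimal standalone sketch of the knapsack weight-scaling used above
 :  * (illustrative; the function name and signature are assumptions, not
 :  * planner API).  Hashtable sizes in bytes are mapped onto a small
 :  * integer capacity: one "unit" is at least 1 byte and at most
 :  * availspace / (20 * nitems), so the capacity never exceeds
 :  * 20 * nitems units and the summed rounding error stays around 5%.
 :  */
 : #include <math.h>
 :
 : static void
 : scale_knapsack_weights(double availspace, int nitems,
 : 					   const double *sz, int *weights, int *capacity)
 : {
 : 	double		scale = fmax(availspace / (20.0 * nitems), 1.0);
 :
 : 	*capacity = (int) floor(availspace / scale);
 :
 : 	for (int i = 0; i < nitems; i++)
 : 	{
 : 		/* clamp oversized items to capacity + 1 so they can never fit */
 : 		weights[i] = (int) fmin(floor(sz[i] / scale), *capacity + 1.0);
 : 	}
 : }
 :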
4487 : /*
4488 : * create_window_paths
4489 : *
4490 : * Build a new upperrel containing Paths for window-function evaluation.
4491 : *
4492 : * input_rel: contains the source-data Paths
4493 : * input_target: result of make_window_input_target
4494 : * output_target: what the topmost WindowAggPath should return
4495 : * tlist: query's target list (needed to look up pathkeys)
4496 : * wflists: result of find_window_functions
4497 : * activeWindows: result of select_active_windows
4498 : *
4499 : * Note: all Paths in input_rel are expected to return input_target.
4500 : */
4501 : static RelOptInfo *
4502 133 : create_window_paths(PlannerInfo *root,
4503 : RelOptInfo *input_rel,
4504 : PathTarget *input_target,
4505 : PathTarget *output_target,
4506 : List *tlist,
4507 : WindowFuncLists *wflists,
4508 : List *activeWindows)
4509 : {
4510 : RelOptInfo *window_rel;
4511 : ListCell *lc;
4512 :
4513 : /* For now, do all work in the (WINDOW, NULL) upperrel */
4514 133 : window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
4515 :
4516 : /*
4517 : * If the input relation is not parallel-safe, then the window relation
4518 : * can't be parallel-safe, either. Otherwise, we need to examine the
4519 : * target list and active windows for non-parallel-safe constructs.
4520 : */
4521 237 : if (input_rel->consider_parallel &&
4522 207 : is_parallel_safe(root, (Node *) output_target->exprs) &&
4523 103 : is_parallel_safe(root, (Node *) activeWindows))
4524 102 : window_rel->consider_parallel = true;
4525 :
4526 : /*
4527 : * If the input rel belongs to a single FDW, so does the window rel.
4528 : */
4529 133 : window_rel->serverid = input_rel->serverid;
4530 133 : window_rel->userid = input_rel->userid;
4531 133 : window_rel->useridiscurrent = input_rel->useridiscurrent;
4532 133 : window_rel->fdwroutine = input_rel->fdwroutine;
4533 :
4534 : /*
4535 : * Consider computing window functions starting from the existing
4536 : * cheapest-total path (which will likely require a sort) as well as any
4537 : * existing paths that satisfy root->window_pathkeys (which won't).
4538 : */
4539 280 : foreach(lc, input_rel->pathlist)
4540 : {
4541 147 : Path *path = (Path *) lfirst(lc);
4542 :
4543 161 : if (path == input_rel->cheapest_total_path ||
4544 14 : pathkeys_contained_in(root->window_pathkeys, path->pathkeys))
4545 138 : create_one_window_path(root,
4546 : window_rel,
4547 : path,
4548 : input_target,
4549 : output_target,
4550 : tlist,
4551 : wflists,
4552 : activeWindows);
4553 : }
4554 :
4555 : /*
4556 : * If there is an FDW that's responsible for all baserels of the query,
4557 : * let it consider adding ForeignPaths.
4558 : */
4559 133 : if (window_rel->fdwroutine &&
4560 0 : window_rel->fdwroutine->GetForeignUpperPaths)
4561 0 : window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
4562 : input_rel, window_rel);
4563 :
4564 : /* Let extensions possibly add some more paths */
4565 133 : if (create_upper_paths_hook)
4566 0 : (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
4567 : input_rel, window_rel);
4568 :
4569 : /* Now choose the best path(s) */
4570 133 : set_cheapest(window_rel);
4571 :
4572 133 : return window_rel;
4573 : }
4574 :
4575 : /*
4576 : * Stack window-function implementation steps atop the given Path, and
4577 : * add the result to window_rel.
4578 : *
4579 : * window_rel: upperrel to contain result
4580 : * path: input Path to use (must return input_target)
4581 : * input_target: result of make_window_input_target
4582 : * output_target: what the topmost WindowAggPath should return
4583 : * tlist: query's target list (needed to look up pathkeys)
4584 : * wflists: result of find_window_functions
4585 : * activeWindows: result of select_active_windows
4586 : */
4587 : static void
4588 138 : create_one_window_path(PlannerInfo *root,
4589 : RelOptInfo *window_rel,
4590 : Path *path,
4591 : PathTarget *input_target,
4592 : PathTarget *output_target,
4593 : List *tlist,
4594 : WindowFuncLists *wflists,
4595 : List *activeWindows)
4596 : {
4597 : PathTarget *window_target;
4598 : ListCell *l;
4599 :
4600 : /*
4601 : * Since each window clause could require a different sort order, we stack
4602 : * up a WindowAgg node for each clause, with sort steps between them as
4603 : * needed. (We assume that select_active_windows chose a good order for
4604 : * executing the clauses in.)
4605 : *
4606 : * input_target should contain all Vars and Aggs needed for the result.
4607 : * (In some cases we wouldn't need to propagate all of these all the way
4608 : * to the top, since they might only be needed as inputs to WindowFuncs.
4609 : * It's probably not worth trying to optimize that though.) It must also
4610 : * contain all window partitioning and sorting expressions, to ensure
4611 : * they're computed only once at the bottom of the stack (that's critical
4612 : * for volatile functions). As we climb up the stack, we'll add outputs
4613 : * for the WindowFuncs computed at each level.
4614 : */
4615 138 : window_target = input_target;
4616 :
4617 285 : foreach(l, activeWindows)
4618 : {
4619 147 : WindowClause *wc = (WindowClause *) lfirst(l);
4620 : List *window_pathkeys;
4621 :
4622 147 : window_pathkeys = make_pathkeys_for_window(root,
4623 : wc,
4624 : tlist);
4625 :
4626 : /* Sort if necessary */
4627 147 : if (!pathkeys_contained_in(window_pathkeys, path->pathkeys))
4628 : {
4629 116 : path = (Path *) create_sort_path(root, window_rel,
4630 : path,
4631 : window_pathkeys,
4632 : -1.0);
4633 : }
4634 :
4635 147 : if (lnext(l))
4636 : {
4637 : /*
4638 : * Add the current WindowFuncs to the output target for this
4639 : * intermediate WindowAggPath. We must copy window_target to
4640 : * avoid changing the previous path's target.
4641 : *
4642 : * Note: a WindowFunc adds nothing to the target's eval costs; but
4643 : * we do need to account for the increase in tlist width.
4644 : */
4645 : ListCell *lc2;
4646 :
4647 9 : window_target = copy_pathtarget(window_target);
4648 18 : foreach(lc2, wflists->windowFuncs[wc->winref])
4649 : {
4650 9 : WindowFunc *wfunc = lfirst_node(WindowFunc, lc2);
4651 :
4652 9 : add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4653 9 : window_target->width += get_typavgwidth(wfunc->wintype, -1);
4654 : }
4655 : }
4656 : else
4657 : {
4658 : /* Install the goal target in the topmost WindowAgg */
4659 138 : window_target = output_target;
4660 : }
4661 :
4662 147 : path = (Path *)
4663 147 : create_windowagg_path(root, window_rel, path, window_target,
4664 147 : wflists->windowFuncs[wc->winref],
4665 : wc,
4666 : window_pathkeys);
4667 : }
4668 :
4669 138 : add_path(window_rel, path);
4670 138 : }
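 :
 : /*
 : * Illustrative example (t is a hypothetical table): a query with two
 : * differently-ordered windows, such as
 : *
 : *		SELECT rank() OVER (ORDER BY a), rank() OVER (ORDER BY b) FROM t;
 : *
 : * yields a stack of the form Sort(a) -> WindowAgg -> Sort(b) -> WindowAgg,
 : * where the intermediate WindowAgg's copied target is widened to carry the
 : * first rank() result up to the topmost node.
 : */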
4671 :
4672 : /*
4673 : * create_distinct_paths
4674 : *
4675 : * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4676 : *
4677 : * input_rel: contains the source-data Paths
4678 : *
4679 : * Note: input paths should already compute the desired pathtarget, since
4680 : * Sort/Unique won't project anything.
4681 : */
4682 : static RelOptInfo *
4683 60 : create_distinct_paths(PlannerInfo *root,
4684 : RelOptInfo *input_rel)
4685 : {
4686 60 : Query *parse = root->parse;
4687 60 : Path *cheapest_input_path = input_rel->cheapest_total_path;
4688 : RelOptInfo *distinct_rel;
4689 : double numDistinctRows;
4690 : bool allow_hash;
4691 : Path *path;
4692 : ListCell *lc;
4693 :
4694 : /* For now, do all work in the (DISTINCT, NULL) upperrel */
4695 60 : distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4696 :
4697 : /*
4698 : * We don't compute anything at this level, so distinct_rel will be
4699 : * parallel-safe if the input rel is parallel-safe. In particular, if
4700 : * there is a DISTINCT ON (...) clause, any path for the input_rel will
4701 : * output those expressions, and will not be parallel-safe unless those
4702 : * expressions are parallel-safe.
4703 : */
4704 60 : distinct_rel->consider_parallel = input_rel->consider_parallel;
4705 :
4706 : /*
4707 : * If the input rel belongs to a single FDW, so does the distinct_rel.
4708 : */
4709 60 : distinct_rel->serverid = input_rel->serverid;
4710 60 : distinct_rel->userid = input_rel->userid;
4711 60 : distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4712 60 : distinct_rel->fdwroutine = input_rel->fdwroutine;
4713 :
4714 : /* Estimate number of distinct rows there will be */
4715 116 : if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4716 56 : root->hasHavingQual)
4717 : {
4718 : /*
4719 : * If there was grouping or aggregation, use the number of input rows
4720 : * as the estimated number of DISTINCT rows (ie, assume the input is
4721 : * already mostly unique).
4722 : */
4723 4 : numDistinctRows = cheapest_input_path->rows;
4724 : }
4725 : else
4726 : {
4727 : /*
4728 : * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4729 : */
4730 : List *distinctExprs;
4731 :
4732 56 : distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
4733 : parse->targetList);
4734 56 : numDistinctRows = estimate_num_groups(root, distinctExprs,
4735 : cheapest_input_path->rows,
4736 : NULL);
4737 : }
4738 :
4739 : /*
4740 : * Consider sort-based implementations of DISTINCT, if possible.
4741 : */
4742 60 : if (grouping_is_sortable(parse->distinctClause))
4743 : {
4744 : /*
4745 : * First, if we have any adequately-presorted paths, just stick a
4746 : * Unique node on those. Then consider doing an explicit sort of the
4747 : * cheapest input path and Unique'ing that.
4748 : *
4749 : * When we have DISTINCT ON, we must sort by the more rigorous of
4750 : 		 * the DISTINCT and ORDER BY orderings, else the result won't be as desired.
4751 : * Also, if we do have to do an explicit sort, we might as well use
4752 : * the more rigorous ordering to avoid a second sort later. (Note
4753 : * that the parser will have ensured that one clause is a prefix of
4754 : * the other.)
4755 : */
4756 : List *needed_pathkeys;
4757 :
4758 68 : if (parse->hasDistinctOn &&
4759 9 : list_length(root->distinct_pathkeys) <
4760 9 : list_length(root->sort_pathkeys))
4761 5 : needed_pathkeys = root->sort_pathkeys;
4762 : else
4763 54 : needed_pathkeys = root->distinct_pathkeys;
4764 :
4765 122 : foreach(lc, input_rel->pathlist)
4766 : {
4767 63 : Path *path = (Path *) lfirst(lc);
4768 :
4769 63 : if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4770 : {
4771 2 : add_path(distinct_rel, (Path *)
4772 2 : create_upper_unique_path(root, distinct_rel,
4773 : path,
4774 2 : list_length(root->distinct_pathkeys),
4775 : numDistinctRows));
4776 : }
4777 : }
4778 :
4779 : /* For explicit-sort case, always use the more rigorous clause */
4780 118 : if (list_length(root->distinct_pathkeys) <
4781 59 : list_length(root->sort_pathkeys))
4782 : {
4783 5 : needed_pathkeys = root->sort_pathkeys;
4784 : /* Assert checks that parser didn't mess up... */
4785 5 : Assert(pathkeys_contained_in(root->distinct_pathkeys,
4786 : needed_pathkeys));
4787 : }
4788 : else
4789 54 : needed_pathkeys = root->distinct_pathkeys;
4790 :
4791 59 : path = cheapest_input_path;
4792 59 : if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4793 57 : path = (Path *) create_sort_path(root, distinct_rel,
4794 : path,
4795 : needed_pathkeys,
4796 : -1.0);
4797 :
4798 59 : add_path(distinct_rel, (Path *)
4799 59 : create_upper_unique_path(root, distinct_rel,
4800 : path,
4801 59 : list_length(root->distinct_pathkeys),
4802 : numDistinctRows));
4803 : }
4804 :
4805 : /*
4806 : * Consider hash-based implementations of DISTINCT, if possible.
4807 : *
4808 : * If we were not able to make any other types of path, we *must* hash or
4809 : * die trying. If we do have other choices, there are several things that
4810 : * should prevent selection of hashing: if the query uses DISTINCT ON
4811 : * (because it won't really have the expected behavior if we hash), or if
4812 : * enable_hashagg is off, or if it looks like the hashtable will exceed
4813 : * work_mem.
4814 : *
4815 : * Note: grouping_is_hashable() is much more expensive to check than the
4816 : * other gating conditions, so we want to do it last.
4817 : */
4818 60 : if (distinct_rel->pathlist == NIL)
4819 1 : allow_hash = true; /* we have no alternatives */
4820 59 : else if (parse->hasDistinctOn || !enable_hashagg)
4821 9 : allow_hash = false; /* policy-based decision not to hash */
4822 : else
4823 : {
4824 : Size hashentrysize;
4825 :
4826 : /* Estimate per-hash-entry space at tuple width... */
4827 50 : hashentrysize = MAXALIGN(cheapest_input_path->pathtarget->width) +
4828 : MAXALIGN(SizeofMinimalTupleHeader);
4829 : /* plus the per-hash-entry overhead */
4830 50 : hashentrysize += hash_agg_entry_size(0);
4831 :
4832 : /* Allow hashing only if hashtable is predicted to fit in work_mem */
4833 50 : allow_hash = (hashentrysize * numDistinctRows <= work_mem * 1024L);
4834 : }
4835 :
4836 60 : if (allow_hash && grouping_is_hashable(parse->distinctClause))
4837 : {
4838 : /* Generate hashed aggregate path --- no sort needed */
4839 51 : add_path(distinct_rel, (Path *)
4840 51 : create_agg_path(root,
4841 : distinct_rel,
4842 : cheapest_input_path,
4843 : cheapest_input_path->pathtarget,
4844 : AGG_HASHED,
4845 : AGGSPLIT_SIMPLE,
4846 : parse->distinctClause,
4847 : NIL,
4848 : NULL,
4849 : numDistinctRows));
4850 : }
4851 :
4852 : /* Give a helpful error if we failed to find any implementation */
4853 60 : if (distinct_rel->pathlist == NIL)
4854 0 : ereport(ERROR,
4855 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4856 : errmsg("could not implement DISTINCT"),
4857 : errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4858 :
4859 : /*
4860 : * If there is an FDW that's responsible for all baserels of the query,
4861 : * let it consider adding ForeignPaths.
4862 : */
4863 60 : if (distinct_rel->fdwroutine &&
4864 0 : distinct_rel->fdwroutine->GetForeignUpperPaths)
4865 0 : distinct_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_DISTINCT,
4866 : input_rel, distinct_rel);
4867 :
4868 : /* Let extensions possibly add some more paths */
4869 60 : if (create_upper_paths_hook)
4870 0 : (*create_upper_paths_hook) (root, UPPERREL_DISTINCT,
4871 : input_rel, distinct_rel);
4872 :
4873 : /* Now choose the best path(s) */
4874 60 : set_cheapest(distinct_rel);
4875 :
4876 60 : return distinct_rel;
4877 : }
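 :
 : /*
 : * Illustrative example (t(a, b) is a hypothetical table):
 : *
 : *		SELECT DISTINCT ON (a) a, b FROM t ORDER BY a, b;
 : *
 : * The distinct pathkeys (a) are a prefix of the sort pathkeys (a, b), so
 : * the explicit sort above uses the more rigorous (a, b) ordering, avoiding
 : * a second sort later; hashing is not considered at all because the query
 : * uses DISTINCT ON.
 : */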
4878 :
4879 : /*
4880 : * create_ordered_paths
4881 : *
4882 : * Build a new upperrel containing Paths for ORDER BY evaluation.
4883 : *
4884 : * All paths in the result must satisfy the ORDER BY ordering.
4885 :  * The main new path we need to consider is an explicit sort on the
4886 :  * cheapest-total existing path (see below for a Gather Merge variant).
4887 : *
4888 : * input_rel: contains the source-data Paths
4889 : * target: the output tlist the result Paths must emit
4890 : * limit_tuples: estimated bound on the number of output tuples,
4891 : * or -1 if no LIMIT or couldn't estimate
4892 : */
4893 : static RelOptInfo *
4894 2816 : create_ordered_paths(PlannerInfo *root,
4895 : RelOptInfo *input_rel,
4896 : PathTarget *target,
4897 : double limit_tuples)
4898 : {
4899 2816 : Path *cheapest_input_path = input_rel->cheapest_total_path;
4900 : RelOptInfo *ordered_rel;
4901 : ListCell *lc;
4902 :
4903 : /* For now, do all work in the (ORDERED, NULL) upperrel */
4904 2816 : ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
4905 :
4906 : /*
4907 : * If the input relation is not parallel-safe, then the ordered relation
4908 : * can't be parallel-safe, either. Otherwise, it's parallel-safe if the
4909 : * target list is parallel-safe.
4910 : */
4911 5065 : if (input_rel->consider_parallel &&
4912 2249 : is_parallel_safe(root, (Node *) target->exprs))
4913 1612 : ordered_rel->consider_parallel = true;
4914 :
4915 : /*
4916 : * If the input rel belongs to a single FDW, so does the ordered_rel.
4917 : */
4918 2816 : ordered_rel->serverid = input_rel->serverid;
4919 2816 : ordered_rel->userid = input_rel->userid;
4920 2816 : ordered_rel->useridiscurrent = input_rel->useridiscurrent;
4921 2816 : ordered_rel->fdwroutine = input_rel->fdwroutine;
4922 :
4923 6674 : foreach(lc, input_rel->pathlist)
4924 : {
4925 3858 : Path *path = (Path *) lfirst(lc);
4926 : bool is_sorted;
4927 :
4928 3858 : is_sorted = pathkeys_contained_in(root->sort_pathkeys,
4929 : path->pathkeys);
4930 3858 : if (path == cheapest_input_path || is_sorted)
4931 : {
4932 3814 : if (!is_sorted)
4933 : {
4934 : /* An explicit sort here can take advantage of LIMIT */
4935 2303 : path = (Path *) create_sort_path(root,
4936 : ordered_rel,
4937 : path,
4938 : root->sort_pathkeys,
4939 : limit_tuples);
4940 : }
4941 :
4942 : /* Add projection step if needed */
4943 3814 : if (path->pathtarget != target)
4944 35 : path = apply_projection_to_path(root, ordered_rel,
4945 : path, target);
4946 :
4947 3814 : add_path(ordered_rel, path);
4948 : }
4949 : }
4950 :
4951 : /*
4952 : * generate_gather_paths() will have already generated a simple Gather
4953 : * path for the best parallel path, if any, and the loop above will have
4954 : * considered sorting it. Similarly, generate_gather_paths() will also
4955 : * have generated order-preserving Gather Merge plans which can be used
4956 : * without sorting if they happen to match the sort_pathkeys, and the loop
4957 : * above will have handled those as well. However, there's one more
4958 : * possibility: it may make sense to sort the cheapest partial path
4959 : * according to the required output order and then use Gather Merge.
4960 : */
4961 4422 : if (ordered_rel->consider_parallel && root->sort_pathkeys != NIL &&
4962 1606 : input_rel->partial_pathlist != NIL)
4963 : {
4964 : Path *cheapest_partial_path;
4965 :
4966 32 : cheapest_partial_path = linitial(input_rel->partial_pathlist);
4967 :
4968 : /*
4969 : * If cheapest partial path doesn't need a sort, this is redundant
4970 : * with what's already been tried.
4971 : */
4972 32 : if (!pathkeys_contained_in(root->sort_pathkeys,
4973 : cheapest_partial_path->pathkeys))
4974 : {
4975 : Path *path;
4976 : double total_groups;
4977 :
4978 32 : path = (Path *) create_sort_path(root,
4979 : ordered_rel,
4980 : cheapest_partial_path,
4981 : root->sort_pathkeys,
4982 : -1.0);
4983 :
4984 64 : total_groups = cheapest_partial_path->rows *
4985 32 : cheapest_partial_path->parallel_workers;
4986 32 : path = (Path *)
4987 32 : create_gather_merge_path(root, ordered_rel,
4988 : path,
4989 : target, root->sort_pathkeys, NULL,
4990 : &total_groups);
4991 :
4992 : /* Add projection step if needed */
4993 32 : if (path->pathtarget != target)
4994 0 : path = apply_projection_to_path(root, ordered_rel,
4995 : path, target);
4996 :
4997 32 : add_path(ordered_rel, path);
4998 : }
4999 : }
5000 :
5001 : /*
5002 : * If there is an FDW that's responsible for all baserels of the query,
5003 : * let it consider adding ForeignPaths.
5004 : */
5005 2816 : if (ordered_rel->fdwroutine &&
5006 0 : ordered_rel->fdwroutine->GetForeignUpperPaths)
5007 0 : ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
5008 : input_rel, ordered_rel);
5009 :
5010 : /* Let extensions possibly add some more paths */
5011 2816 : if (create_upper_paths_hook)
5012 0 : (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
5013 : input_rel, ordered_rel);
5014 :
5015 : /*
5016 : * No need to bother with set_cheapest here; grouping_planner does not
5017 : * need us to do it.
5018 : */
5019 2816 : Assert(ordered_rel->pathlist != NIL);
5020 :
5021 2816 : return ordered_rel;
5022 : }
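 :
 : /*
 : * Illustrative example (t is a hypothetical table): for
 : *
 : *		SELECT * FROM t ORDER BY a LIMIT 10;
 : *
 : * the explicit sort created above receives limit_tuples = 10 and can be
 : * costed as a bounded (top-N) sort; if partial paths are available, a
 : * Gather Merge over per-worker sorts of the cheapest partial path is
 : * considered as well.
 : */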
5023 :
5024 :
5025 : /*
5026 : * make_group_input_target
5027 : * Generate appropriate PathTarget for initial input to grouping nodes.
5028 : *
5029 : * If there is grouping or aggregation, the scan/join subplan cannot emit
5030 : * the query's final targetlist; for example, it certainly can't emit any
5031 : * aggregate function calls. This routine generates the correct target
5032 : * for the scan/join subplan.
5033 : *
5034 : * The query target list passed from the parser already contains entries
5035 : * for all ORDER BY and GROUP BY expressions, but it will not have entries
5036 : * for variables used only in HAVING clauses; so we need to add those
5037 : * variables to the subplan target list. Also, we flatten all expressions
5038 : * except GROUP BY items into their component variables; other expressions
5039 : * will be computed by the upper plan nodes rather than by the subplan.
5040 : * For example, given a query like
5041 : * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
5042 : * we want to pass this targetlist to the subplan:
5043 : * a+b,c,d
5044 : * where the a+b target will be used by the Sort/Group steps, and the
5045 : * other targets will be used for computing the final results.
5046 : *
5047 : * 'final_target' is the query's final target list (in PathTarget form)
5048 : *
5049 : * The result is the PathTarget to be computed by the Paths returned from
5050 : * query_planner().
5051 : */
5052 : static PathTarget *
5053 2405 : make_group_input_target(PlannerInfo *root, PathTarget *final_target)
5054 : {
5055 2405 : Query *parse = root->parse;
5056 : PathTarget *input_target;
5057 : List *non_group_cols;
5058 : List *non_group_vars;
5059 : int i;
5060 : ListCell *lc;
5061 :
5062 : /*
5063 : * We must build a target containing all grouping columns, plus any other
5064 : * Vars mentioned in the query's targetlist and HAVING qual.
5065 : */
5066 2405 : input_target = create_empty_pathtarget();
5067 2405 : non_group_cols = NIL;
5068 :
5069 2405 : i = 0;
5070 5700 : foreach(lc, final_target->exprs)
5071 : {
5072 3295 : Expr *expr = (Expr *) lfirst(lc);
5073 3295 : Index sgref = get_pathtarget_sortgroupref(final_target, i);
5074 :
5075 3875 : if (sgref && parse->groupClause &&
5076 580 : get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
5077 : {
5078 : /*
5079 : * It's a grouping column, so add it to the input target as-is.
5080 : */
5081 515 : add_column_to_pathtarget(input_target, expr, sgref);
5082 : }
5083 : else
5084 : {
5085 : /*
5086 : * Non-grouping column, so just remember the expression for later
5087 : * call to pull_var_clause.
5088 : */
5089 2780 : non_group_cols = lappend(non_group_cols, expr);
5090 : }
5091 :
5092 3295 : i++;
5093 : }
5094 :
5095 : /*
5096 : * If there's a HAVING clause, we'll need the Vars it uses, too.
5097 : */
5098 2405 : if (parse->havingQual)
5099 24 : non_group_cols = lappend(non_group_cols, parse->havingQual);
5100 :
5101 : /*
5102 : * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
5103 : * add them to the input target if not already present. (A Var used
5104 : * directly as a GROUP BY item will be present already.) Note this
5105 : * includes Vars used in resjunk items, so we are covering the needs of
5106 : * ORDER BY and window specifications. Vars used within Aggrefs and
5107 : * WindowFuncs will be pulled out here, too.
5108 : */
5109 2405 : non_group_vars = pull_var_clause((Node *) non_group_cols,
5110 : PVC_RECURSE_AGGREGATES |
5111 : PVC_RECURSE_WINDOWFUNCS |
5112 : PVC_INCLUDE_PLACEHOLDERS);
5113 2405 : add_new_columns_to_pathtarget(input_target, non_group_vars);
5114 :
5115 : /* clean up cruft */
5116 2405 : list_free(non_group_vars);
5117 2405 : list_free(non_group_cols);
5118 :
5119 : /* XXX this causes some redundant cost calculation ... */
5120 2405 : return set_pathtarget_cost_width(root, input_target);
5121 : }
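 :
 : /*
 : * Extending the example in the header comment (hypothetically): for
 : *
 : *		SELECT a+b, SUM(c+d) FROM table GROUP BY a+b HAVING MIN(e) > 0;
 : *
 : * the subplan target becomes a+b, c, d, e --- the extra e is pulled out
 : * of the HAVING clause's MIN() aggregate by pull_var_clause.
 : */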
5122 :
5123 : /*
5124 : * make_partial_grouping_target
5125 : * Generate appropriate PathTarget for output of partial aggregate
5126 : * (or partial grouping, if there are no aggregates) nodes.
5127 : *
5128 : * A partial aggregation node needs to emit all the same aggregates that
5129 :  * a regular aggregation node would, plus any aggregates used in HAVING,
5130 :  * except that the Aggref nodes must be marked as partial aggregates.
5131 : *
5132 : * In addition, we'd better emit any Vars and PlaceholderVars that are
5133 : * used outside of Aggrefs in the aggregation tlist and HAVING. (Presumably,
5134 : * these would be Vars that are grouped by or used in grouping expressions.)
5135 : *
5136 : * grouping_target is the tlist to be emitted by the topmost aggregation step.
5137 : * We get the HAVING clause out of *root.
5138 : */
5139 : static PathTarget *
5140 45 : make_partial_grouping_target(PlannerInfo *root, PathTarget *grouping_target)
5141 : {
5142 45 : Query *parse = root->parse;
5143 : PathTarget *partial_target;
5144 : List *non_group_cols;
5145 : List *non_group_exprs;
5146 : int i;
5147 : ListCell *lc;
5148 :
5149 45 : partial_target = create_empty_pathtarget();
5150 45 : non_group_cols = NIL;
5151 :
5152 45 : i = 0;
5153 105 : foreach(lc, grouping_target->exprs)
5154 : {
5155 60 : Expr *expr = (Expr *) lfirst(lc);
5156 60 : Index sgref = get_pathtarget_sortgroupref(grouping_target, i);
5157 :
5158 72 : if (sgref && parse->groupClause &&
5159 12 : get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
5160 : {
5161 : /*
5162 : * It's a grouping column, so add it to the partial_target as-is.
5163 : * (This allows the upper agg step to repeat the grouping calcs.)
5164 : */
5165 12 : add_column_to_pathtarget(partial_target, expr, sgref);
5166 : }
5167 : else
5168 : {
5169 : /*
5170 : * Non-grouping column, so just remember the expression for later
5171 : * call to pull_var_clause.
5172 : */
5173 48 : non_group_cols = lappend(non_group_cols, expr);
5174 : }
5175 :
5176 60 : i++;
5177 : }
5178 :
5179 : /*
5180 : * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
5181 : */
5182 45 : if (parse->havingQual)
5183 0 : non_group_cols = lappend(non_group_cols, parse->havingQual);
5184 :
5185 : /*
5186 : * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
5187 : * non-group cols (plus HAVING), and add them to the partial_target if not
5188 : * already present. (An expression used directly as a GROUP BY item will
5189 : * be present already.) Note this includes Vars used in resjunk items, so
5190 : * we are covering the needs of ORDER BY and window specifications.
5191 : */
5192 45 : non_group_exprs = pull_var_clause((Node *) non_group_cols,
5193 : PVC_INCLUDE_AGGREGATES |
5194 : PVC_RECURSE_WINDOWFUNCS |
5195 : PVC_INCLUDE_PLACEHOLDERS);
5196 :
5197 45 : add_new_columns_to_pathtarget(partial_target, non_group_exprs);
5198 :
5199 : /*
5200 : * Adjust Aggrefs to put them in partial mode. At this point all Aggrefs
5201 : * are at the top level of the target list, so we can just scan the list
5202 : * rather than recursing through the expression trees.
5203 : */
5204 105 : foreach(lc, partial_target->exprs)
5205 : {
5206 60 : Aggref *aggref = (Aggref *) lfirst(lc);
5207 :
5208 60 : if (IsA(aggref, Aggref))
5209 : {
5210 : Aggref *newaggref;
5211 :
5212 : /*
5213 : * We shouldn't need to copy the substructure of the Aggref node,
5214 : * but flat-copy the node itself to avoid damaging other trees.
5215 : */
5216 48 : newaggref = makeNode(Aggref);
5217 48 : memcpy(newaggref, aggref, sizeof(Aggref));
5218 :
5219 : /* For now, assume serialization is required */
5220 48 : mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
5221 :
5222 48 : lfirst(lc) = newaggref;
5223 : }
5224 : }
5225 :
5226 : /* clean up cruft */
5227 45 : list_free(non_group_exprs);
5228 45 : list_free(non_group_cols);
5229 :
5230 : /* XXX this causes some redundant cost calculation ... */
5231 45 : return set_pathtarget_cost_width(root, partial_target);
5232 : }
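 :
 : /*
 : * Illustrative example (t is a hypothetical table): for a parallelized
 : *
 : *		SELECT a, sum(b) FROM t GROUP BY a;
 : *
 : * the Partial Aggregate nodes emit the grouping column a plus a partial
 : * sum(b) marked AGGSPLIT_INITIAL_SERIAL; the Finalize Aggregate above the
 : * Gather then combines those partial states per group.
 : */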
5233 :
5234 : /*
5235 : * mark_partial_aggref
5236 : * Adjust an Aggref to make it represent a partial-aggregation step.
5237 : *
5238 : * The Aggref node is modified in-place; caller must do any copying required.
5239 : */
5240 : void
5241 88 : mark_partial_aggref(Aggref *agg, AggSplit aggsplit)
5242 : {
5243 : /* aggtranstype should be computed by this point */
5244 88 : Assert(OidIsValid(agg->aggtranstype));
5245 : /* ... but aggsplit should still be as the parser left it */
5246 88 : Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
5247 :
5248 : /* Mark the Aggref with the intended partial-aggregation mode */
5249 88 : agg->aggsplit = aggsplit;
5250 :
5251 : /*
5252 : * Adjust result type if needed. Normally, a partial aggregate returns
5253 : * the aggregate's transition type; but if that's INTERNAL and we're
5254 : * serializing, it returns BYTEA instead.
5255 : */
5256 88 : if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
5257 : {
5258 68 : if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
5259 0 : agg->aggtype = BYTEAOID;
5260 : else
5261 68 : agg->aggtype = agg->aggtranstype;
5262 : }
5263 88 : }
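 :
 : /*
 : * For instance, sum(int8) has an INTERNAL transition type, so an Aggref
 : * for it marked AGGSPLIT_INITIAL_SERIAL gets aggtype = BYTEAOID (the
 : * serialized state crosses the worker/leader boundary as a bytea), while
 : * sum(int4), whose transition type is int8, gets aggtype = INT8OID.
 : */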
5264 :
5265 : /*
5266 : * postprocess_setop_tlist
5267 : * Fix up targetlist returned by plan_set_operations().
5268 : *
5269 : * We need to transpose sort key info from the orig_tlist into new_tlist.
5270 : * NOTE: this would not be good enough if we supported resjunk sort keys
5271 : * for results of set operations --- then, we'd need to project a whole
5272 :  * new tlist to evaluate the resjunk columns.  For now, just elog(ERROR)
5273 :  * if we find any resjunk columns in orig_tlist.
5274 : */
5275 : static List *
5276 127 : postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
5277 : {
5278 : ListCell *l;
5279 127 : ListCell *orig_tlist_item = list_head(orig_tlist);
5280 :
5281 354 : foreach(l, new_tlist)
5282 : {
5283 227 : TargetEntry *new_tle = (TargetEntry *) lfirst(l);
5284 : TargetEntry *orig_tle;
5285 :
5286 : /* ignore resjunk columns in setop result */
5287 227 : if (new_tle->resjunk)
5288 35 : continue;
5289 :
5290 192 : Assert(orig_tlist_item != NULL);
5291 192 : orig_tle = (TargetEntry *) lfirst(orig_tlist_item);
5292 192 : orig_tlist_item = lnext(orig_tlist_item);
5293 192 : if (orig_tle->resjunk) /* should not happen */
5294 0 : elog(ERROR, "resjunk output columns are not implemented");
5295 192 : Assert(new_tle->resno == orig_tle->resno);
5296 192 : new_tle->ressortgroupref = orig_tle->ressortgroupref;
5297 : }
5298 127 : if (orig_tlist_item != NULL)
5299 0 : elog(ERROR, "resjunk output columns are not implemented");
5300 127 : return new_tlist;
5301 : }
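 :
 : /*
 : * Illustrative example (t1, t2 are hypothetical tables): for
 : *
 : *		SELECT a FROM t1 UNION SELECT a FROM t2 ORDER BY a;
 : *
 : * plan_set_operations() builds a fresh tlist for the setop result, and
 : * this function copies the ORDER BY column's ressortgroupref from the
 : * original tlist onto it so that the final sort step can find its key.
 : */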
5302 :
5303 : /*
5304 : * select_active_windows
5305 : * Create a list of the "active" window clauses (ie, those referenced
5306 : * by non-deleted WindowFuncs) in the order they are to be executed.
5307 : */
5308 : static List *
5309 133 : select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
5310 : {
5311 : List *result;
5312 : List *actives;
5313 : ListCell *lc;
5314 :
5315 : /* First, make a list of the active windows */
5316 133 : actives = NIL;
5317 274 : foreach(lc, root->parse->windowClause)
5318 : {
5319 141 : WindowClause *wc = (WindowClause *) lfirst(lc);
5320 :
5321 : /* It's only active if wflists shows some related WindowFuncs */
5322 141 : Assert(wc->winref <= wflists->maxWinRef);
5323 141 : if (wflists->windowFuncs[wc->winref] != NIL)
5324 140 : actives = lappend(actives, wc);
5325 : }
5326 :
5327 : /*
5328 : * Now, ensure that windows with identical partitioning/ordering clauses
5329 : * are adjacent in the list. This is required by the SQL standard, which
5330 : * says that only one sort is to be used for such windows, even if they
5331 : * are otherwise distinct (eg, different names or framing clauses).
5332 : *
5333 : * There is room to be much smarter here, for example detecting whether
5334 : * one window's sort keys are a prefix of another's (so that sorting for
5335 : * the latter would do for the former), or putting windows first that
5336 : * match a sort order available for the underlying query. For the moment
5337 : * we are content with meeting the spec.
5338 : */
5339 133 : result = NIL;
5340 405 : while (actives != NIL)
5341 : {
5342 139 : WindowClause *wc = (WindowClause *) linitial(actives);
5343 : ListCell *prev;
5344 : ListCell *next;
5345 :
5346 : /* Move wc from actives to result */
5347 139 : actives = list_delete_first(actives);
5348 139 : result = lappend(result, wc);
5349 :
5350 : /* Now move any matching windows from actives to result */
5351 139 : prev = NULL;
5352 146 : for (lc = list_head(actives); lc; lc = next)
5353 : {
5354 7 : WindowClause *wc2 = (WindowClause *) lfirst(lc);
5355 :
5356 7 : next = lnext(lc);
5357 : /* framing options are NOT to be compared here! */
5358 11 : if (equal(wc->partitionClause, wc2->partitionClause) &&
5359 4 : equal(wc->orderClause, wc2->orderClause))
5360 : {
5361 1 : actives = list_delete_cell(actives, lc, prev);
5362 1 : result = lappend(result, wc2);
5363 : }
5364 : else
5365 6 : prev = lc;
5366 : }
5367 : }
5368 :
5369 133 : return result;
5370 : }
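 :
 : /*
 : * Illustrative example (t(a, b, x) is a hypothetical table): in
 : *
 : *		SELECT sum(x) OVER w1, rank() OVER w2 FROM t
 : *		WINDOW w1 AS (PARTITION BY a ORDER BY b),
 : *			   w2 AS (PARTITION BY a ORDER BY b ROWS UNBOUNDED PRECEDING);
 : *
 : * w1 and w2 differ only in framing, so they are placed adjacently in the
 : * result and a single sort on (a, b) serves both.
 : */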
5371 :
5372 : /*
5373 : * make_window_input_target
5374 : * Generate appropriate PathTarget for initial input to WindowAgg nodes.
5375 : *
5376 : * When the query has window functions, this function computes the desired
5377 : * target to be computed by the node just below the first WindowAgg.
5378 : * This tlist must contain all values needed to evaluate the window functions,
5379 : * compute the final target list, and perform any required final sort step.
5380 : * If multiple WindowAggs are needed, each intermediate one adds its window
5381 : * function results onto this base tlist; only the topmost WindowAgg computes
5382 : * the actual desired target list.
5383 : *
5384 : * This function is much like make_group_input_target, though not quite enough
5385 : * like it to share code. As in that function, we flatten most expressions
5386 : * into their component variables. But we do not want to flatten window
5387 : * PARTITION BY/ORDER BY clauses, since that might result in multiple
5388 : * evaluations of them, which would be bad (possibly even resulting in
5389 : * inconsistent answers, if they contain volatile functions).
5390 : * Also, we must not flatten GROUP BY clauses that were left unflattened by
5391 : * make_group_input_target, because we may no longer have access to the
5392 : * individual Vars in them.
5393 : *
5394 : * Another key difference from make_group_input_target is that we don't
5395 : * flatten Aggref expressions, since those are to be computed below the
5396 : * window functions and just referenced like Vars above that.
5397 : *
5398 : * 'final_target' is the query's final target list (in PathTarget form)
5399 : * 'activeWindows' is the list of active windows previously identified by
5400 : * select_active_windows.
5401 : *
5402 : * The result is the PathTarget to be computed by the plan node immediately
5403 : * below the first WindowAgg node.
5404 : */
5405 : static PathTarget *
5406 133 : make_window_input_target(PlannerInfo *root,
5407 : PathTarget *final_target,
5408 : List *activeWindows)
5409 : {
5410 133 : Query *parse = root->parse;
5411 : PathTarget *input_target;
5412 : Bitmapset *sgrefs;
5413 : List *flattenable_cols;
5414 : List *flattenable_vars;
5415 : int i;
5416 : ListCell *lc;
5417 :
5418 133 : Assert(parse->hasWindowFuncs);
5419 :
5420 : /*
5421 : * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
5422 : * into a bitmapset for convenient reference below.
5423 : */
5424 133 : sgrefs = NULL;
5425 273 : foreach(lc, activeWindows)
5426 : {
5427 140 : WindowClause *wc = (WindowClause *) lfirst(lc);
5428 : ListCell *lc2;
5429 :
5430 187 : foreach(lc2, wc->partitionClause)
5431 : {
5432 47 : SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);
5433 :
5434 47 : sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5435 : }
5436 255 : foreach(lc2, wc->orderClause)
5437 : {
5438 115 : SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);
5439 :
5440 115 : sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
5441 : }
5442 : }
5443 :
5444 : /* Add in sortgroupref numbers of GROUP BY clauses, too */
5445 158 : foreach(lc, parse->groupClause)
5446 : {
5447 25 : SortGroupClause *grpcl = (SortGroupClause *) lfirst(lc);
5448 :
5449 25 : sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
5450 : }
5451 :
5452 : /*
5453 : * Construct a target containing all the non-flattenable targetlist items,
5454 : * and save aside the others for a moment.
5455 : */
5456 133 : input_target = create_empty_pathtarget();
5457 133 : flattenable_cols = NIL;
5458 :
5459 133 : i = 0;
5460 527 : foreach(lc, final_target->exprs)
5461 : {
5462 394 : Expr *expr = (Expr *) lfirst(lc);
5463 394 : Index sgref = get_pathtarget_sortgroupref(final_target, i);
5464 :
5465 : /*
5466 : * Don't want to deconstruct window clauses or GROUP BY items. (Note
5467 : * that such items can't contain window functions, so it's okay to
5468 : * compute them below the WindowAgg nodes.)
5469 : */
5470 394 : if (sgref != 0 && bms_is_member(sgref, sgrefs))
5471 : {
5472 : /*
5473 : * Don't want to deconstruct this value, so add it to the input
5474 : * target as-is.
5475 : */
5476 162 : add_column_to_pathtarget(input_target, expr, sgref);
5477 : }
5478 : else
5479 : {
5480 : /*
5481 : * Column is to be flattened, so just remember the expression for
5482 : * later call to pull_var_clause.
5483 : */
5484 232 : flattenable_cols = lappend(flattenable_cols, expr);
5485 : }
5486 :
5487 394 : i++;
5488 : }
5489 :
5490 : /*
5491 : * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
5492 : * add them to the input target if not already present. (Some might be
5493 : * there already because they're used directly as window/group clauses.)
5494 : *
5495 : * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
5496 : * Aggrefs are placed in the Agg node's tlist and not left to be computed
5497 : * at higher levels. On the other hand, we should recurse into
5498 : * WindowFuncs to make sure their input expressions are available.
5499 : */
5500 133 : flattenable_vars = pull_var_clause((Node *) flattenable_cols,
5501 : PVC_INCLUDE_AGGREGATES |
5502 : PVC_RECURSE_WINDOWFUNCS |
5503 : PVC_INCLUDE_PLACEHOLDERS);
5504 133 : add_new_columns_to_pathtarget(input_target, flattenable_vars);
5505 :
5506 : /* clean up cruft */
5507 133 : list_free(flattenable_vars);
5508 133 : list_free(flattenable_cols);
5509 :
5510 : /* XXX this causes some redundant cost calculation ... */
5511 133 : return set_pathtarget_cost_width(root, input_target);
5512 : }
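 :
 : /*
 : * Illustrative example (t is a hypothetical table): in
 : *
 : *		SELECT a, rank() OVER (ORDER BY random()) FROM t;
 : *
 : * the volatile random() carries the window ORDER BY's sortgroupref, so it
 : * is kept whole in the input target and evaluated exactly once below the
 : * WindowAgg; re-evaluating it at higher levels could make the returned
 : * values inconsistent with the sort order.
 : */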
5513 :
5514 : /*
5515 : * make_pathkeys_for_window
5516 : * Create a pathkeys list describing the required input ordering
5517 : * for the given WindowClause.
5518 : *
5519 : * The required ordering is first the PARTITION keys, then the ORDER keys.
5520 : * In the future we might try to implement windowing using hashing, in which
5521 : * case the ordering could be relaxed, but for now we always sort.
5522 : *
5523 : * Caution: if you change this, see createplan.c's get_column_info_for_window!
5524 : */
5525 : static List *
5526 280 : make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
5527 : List *tlist)
5528 : {
5529 : List *window_pathkeys;
5530 : List *window_sortclauses;
5531 :
5532 : /* Throw error if can't sort */
5533 280 : if (!grouping_is_sortable(wc->partitionClause))
5534 0 : ereport(ERROR,
5535 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5536 : errmsg("could not implement window PARTITION BY"),
5537 : errdetail("Window partitioning columns must be of sortable datatypes.")));
5538 280 : if (!grouping_is_sortable(wc->orderClause))
5539 0 : ereport(ERROR,
5540 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5541 : errmsg("could not implement window ORDER BY"),
5542 : errdetail("Window ordering columns must be of sortable datatypes.")));
5543 :
5544 : /* Okay, make the combined pathkeys */
5545 280 : window_sortclauses = list_concat(list_copy(wc->partitionClause),
5546 280 : list_copy(wc->orderClause));
5547 280 : window_pathkeys = make_pathkeys_for_sortclauses(root,
5548 : window_sortclauses,
5549 : tlist);
5550 280 : list_free(window_sortclauses);
5551 280 : return window_pathkeys;
5552 : }
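 :
 : /*
 : * For example, OVER (PARTITION BY a ORDER BY b DESC) requires input
 : * ordered by (a, b DESC): the partitioning keys come first, in their
 : * default ascending order, followed by the window's ORDER BY keys.
 : */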
5553 :
5554 : /*
5555 : * make_sort_input_target
5556 : * Generate appropriate PathTarget for initial input to Sort step.
5557 : *
5558 : * If the query has ORDER BY, this function chooses the target to be computed
5559 : * by the node just below the Sort (and DISTINCT, if any, since Unique can't
5560 : * project) steps. This might or might not be identical to the query's final
5561 : * output target.
5562 : *
5563 : * The main argument for keeping the sort-input tlist the same as the final
5564 : * is that we avoid a separate projection node (which will be needed if
5565 : * they're different, because Sort can't project). However, there are also
5566 : * advantages to postponing tlist evaluation till after the Sort: it ensures
5567 : * a consistent order of evaluation for any volatile functions in the tlist,
5568 : * and if there's also a LIMIT, we can stop the query without ever computing
5569 : * tlist functions for later rows, which is beneficial for both volatile and
5570 : * expensive functions.
5571 : *
5572 : * Our current policy is to postpone volatile expressions till after the sort
5573 : * unconditionally (assuming that that's possible, ie they are in plain tlist
5574 : * columns and not ORDER BY/GROUP BY/DISTINCT columns). We also prefer to
5575 : * postpone set-returning expressions, because running them beforehand would
5576 : * bloat the sort dataset, and because it might cause unexpected output order
5577 : * if the sort isn't stable. However there's a constraint on that: all SRFs
5578 : * in the tlist should be evaluated at the same plan step, so that they can
5579 : * run in sync in nodeProjectSet. So if any SRFs are in sort columns, we
5580 : * mustn't postpone any SRFs. (Note that in principle that policy should
5581 : * probably get applied to the group/window input targetlists too, but we
5582 : * have not done that historically.) Lastly, expensive expressions are
5583 : * postponed if there is a LIMIT, or if root->tuple_fraction shows that
5584 : * partial evaluation of the query is possible (if neither is true, we expect
5585 : * to have to evaluate the expressions for every row anyway), or if there are
5586 : * any volatile or set-returning expressions (since once we've put in a
5587 : * projection at all, it won't cost any more to postpone more stuff).
5588 : *
5589 : * Another issue that could potentially be considered here is that
5590 : * evaluating tlist expressions could result in data that's either wider
5591 : * or narrower than the input Vars, thus changing the volume of data that
5592 : * has to go through the Sort. However, we usually have only a very bad
5593 : * idea of the output width of any expression more complex than a Var,
5594 : * so for now it seems too risky to try to optimize on that basis.
5595 : *
5596 : * Note that if we do produce a modified sort-input target, and then the
5597 : * query ends up not using an explicit Sort, no particular harm is done:
5598 : * we'll initially use the modified target for the preceding path nodes,
5599 : * but then change them to the final target with apply_projection_to_path.
5600 : * Moreover, in such a case the guarantees about evaluation order of
5601 : * volatile functions still hold, since the rows are sorted already.
5602 : *
5603 : * This function has some things in common with make_group_input_target and
5604 : * make_window_input_target, though the detailed rules for what to do are
5605 : * different. We never flatten/postpone any grouping or ordering columns;
5606 : * those are needed before the sort. If we do flatten a particular
5607 : * expression, we leave Aggref and WindowFunc nodes alone, since those were
5608 : * computed earlier.
5609 : *
5610 : * 'final_target' is the query's final target list (in PathTarget form)
5611 : * 'have_postponed_srfs' is an output argument, see below
5612 : *
5613 : * The result is the PathTarget to be computed by the plan node immediately
5614 : * below the Sort step (and the Distinct step, if any). This will be
5615 : * exactly final_target if we decide a projection step wouldn't be helpful.
5616 : *
5617 : * In addition, *have_postponed_srfs is set to TRUE if we choose to postpone
5618 : * any set-returning functions to after the Sort.
5619 : */
5620 : static PathTarget *
5621 2767 : make_sort_input_target(PlannerInfo *root,
5622 : PathTarget *final_target,
5623 : bool *have_postponed_srfs)
5624 : {
5625 2767 : Query *parse = root->parse;
5626 : PathTarget *input_target;
5627 : int ncols;
5628 : bool *col_is_srf;
5629 : bool *postpone_col;
5630 : bool have_srf;
5631 : bool have_volatile;
5632 : bool have_expensive;
5633 : bool have_srf_sortcols;
5634 : bool postpone_srfs;
5635 : List *postponable_cols;
5636 : List *postponable_vars;
5637 : int i;
5638 : ListCell *lc;
5639 :
5640 : /* Shouldn't get here unless query has ORDER BY */
5641 2767 : Assert(parse->sortClause);
5642 :
5643 2767 : *have_postponed_srfs = false; /* default result */
5644 :
5645 : /* Inspect tlist and collect per-column information */
5646 2767 : ncols = list_length(final_target->exprs);
5647 2767 : col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
5648 2767 : postpone_col = (bool *) palloc0(ncols * sizeof(bool));
5649 2767 : have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
5650 :
5651 2767 : i = 0;
5652 13902 : foreach(lc, final_target->exprs)
5653 : {
5654 11135 : Expr *expr = (Expr *) lfirst(lc);
5655 :
5656 : /*
5657 : * If the column has a sortgroupref, assume it has to be evaluated
5658 : * before sorting. Generally such columns would be ORDER BY, GROUP
5659 : * BY, etc targets. One exception is columns that were removed from
5660 : * GROUP BY by remove_useless_groupby_columns() ... but those would
5661 : * only be Vars anyway. There don't seem to be any cases where it
5662 : * would be worth the trouble to double-check.
5663 : */
5664 11135 : if (get_pathtarget_sortgroupref(final_target, i) == 0)
5665 : {
5666 : /*
5667 : * Check for SRF or volatile functions. Check the SRF case first
5668 : * because we must know whether we have any postponed SRFs.
5669 : */
5670 15267 : if (parse->hasTargetSRFs &&
5671 29 : expression_returns_set((Node *) expr))
5672 : {
5673 : /* We'll decide below whether these are postponable */
5674 12 : col_is_srf[i] = true;
5675 12 : have_srf = true;
5676 : }
5677 7607 : else if (contain_volatile_functions((Node *) expr))
5678 : {
5679 : /* Unconditionally postpone */
5680 11 : postpone_col[i] = true;
5681 11 : have_volatile = true;
5682 : }
5683 : else
5684 : {
5685 : /*
5686 : * Else check the cost. XXX it's annoying to have to do this
5687 : * when set_pathtarget_cost_width() just did it. Refactor to
5688 : * allow sharing the work?
5689 : */
5690 : QualCost cost;
5691 :
5692 7596 : cost_qual_eval_node(&cost, (Node *) expr, root);
5693 :
5694 : /*
5695 : * We arbitrarily define "expensive" as "more than 10X
5696 : * cpu_operator_cost". Note this will take in any PL function
5697 : * with default cost.
5698 : */
5699 7596 : if (cost.per_tuple > 10 * cpu_operator_cost)
5700 : {
5701 1100 : postpone_col[i] = true;
5702 1100 : have_expensive = true;
5703 : }
5704 : }
5705 : }
5706 : else
5707 : {
5708 : /* For sortgroupref cols, just check if any contain SRFs */
5709 7032 : if (!have_srf_sortcols &&
5710 3561 : parse->hasTargetSRFs &&
5711 45 : expression_returns_set((Node *) expr))
5712 18 : have_srf_sortcols = true;
5713 : }
5714 :
5715 11135 : i++;
5716 : }
5717 :
5718 : /*
5719 : * We can postpone SRFs if we have some but none are in sortgroupref cols.
5720 : */
5721 2767 : postpone_srfs = (have_srf && !have_srf_sortcols);
5722 :
5723 : /*
5724 : * If we don't need a post-sort projection, just return final_target.
5725 : */
5726 2767 : if (!(postpone_srfs || have_volatile ||
5727 660 : (have_expensive &&
5728 1318 : (parse->limitCount || root->tuple_fraction > 0))))
5729 2746 : return final_target;
5730 :
5731 : /*
5732 : * Report whether the post-sort projection will contain set-returning
5733 : * functions. This is important because it affects whether the Sort can
5734 : * rely on the query's LIMIT (if any) to bound the number of rows it needs
5735 : * to return.
5736 : */
5737 21 : *have_postponed_srfs = postpone_srfs;
5738 :
5739 : /*
5740 : * Construct the sort-input target, taking all non-postponable columns and
5741 : * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
5742 : * the postponable ones.
5743 : */
5744 21 : input_target = create_empty_pathtarget();
5745 21 : postponable_cols = NIL;
5746 :
5747 21 : i = 0;
5748 140 : foreach(lc, final_target->exprs)
5749 : {
5750 119 : Expr *expr = (Expr *) lfirst(lc);
5751 :
5752 119 : if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
5753 21 : postponable_cols = lappend(postponable_cols, expr);
5754 : else
5755 196 : add_column_to_pathtarget(input_target, expr,
5756 196 : get_pathtarget_sortgroupref(final_target, i));
5757 :
5758 119 : i++;
5759 : }
5760 :
5761 : /*
5762 : * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
5763 : * postponable columns, and add them to the sort-input target if not
5764 : * already present. (Some might be there already.) We mustn't
5765 : * deconstruct Aggrefs or WindowFuncs here, since the projection node
5766 : * would be unable to recompute them.
5767 : */
5768 21 : postponable_vars = pull_var_clause((Node *) postponable_cols,
5769 : PVC_INCLUDE_AGGREGATES |
5770 : PVC_INCLUDE_WINDOWFUNCS |
5771 : PVC_INCLUDE_PLACEHOLDERS);
5772 21 : add_new_columns_to_pathtarget(input_target, postponable_vars);
5773 :
5774 : /* clean up cruft */
5775 21 : list_free(postponable_vars);
5776 21 : list_free(postponable_cols);
5777 :
5778 : /* XXX this represents even more redundant cost calculation ... */
5779 21 : return set_pathtarget_cost_width(root, input_target);
5780 : }
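 :
 : /*
 : * Illustrative example (t and slow_fn are hypothetical; slow_fn was
 : * created with COST 1000): in
 : *
 : *		SELECT a, slow_fn(b) FROM t ORDER BY a LIMIT 10;
 : *
 : * slow_fn(b) has no sortgroupref, is expensive, and a LIMIT is present,
 : * so the sort-input target contains just a and b; slow_fn(b) is computed
 : * by a projection above the Sort, i.e. for at most the 10 returned rows
 : * rather than for every input row.
 : */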
5781 :
5782 : /*
5783 : * get_cheapest_fractional_path
5784 : * Find the cheapest path for retrieving a specified fraction of all
5785 : * the tuples expected to be returned by the given relation.
5786 : *
5787 : * We interpret tuple_fraction the same way as grouping_planner.
5788 : *
5789 : * We assume set_cheapest() has been run on the given rel.
5790 : */
5791 : Path *
5792 24163 : get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
5793 : {
5794 24163 : Path *best_path = rel->cheapest_total_path;
5795 : ListCell *l;
5796 :
5797 : /* If all tuples will be retrieved, just return the cheapest-total path */
5798 24163 : if (tuple_fraction <= 0.0)
5799 23879 : return best_path;
5800 :
5801 : /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */
5802 284 : if (tuple_fraction >= 1.0 && best_path->rows > 0)
5803 123 : tuple_fraction /= best_path->rows;
5804 :
5805 651 : foreach(l, rel->pathlist)
5806 : {
5807 367 : Path *path = (Path *) lfirst(l);
5808 :
5809 450 : if (path == rel->cheapest_total_path ||
5810 83 : compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)
5811 356 : continue;
5812 :
5813 11 : best_path = path;
5814 : }
5815 :
5816 284 : return best_path;
5817 : }
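 :
 : /*
 : * For example, with tuple_fraction = 0.01, a path with a high startup
 : * cost but low total cost (say, a full sort) can lose to one that
 : * delivers its first rows cheaply (say, an ordered index scan), since
 : * compare_fractional_path_costs() compares the estimated costs of
 : * returning just 1% of each path's rows.
 : */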
5818 :
5819 : /*
5820 : * adjust_paths_for_srfs
5821 : * Fix up the Paths of the given upperrel to handle tSRFs properly.
5822 : *
5823 : * The executor can only handle set-returning functions that appear at the
5824 : * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
5825 : * that are not at top level, we need to split up the evaluation into multiple
5826 : * plan levels in which each level satisfies this constraint. This function
5827 : * modifies each Path of an upperrel that (might) compute any SRFs in its
5828 : * output tlist to insert appropriate projection steps.
5829 : *
5830 : * The given targets and targets_contain_srfs lists are from
5831 : * split_pathtarget_at_srfs(). We assume the existing Paths emit the first
5832 : * target in targets.
5833 : */
5834 : static void
5835 263 : adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
5836 : List *targets, List *targets_contain_srfs)
5837 : {
5838 : ListCell *lc;
5839 :
5840 263 : Assert(list_length(targets) == list_length(targets_contain_srfs));
5841 263 : Assert(!linitial_int(targets_contain_srfs));
5842 :
5843 : /* If no SRFs appear at this plan level, nothing to do */
5844 263 : if (list_length(targets) == 1)
5845 310 : return;
5846 :
5847 : /*
5848 : * Stack SRF-evaluation nodes atop each path for the rel.
5849 : *
5850 : * In principle we should re-run set_cheapest() here to identify the
5851 : * cheapest path, but it seems unlikely that adding the same tlist eval
5852 : * costs to all the paths would change that, so we don't bother. Instead,
5853 : * just assume that the cheapest-startup and cheapest-total paths remain
5854 : * so. (There should be no parameterized paths anymore, so we needn't
5855 : * worry about updating cheapest_parameterized_paths.)
5856 : */
5857 438 : foreach(lc, rel->pathlist)
5858 : {
5859 222 : Path *subpath = (Path *) lfirst(lc);
5860 222 : Path *newpath = subpath;
5861 : ListCell *lc1,
5862 : *lc2;
5863 :
5864 222 : Assert(subpath->param_info == NULL);
5865 735 : forboth(lc1, targets, lc2, targets_contain_srfs)
5866 : {
5867 513 : PathTarget *thistarget = (PathTarget *) lfirst(lc1);
5868 513 : bool contains_srfs = (bool) lfirst_int(lc2);
5869 :
5870 : /* If this level doesn't contain SRFs, do regular projection */
5871 513 : if (contains_srfs)
5872 232 : newpath = (Path *) create_set_projection_path(root,
5873 : rel,
5874 : newpath,
5875 : thistarget);
5876 : else
5877 281 : newpath = (Path *) apply_projection_to_path(root,
5878 : rel,
5879 : newpath,
5880 : thistarget);
5881 : }
5882 222 : lfirst(lc) = newpath;
5883 222 : if (subpath == rel->cheapest_startup_path)
5884 208 : rel->cheapest_startup_path = newpath;
5885 222 : if (subpath == rel->cheapest_total_path)
5886 208 : rel->cheapest_total_path = newpath;
5887 : }
5888 :
5889 : /* Likewise for partial paths, if any */
5890 216 : foreach(lc, rel->partial_pathlist)
5891 : {
5892 0 : Path *subpath = (Path *) lfirst(lc);
5893 0 : Path *newpath = subpath;
5894 : ListCell *lc1,
5895 : *lc2;
5896 :
5897 0 : Assert(subpath->param_info == NULL);
5898 0 : forboth(lc1, targets, lc2, targets_contain_srfs)
5899 : {
5900 0 : PathTarget *thistarget = (PathTarget *) lfirst(lc1);
5901 0 : bool contains_srfs = (bool) lfirst_int(lc2);
5902 :
5903 : /* If this level doesn't contain SRFs, do regular projection */
5904 0 : if (contains_srfs)
5905 0 : newpath = (Path *) create_set_projection_path(root,
5906 : rel,
5907 : newpath,
5908 : thistarget);
5909 : else
5910 : {
5911 : /* avoid apply_projection_to_path, in case of multiple refs */
5912 0 : newpath = (Path *) create_projection_path(root,
5913 : rel,
5914 : newpath,
5915 : thistarget);
5916 : }
5917 : }
5918 0 : lfirst(lc) = newpath;
5919 : }
5920 : }
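 :
 : /*
 : * Illustrative example:
 : *
 : *		SELECT abs(generate_series(-2, 2));
 : *
 : * Here the SRF is not at top level, so split_pathtarget_at_srfs() yields
 : * two target levels, and this function stacks a ProjectSet node computing
 : * generate_series(-2, 2) below a plain projection computing abs() of it.
 : */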
5921 :
5922 : /*
5923 : * expression_planner
5924 : * Perform planner's transformations on a standalone expression.
5925 : *
5926 : * Various utility commands need to evaluate expressions that are not part
5927 : * of a plannable query. They can do so using the executor's regular
5928 : * expression-execution machinery, but first the expression has to be fed
5929 : * through here to transform it from parser output to something executable.
5930 : *
5931 : * Currently, we disallow sublinks in standalone expressions, so there's no
5932 : * real "planning" involved here. (That might not always be true though.)
5933 : * What we must do is run eval_const_expressions to ensure that any function
5934 : * calls are converted to positional notation and function default arguments
5935 : * get inserted. The fact that constant subexpressions get simplified is a
5936 : * side-effect that is useful when the expression will get evaluated more than
5937 : * once. Also, we must fix operator function IDs.
5938 : *
5939 : * Note: this must not make any damaging changes to the passed-in expression
5940 : * tree. (It would actually be okay to apply fix_opfuncids to it, but since
5941 : * we first do an expression_tree_mutator-based walk, what is returned will
5942 : * be a new node tree.)
5943 : */
5944 : Expr *
5945 2419 : expression_planner(Expr *expr)
5946 : {
5947 : Node *result;
5948 :
5949 : /*
5950 : * Convert named-argument function calls, insert default arguments and
5951 : * simplify constant subexprs
5952 : */
5953 2419 : result = eval_const_expressions(NULL, (Node *) expr);
5954 :
5955 : /* Fill in opfuncid values if missing */
5956 2419 : fix_opfuncids(result);
5957 :
5958 2419 : return (Expr *) result;
5959 : }
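 :
 : /*
 : * For example, a column default expression written as round(4.5) comes
 : * out of here as the constant 5, and a call using named or defaulted
 : * arguments is rewritten into plain positional form, so the executor
 : * need not deal with either notation at runtime.
 : */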
5960 :
5961 :
5962 : /*
5963 : * plan_cluster_use_sort
5964 : * Use the planner to decide how CLUSTER should implement sorting
5965 : *
5966 : * tableOid is the OID of a table to be clustered on its index indexOid
5967 : * (which is already known to be a btree index). Decide whether it's
5968 : * cheaper to do an indexscan or a seqscan-plus-sort to execute the CLUSTER.
5969 : * Return TRUE to use sorting, FALSE to use an indexscan.
5970 : *
5971 : * Note: caller had better already hold some type of lock on the table.
5972 : */
5973 : bool
5974 10 : plan_cluster_use_sort(Oid tableOid, Oid indexOid)
5975 : {
5976 : PlannerInfo *root;
5977 : Query *query;
5978 : PlannerGlobal *glob;
5979 : RangeTblEntry *rte;
5980 : RelOptInfo *rel;
5981 : IndexOptInfo *indexInfo;
5982 : QualCost indexExprCost;
5983 : Cost comparisonCost;
5984 : Path *seqScanPath;
5985 : Path seqScanAndSortPath;
5986 : IndexPath *indexScanPath;
5987 : ListCell *lc;
5988 :
5989 : /* We can short-circuit the cost comparison if indexscans are disabled */
5990 10 : if (!enable_indexscan)
5991 2 : return true; /* use sort */
5992 :
5993 : /* Set up mostly-dummy planner state */
5994 8 : query = makeNode(Query);
5995 8 : query->commandType = CMD_SELECT;
5996 :
5997 8 : glob = makeNode(PlannerGlobal);
5998 :
5999 8 : root = makeNode(PlannerInfo);
6000 8 : root->parse = query;
6001 8 : root->glob = glob;
6002 8 : root->query_level = 1;
6003 8 : root->planner_cxt = CurrentMemoryContext;
6004 8 : root->wt_param_id = -1;
6005 :
6006 : /* Build a minimal RTE for the rel */
6007 8 : rte = makeNode(RangeTblEntry);
6008 8 : rte->rtekind = RTE_RELATION;
6009 8 : rte->relid = tableOid;
6010 8 : rte->relkind = RELKIND_RELATION; /* Don't be too picky. */
6011 8 : rte->lateral = false;
6012 8 : rte->inh = false;
6013 8 : rte->inFromCl = true;
6014 8 : query->rtable = list_make1(rte);
6015 :
6016 : /* Set up RTE/RelOptInfo arrays */
6017 8 : setup_simple_rel_arrays(root);
6018 :
6019 : /* Build RelOptInfo */
6020 8 : rel = build_simple_rel(root, 1, NULL);
6021 :
6022 : /* Locate IndexOptInfo for the target index */
6023 8 : indexInfo = NULL;
6024 10 : foreach(lc, rel->indexlist)
6025 : {
6026 10 : indexInfo = (IndexOptInfo *) lfirst(lc);
6027 10 : if (indexInfo->indexoid == indexOid)
6028 8 : break;
6029 : }
6030 :
6031 : /*
6032 : * It's possible that get_relation_info did not generate an IndexOptInfo
6033 : * for the desired index; this could happen if it's not yet reached its
6034 : * indcheckxmin usability horizon, or if it's a system index and we're
6035 : * ignoring system indexes. In such cases we should tell CLUSTER to not
6036 : * trust the index contents but use seqscan-and-sort.
6037 : */
6038 8 : if (lc == NULL) /* not in the list? */
6039 0 : return true; /* use sort */
6040 :
6041 : /*
6042 : * Rather than doing all the pushups that would be needed to use
6043 : * set_baserel_size_estimates, just do a quick hack for rows and width.
6044 : */
6045 8 : rel->rows = rel->tuples;
6046 8 : rel->reltarget->width = get_relation_data_width(tableOid, NULL);
6047 :
6048 8 : root->total_table_pages = rel->pages;
6049 :
6050 : /*
6051 : * Determine eval cost of the index expressions, if any. We need to
6052 : * charge twice that amount for each tuple comparison that happens during
6053 : * the sort, since tuplesort.c will have to re-evaluate the index
6054 : * expressions each time. (XXX that's pretty inefficient...)
6055 : */
6056 8 : cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
6057 8 : comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
6058 :
6059 : /* Estimate the cost of seq scan + sort */
6060 8 : seqScanPath = create_seqscan_path(root, rel, NULL, 0);
6061 16 : cost_sort(&seqScanAndSortPath, root, NIL,
6062 8 : seqScanPath->total_cost, rel->tuples, rel->reltarget->width,
6063 : comparisonCost, maintenance_work_mem, -1.0);
6064 :
6065 : /* Estimate the cost of index scan */
6066 8 : indexScanPath = create_index_path(root, indexInfo,
6067 : NIL, NIL, NIL, NIL, NIL,
6068 : ForwardScanDirection, false,
6069 : NULL, 1.0, false);
6070 :
6071 8 : return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
6072 : }
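 :
 : /*
 : * As a rough illustration: a randomly-ordered heap typically loses the
 : * comparison above and is clustered via seqscan-and-sort, whereas a heap
 : * already well correlated with the index order tends to make the full
 : * indexscan the cheaper choice.
 : */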
6073 :
6074 : /*
6075 : * get_partitioned_child_rels
6076 : * Returns a list of the RT indexes of the partitioned child relations
6077 : * with rti as the root parent RT index.
6078 : *
6079 : * Note: Only call this function on RTEs known to be partitioned tables.
6080 : */
6081 : List *
6082 73 : get_partitioned_child_rels(PlannerInfo *root, Index rti)
6083 : {
6084 73 : List *result = NIL;
6085 : ListCell *l;
6086 :
6087 82 : foreach(l, root->pcinfo_list)
6088 : {
6089 82 : PartitionedChildRelInfo *pc = lfirst(l);
6090 :
6091 82 : if (pc->parent_relid == rti)
6092 : {
6093 73 : result = pc->child_rels;
6094 73 : break;
6095 : }
6096 : }
6097 :
6098 : /* The root partitioned table is included as a child rel */
6099 73 : Assert(list_length(result) >= 1);
6100 :
6101 73 : return result;
6102 : }
|