Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pathkeys.c
4 : * Utilities for matching and building path keys
5 : *
6 : * See src/backend/optimizer/README for a great deal of information about
7 : * the nature and use of path keys.
8 : *
9 : *
10 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
11 : * Portions Copyright (c) 1994, Regents of the University of California
12 : *
13 : * IDENTIFICATION
14 : * src/backend/optimizer/path/pathkeys.c
15 : *
16 : *-------------------------------------------------------------------------
17 : */
18 : #include "postgres.h"
19 :
20 : #include "access/stratnum.h"
21 : #include "nodes/makefuncs.h"
22 : #include "nodes/nodeFuncs.h"
23 : #include "nodes/plannodes.h"
24 : #include "optimizer/clauses.h"
25 : #include "optimizer/pathnode.h"
26 : #include "optimizer/paths.h"
27 : #include "optimizer/tlist.h"
28 : #include "utils/lsyscache.h"
29 :
30 :
31 : static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
32 : static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
33 :
34 :
35 : /****************************************************************************
36 : * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
37 : ****************************************************************************/
38 :
39 : /*
40 : * make_canonical_pathkey
41 : * Given the parameters for a PathKey, find any pre-existing matching
42 : * pathkey in the query's list of "canonical" pathkeys. Make a new
43 : * entry if there's not one already.
44 : *
45 : * Note that this function must not be used until after we have completed
46 : * merging EquivalenceClasses. (We don't try to enforce that here; instead,
47 : * equivclass.c will complain if a merge occurs after root->canon_pathkeys
48 : * has become nonempty.)
49 : */
50 : PathKey *
51 46918 : make_canonical_pathkey(PlannerInfo *root,
52 : EquivalenceClass *eclass, Oid opfamily,
53 : int strategy, bool nulls_first)
54 : {
55 : PathKey *pk;
56 : ListCell *lc;
57 : MemoryContext oldcontext;
58 :
59 : /* The passed eclass might be non-canonical, so chase up to the top */
60 93836 : while (eclass->ec_merged)
61 0 : eclass = eclass->ec_merged;
62 :
63 196299 : foreach(lc, root->canon_pathkeys)
64 : {
65 178714 : pk = (PathKey *) lfirst(lc);
66 221221 : if (eclass == pk->pk_eclass &&
67 85014 : opfamily == pk->pk_opfamily &&
68 71847 : strategy == pk->pk_strategy &&
69 29340 : nulls_first == pk->pk_nulls_first)
70 29333 : return pk;
71 : }
72 :
73 : /*
74 : * Be sure canonical pathkeys are allocated in the main planning context.
75 : * Not an issue in normal planning, but it is for GEQO.
76 : */
77 17585 : oldcontext = MemoryContextSwitchTo(root->planner_cxt);
78 :
79 17585 : pk = makeNode(PathKey);
80 17585 : pk->pk_eclass = eclass;
81 17585 : pk->pk_opfamily = opfamily;
82 17585 : pk->pk_strategy = strategy;
83 17585 : pk->pk_nulls_first = nulls_first;
84 :
85 17585 : root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
86 :
87 17585 : MemoryContextSwitchTo(oldcontext);
88 :
89 17585 : return pk;
90 : }
91 :
92 : /*
93 : * pathkey_is_redundant
94 : * Is a pathkey redundant with one already in the given list?
95 : *
96 : * We detect two cases:
97 : *
98 : * 1. If the new pathkey's equivalence class contains a constant, and isn't
99 : * below an outer join, then we can disregard it as a sort key. An example:
100 : * SELECT ... WHERE x = 42 ORDER BY x, y;
101 : * We may as well just sort by y. Note that because of opfamily matching,
102 : * this is semantically correct: we know that the equality constraint is one
103 : * that actually binds the variable to a single value in the terms of any
104 : * ordering operator that might go with the eclass. This rule not only lets
105 : * us simplify (or even skip) explicit sorts, but also allows matching index
106 : * sort orders to a query when there are don't-care index columns.
107 : *
108 : * 2. If the new pathkey's equivalence class is the same as that of any
109 : * existing member of the pathkey list, then it is redundant. Some examples:
110 : * SELECT ... ORDER BY x, x;
111 : * SELECT ... ORDER BY x, x DESC;
112 : * SELECT ... WHERE x = y ORDER BY x, y;
113 : * In all these cases the second sort key cannot distinguish values that are
114 : * considered equal by the first, and so there's no point in using it.
115 : * Note in particular that we need not compare opfamily (all the opfamilies
116 : * of the EC have the same notion of equality) nor sort direction.
117 : *
118 : * Both the given pathkey and the list members must be canonical for this
119 : * to work properly, but that's okay since we no longer ever construct any
120 : * non-canonical pathkeys. (Note: the notion of a pathkey *list* being
121 : * canonical includes the additional requirement of no redundant entries,
122 : * which is exactly what we are checking for here.)
123 : *
124 : * Because the equivclass.c machinery forms only one copy of any EC per query,
125 : * pointer comparison is enough to decide whether canonical ECs are the same.
126 : */
127 : static bool
128 61045 : pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
129 : {
130 61045 : EquivalenceClass *new_ec = new_pathkey->pk_eclass;
131 : ListCell *lc;
132 :
133 : /* Check for EC containing a constant --- unconditionally redundant */
134 61045 : if (EC_MUST_BE_REDUNDANT(new_ec))
135 6289 : return true;
136 :
137 : /* If same EC already used in list, then redundant */
138 60516 : foreach(lc, pathkeys)
139 : {
140 5784 : PathKey *old_pathkey = (PathKey *) lfirst(lc);
141 :
142 5784 : if (new_ec == old_pathkey->pk_eclass)
143 24 : return true;
144 : }
145 :
146 54732 : return false;
147 : }
148 :
149 : /*
150 : * make_pathkey_from_sortinfo
151 : * Given an expression and sort-order information, create a PathKey.
152 : * The result is always a "canonical" PathKey, but it might be redundant.
153 : *
154 : * expr is the expression, and nullable_relids is the set of base relids
155 : * that are potentially nullable below it.
156 : *
157 : * If the PathKey is being generated from a SortGroupClause, sortref should be
158 : * the SortGroupClause's SortGroupRef; otherwise zero.
159 : *
160 : * If rel is not NULL, it identifies a specific relation we're considering
161 : * a path for, and indicates that child EC members for that relation can be
162 : * considered. Otherwise child members are ignored. (See the comments for
163 : * get_eclass_for_sort_expr.)
164 : *
165 : * create_it is TRUE if we should create any missing EquivalenceClass
166 : * needed to represent the sort key. If it's FALSE, we return NULL if the
167 : * sort key isn't already present in any EquivalenceClass.
168 : */
169 : static PathKey *
170 48103 : make_pathkey_from_sortinfo(PlannerInfo *root,
171 : Expr *expr,
172 : Relids nullable_relids,
173 : Oid opfamily,
174 : Oid opcintype,
175 : Oid collation,
176 : bool reverse_sort,
177 : bool nulls_first,
178 : Index sortref,
179 : Relids rel,
180 : bool create_it)
181 : {
182 : int16 strategy;
183 : Oid equality_op;
184 : List *opfamilies;
185 : EquivalenceClass *eclass;
186 :
187 48103 : strategy = reverse_sort ? BTGreaterStrategyNumber : BTLessStrategyNumber;
188 :
189 : /*
190 : * EquivalenceClasses need to contain opfamily lists based on the family
191 : * membership of mergejoinable equality operators, which could belong to
192 : * more than one opfamily. So we have to look up the opfamily's equality
193 : * operator and get its membership.
194 : */
195 48103 : equality_op = get_opfamily_member(opfamily,
196 : opcintype,
197 : opcintype,
198 : BTEqualStrategyNumber);
199 48103 : if (!OidIsValid(equality_op)) /* shouldn't happen */
200 0 : elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
201 : BTEqualStrategyNumber, opcintype, opcintype, opfamily);
202 48103 : opfamilies = get_mergejoin_opfamilies(equality_op);
203 48103 : if (!opfamilies) /* certainly should find some */
204 0 : elog(ERROR, "could not find opfamilies for equality operator %u",
205 : equality_op);
206 :
207 : /* Now find or (optionally) create a matching EquivalenceClass */
208 48103 : eclass = get_eclass_for_sort_expr(root, expr, nullable_relids,
209 : opfamilies, opcintype, collation,
210 : sortref, rel, create_it);
211 :
212 : /* Fail if no EC and !create_it */
213 48103 : if (!eclass)
214 17197 : return NULL;
215 :
216 : /* And finally we can find or create a PathKey node */
217 30906 : return make_canonical_pathkey(root, eclass, opfamily,
218 : strategy, nulls_first);
219 : }
220 :
221 : /*
222 : * make_pathkey_from_sortop
223 : * Like make_pathkey_from_sortinfo, but work from a sort operator.
224 : *
225 : * This should eventually go away, but we need to restructure SortGroupClause
226 : * first.
227 : */
228 : static PathKey *
229 4589 : make_pathkey_from_sortop(PlannerInfo *root,
230 : Expr *expr,
231 : Relids nullable_relids,
232 : Oid ordering_op,
233 : bool nulls_first,
234 : Index sortref,
235 : bool create_it)
236 : {
237 : Oid opfamily,
238 : opcintype,
239 : collation;
240 : int16 strategy;
241 :
242 : /* Find the operator in pg_amop --- failure shouldn't happen */
243 4589 : if (!get_ordering_op_properties(ordering_op,
244 : &opfamily, &opcintype, &strategy))
245 0 : elog(ERROR, "operator %u is not a valid ordering operator",
246 : ordering_op);
247 :
248 : /* Because SortGroupClause doesn't carry collation, consult the expr */
249 4589 : collation = exprCollation((Node *) expr);
250 :
251 4589 : return make_pathkey_from_sortinfo(root,
252 : expr,
253 : nullable_relids,
254 : opfamily,
255 : opcintype,
256 : collation,
257 : (strategy == BTGreaterStrategyNumber),
258 : nulls_first,
259 : sortref,
260 : NULL,
261 : create_it);
262 : }
263 :
264 :
265 : /****************************************************************************
266 : * PATHKEY COMPARISONS
267 : ****************************************************************************/
268 :
269 : /*
270 : * compare_pathkeys
271 : * Compare two pathkeys to see if they are equivalent, and if not whether
272 : * one is "better" than the other.
273 : *
274 : * We assume the pathkeys are canonical, and so they can be checked for
275 : * equality by simple pointer comparison.
276 : */
277 : PathKeysComparison
278 225833 : compare_pathkeys(List *keys1, List *keys2)
279 : {
280 : ListCell *key1,
281 : *key2;
282 :
283 : /*
284 : * Fall out quickly if we are passed two identical lists. This mostly
285 : * catches the case where both are NIL, but that's common enough to
286 : * warrant the test.
287 : */
288 225833 : if (keys1 == keys2)
289 98581 : return PATHKEYS_EQUAL;
290 :
291 151398 : forboth(key1, keys1, key2, keys2)
292 : {
293 49203 : PathKey *pathkey1 = (PathKey *) lfirst(key1);
294 49203 : PathKey *pathkey2 = (PathKey *) lfirst(key2);
295 :
296 49203 : if (pathkey1 != pathkey2)
297 25057 : return PATHKEYS_DIFFERENT; /* no need to keep looking */
298 : }
299 :
300 : /*
301 : * If we reached the end of only one list, the other is longer and
302 : * therefore not a subset.
303 : */
304 102195 : if (key1 != NULL)
305 73743 : return PATHKEYS_BETTER1; /* key1 is longer */
306 28452 : if (key2 != NULL)
307 10377 : return PATHKEYS_BETTER2; /* key2 is longer */
308 18075 : return PATHKEYS_EQUAL;
309 : }
310 :
311 : /*
312 : * pathkeys_contained_in
313 : * Common special case of compare_pathkeys: we just want to know
314 : * if keys2 are at least as well sorted as keys1.
315 : */
316 : bool
317 93863 : pathkeys_contained_in(List *keys1, List *keys2)
318 : {
319 93863 : switch (compare_pathkeys(keys1, keys2))
320 : {
321 : case PATHKEYS_EQUAL:
322 : case PATHKEYS_BETTER2:
323 18800 : return true;
324 : default:
325 75063 : break;
326 : }
327 75063 : return false;
328 : }
329 :
330 : /*
331 : * get_cheapest_path_for_pathkeys
332 : * Find the cheapest path (according to the specified criterion) that
333 : * satisfies the given pathkeys and parameterization.
334 : * Return NULL if no such path.
335 : *
336 : * 'paths' is a list of possible paths that all generate the same relation
337 : * 'pathkeys' represents a required ordering (in canonical form!)
338 : * 'required_outer' denotes allowable outer relations for parameterized paths
339 : * 'cost_criterion' is STARTUP_COST or TOTAL_COST
340 : * 'require_parallel_safe' causes us to consider only parallel-safe paths
341 : */
342 : Path *
343 13106 : get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
344 : Relids required_outer,
345 : CostSelector cost_criterion,
346 : bool require_parallel_safe)
347 : {
348 13106 : Path *matched_path = NULL;
349 : ListCell *l;
350 :
351 43560 : foreach(l, paths)
352 : {
353 30454 : Path *path = (Path *) lfirst(l);
354 :
355 : /*
356 : * Since cost comparison is a lot cheaper than pathkey comparison, do
357 : * that first. (XXX is that still true?)
358 : */
359 30920 : if (matched_path != NULL &&
360 466 : compare_path_costs(matched_path, path, cost_criterion) <= 0)
361 444 : continue;
362 :
363 30010 : if (require_parallel_safe && !path->parallel_safe)
364 0 : continue;
365 :
366 41809 : if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
367 11799 : bms_is_subset(PATH_REQ_OUTER(path), required_outer))
368 6763 : matched_path = path;
369 : }
370 13106 : return matched_path;
371 : }
372 :
373 : /*
374 : * get_cheapest_fractional_path_for_pathkeys
375 : * Find the cheapest path (for retrieving a specified fraction of all
376 : * the tuples) that satisfies the given pathkeys and parameterization.
377 : * Return NULL if no such path.
378 : *
379 : * See compare_fractional_path_costs() for the interpretation of the fraction
380 : * parameter.
381 : *
382 : * 'paths' is a list of possible paths that all generate the same relation
383 : * 'pathkeys' represents a required ordering (in canonical form!)
384 : * 'required_outer' denotes allowable outer relations for parameterized paths
385 : * 'fraction' is the fraction of the total tuples expected to be retrieved
386 : */
387 : Path *
388 154 : get_cheapest_fractional_path_for_pathkeys(List *paths,
389 : List *pathkeys,
390 : Relids required_outer,
391 : double fraction)
392 : {
393 154 : Path *matched_path = NULL;
394 : ListCell *l;
395 :
396 401 : foreach(l, paths)
397 : {
398 247 : Path *path = (Path *) lfirst(l);
399 :
400 : /*
401 : * Since cost comparison is a lot cheaper than pathkey comparison, do
402 : * that first. (XXX is that still true?)
403 : */
404 280 : if (matched_path != NULL &&
405 33 : compare_fractional_path_costs(matched_path, path, fraction) <= 0)
406 8 : continue;
407 :
408 333 : if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
409 94 : bms_is_subset(PATH_REQ_OUTER(path), required_outer))
410 94 : matched_path = path;
411 : }
412 154 : return matched_path;
413 : }
414 :
415 :
416 : /*
417 : * get_cheapest_parallel_safe_total_inner
418 : * Find the unparameterized parallel-safe path with the least total cost.
419 : */
420 : Path *
421 12 : get_cheapest_parallel_safe_total_inner(List *paths)
422 : {
423 : ListCell *l;
424 :
425 28 : foreach(l, paths)
426 : {
427 28 : Path *innerpath = (Path *) lfirst(l);
428 :
429 44 : if (innerpath->parallel_safe &&
430 16 : bms_is_empty(PATH_REQ_OUTER(innerpath)))
431 12 : return innerpath;
432 : }
433 :
434 0 : return NULL;
435 : }
436 :
437 : /****************************************************************************
438 : * NEW PATHKEY FORMATION
439 : ****************************************************************************/
440 :
441 : /*
442 : * build_index_pathkeys
443 : * Build a pathkeys list that describes the ordering induced by an index
444 : * scan using the given index. (Note that an unordered index doesn't
445 : * induce any ordering, so we return NIL.)
446 : *
447 : * If 'scandir' is BackwardScanDirection, build pathkeys representing a
448 : * backwards scan of the index.
449 : *
450 : * The result is canonical, meaning that redundant pathkeys are removed;
451 : * it may therefore have fewer entries than there are index columns.
452 : *
453 : * Another reason for stopping early is that we may be able to tell that
454 : * an index column's sort order is uninteresting for this query. However,
455 : * that test is just based on the existence of an EquivalenceClass and not
456 : * on position in pathkey lists, so it's not complete. Caller should call
457 : * truncate_useless_pathkeys() to possibly remove more pathkeys.
458 : */
459 : List *
460 35708 : build_index_pathkeys(PlannerInfo *root,
461 : IndexOptInfo *index,
462 : ScanDirection scandir)
463 : {
464 35708 : List *retval = NIL;
465 : ListCell *lc;
466 : int i;
467 :
468 35708 : if (index->sortopfamily == NULL)
469 0 : return NIL; /* non-orderable index */
470 :
471 35708 : i = 0;
472 62024 : foreach(lc, index->indextlist)
473 : {
474 43452 : TargetEntry *indextle = (TargetEntry *) lfirst(lc);
475 : Expr *indexkey;
476 : bool reverse_sort;
477 : bool nulls_first;
478 : PathKey *cpathkey;
479 :
480 : /* We assume we don't need to make a copy of the tlist item */
481 43452 : indexkey = indextle->expr;
482 :
483 43452 : if (ScanDirectionIsBackward(scandir))
484 : {
485 21726 : reverse_sort = !index->reverse_sort[i];
486 21726 : nulls_first = !index->nulls_first[i];
487 : }
488 : else
489 : {
490 21726 : reverse_sort = index->reverse_sort[i];
491 21726 : nulls_first = index->nulls_first[i];
492 : }
493 :
494 : /*
495 : * OK, try to make a canonical pathkey for this sort key. Note we're
496 : * underneath any outer joins, so nullable_relids should be NULL.
497 : */
498 173808 : cpathkey = make_pathkey_from_sortinfo(root,
499 : indexkey,
500 : NULL,
501 43452 : index->sortopfamily[i],
502 43452 : index->opcintype[i],
503 43452 : index->indexcollations[i],
504 : reverse_sort,
505 : nulls_first,
506 : 0,
507 43452 : index->rel->relids,
508 : false);
509 :
510 43452 : if (cpathkey)
511 : {
512 : /*
513 : * We found the sort key in an EquivalenceClass, so it's relevant
514 : * for this query. Add it to list, unless it's redundant.
515 : */
516 26310 : if (!pathkey_is_redundant(cpathkey, retval))
517 20094 : retval = lappend(retval, cpathkey);
518 : }
519 : else
520 : {
521 : /*
522 : * Boolean index keys might be redundant even if they do not
523 : * appear in an EquivalenceClass, because of our special treatment
524 : * of boolean equality conditions --- see the comment for
525 : * indexcol_is_bool_constant_for_query(). If that applies, we can
526 : * continue to examine lower-order index columns. Otherwise, the
527 : * sort key is not an interesting sort order for this query, so we
528 : * should stop considering index columns; any lower-order sort
529 : * keys won't be useful either.
530 : */
531 17142 : if (!indexcol_is_bool_constant_for_query(index, i))
532 17136 : break;
533 : }
534 :
535 26316 : i++;
536 : }
537 :
538 35708 : return retval;
539 : }
540 :
541 : /*
542 : * build_expression_pathkey
543 : * Build a pathkeys list that describes an ordering by a single expression
544 : * using the given sort operator.
545 : *
546 : * expr, nullable_relids, and rel are as for make_pathkey_from_sortinfo.
547 : * We induce the other arguments assuming default sort order for the operator.
548 : *
549 : * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it
550 : * is false and the expression isn't already in some EquivalenceClass.
551 : */
552 : List *
553 62 : build_expression_pathkey(PlannerInfo *root,
554 : Expr *expr,
555 : Relids nullable_relids,
556 : Oid opno,
557 : Relids rel,
558 : bool create_it)
559 : {
560 : List *pathkeys;
561 : Oid opfamily,
562 : opcintype;
563 : int16 strategy;
564 : PathKey *cpathkey;
565 :
566 : /* Find the operator in pg_amop --- failure shouldn't happen */
567 62 : if (!get_ordering_op_properties(opno,
568 : &opfamily, &opcintype, &strategy))
569 0 : elog(ERROR, "operator %u is not a valid ordering operator",
570 : opno);
571 :
572 62 : cpathkey = make_pathkey_from_sortinfo(root,
573 : expr,
574 : nullable_relids,
575 : opfamily,
576 : opcintype,
577 : exprCollation((Node *) expr),
578 : (strategy == BTGreaterStrategyNumber),
579 : (strategy == BTGreaterStrategyNumber),
580 : 0,
581 : rel,
582 : create_it);
583 :
584 62 : if (cpathkey)
585 7 : pathkeys = list_make1(cpathkey);
586 : else
587 55 : pathkeys = NIL;
588 :
589 62 : return pathkeys;
590 : }
591 :
592 : /*
593 : * convert_subquery_pathkeys
594 : * Build a pathkeys list that describes the ordering of a subquery's
595 : * result, in the terms of the outer query. This is essentially a
596 : * task of conversion.
597 : *
598 : * 'rel': outer query's RelOptInfo for the subquery relation.
599 : * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
600 : * 'subquery_tlist': the subquery's output targetlist, in its terms.
601 : *
602 : * It is not necessary for caller to do truncate_useless_pathkeys(),
603 : * because we select keys in a way that takes usefulness of the keys into
604 : * account.
605 : */
606 : List *
607 823 : convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
608 : List *subquery_pathkeys,
609 : List *subquery_tlist)
610 : {
611 823 : List *retval = NIL;
612 823 : int retvallen = 0;
613 823 : int outer_query_keys = list_length(root->query_pathkeys);
614 : ListCell *i;
615 :
616 865 : foreach(i, subquery_pathkeys)
617 : {
618 122 : PathKey *sub_pathkey = (PathKey *) lfirst(i);
619 122 : EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
620 122 : PathKey *best_pathkey = NULL;
621 :
622 122 : if (sub_eclass->ec_has_volatile)
623 : {
624 : /*
625 : * If the sub_pathkey's EquivalenceClass is volatile, then it must
626 : * have come from an ORDER BY clause, and we have to match it to
627 : * that same targetlist entry.
628 : */
629 : TargetEntry *tle;
630 :
631 0 : if (sub_eclass->ec_sortref == 0) /* can't happen */
632 0 : elog(ERROR, "volatile EquivalenceClass has no sortref");
633 0 : tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
634 0 : Assert(tle);
635 : /* resjunk items aren't visible to outer query */
636 0 : if (!tle->resjunk)
637 : {
638 : /* We can represent this sub_pathkey */
639 : EquivalenceMember *sub_member;
640 : Expr *outer_expr;
641 : EquivalenceClass *outer_ec;
642 :
643 0 : Assert(list_length(sub_eclass->ec_members) == 1);
644 0 : sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
645 0 : outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid, tle);
646 :
647 : /*
648 : * Note: it might look funny to be setting sortref = 0 for a
649 : * reference to a volatile sub_eclass. However, the
650 : * expression is *not* volatile in the outer query: it's just
651 : * a Var referencing whatever the subquery emitted. (IOW, the
652 : * outer query isn't going to re-execute the volatile
653 : * expression itself.) So this is okay. Likewise, it's
654 : * correct to pass nullable_relids = NULL, because we're
655 : * underneath any outer joins appearing in the outer query.
656 : */
657 0 : outer_ec =
658 0 : get_eclass_for_sort_expr(root,
659 : outer_expr,
660 : NULL,
661 : sub_eclass->ec_opfamilies,
662 : sub_member->em_datatype,
663 : sub_eclass->ec_collation,
664 : 0,
665 : rel->relids,
666 : false);
667 :
668 : /*
669 : * If we don't find a matching EC, sub-pathkey isn't
670 : * interesting to the outer query
671 : */
672 0 : if (outer_ec)
673 0 : best_pathkey =
674 0 : make_canonical_pathkey(root,
675 : outer_ec,
676 : sub_pathkey->pk_opfamily,
677 : sub_pathkey->pk_strategy,
678 0 : sub_pathkey->pk_nulls_first);
679 : }
680 : }
681 : else
682 : {
683 : /*
684 : * Otherwise, the sub_pathkey's EquivalenceClass could contain
685 : * multiple elements (representing knowledge that multiple items
686 : * are effectively equal). Each element might match none, one, or
687 : * more of the output columns that are visible to the outer query.
688 : * This means we may have multiple possible representations of the
689 : * sub_pathkey in the context of the outer query. Ideally we
690 : * would generate them all and put them all into an EC of the
691 : * outer query, thereby propagating equality knowledge up to the
692 : * outer query. Right now we cannot do so, because the outer
693 : * query's EquivalenceClasses are already frozen when this is
694 : * called. Instead we prefer the one that has the highest "score"
695 : * (number of EC peers, plus one if it matches the outer
696 : * query_pathkeys). This is the most likely to be useful in the
697 : * outer query.
698 : */
699 122 : int best_score = -1;
700 : ListCell *j;
701 :
702 266 : foreach(j, sub_eclass->ec_members)
703 : {
704 144 : EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
705 144 : Expr *sub_expr = sub_member->em_expr;
706 144 : Oid sub_expr_type = sub_member->em_datatype;
707 144 : Oid sub_expr_coll = sub_eclass->ec_collation;
708 : ListCell *k;
709 :
710 144 : if (sub_member->em_is_child)
711 20 : continue; /* ignore children here */
712 :
713 728 : foreach(k, subquery_tlist)
714 : {
715 604 : TargetEntry *tle = (TargetEntry *) lfirst(k);
716 : Expr *tle_expr;
717 : Expr *outer_expr;
718 : EquivalenceClass *outer_ec;
719 : PathKey *outer_pk;
720 : int score;
721 :
722 : /* resjunk items aren't visible to outer query */
723 604 : if (tle->resjunk)
724 0 : continue;
725 :
726 : /*
727 : * The targetlist entry is considered to match if it
728 : * matches after sort-key canonicalization. That is
729 : * needed since the sub_expr has been through the same
730 : * process.
731 : */
732 604 : tle_expr = canonicalize_ec_expression(tle->expr,
733 : sub_expr_type,
734 : sub_expr_coll);
735 604 : if (!equal(tle_expr, sub_expr))
736 480 : continue;
737 :
738 : /*
739 : * Build a representation of this targetlist entry as an
740 : * outer Var.
741 : */
742 124 : outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid,
743 : tle);
744 :
745 : /* See if we have a matching EC for that */
746 124 : outer_ec = get_eclass_for_sort_expr(root,
747 : outer_expr,
748 : NULL,
749 : sub_eclass->ec_opfamilies,
750 : sub_expr_type,
751 : sub_expr_coll,
752 : 0,
753 : rel->relids,
754 : false);
755 :
756 : /*
757 : * If we don't find a matching EC, this sub-pathkey isn't
758 : * interesting to the outer query
759 : */
760 124 : if (!outer_ec)
761 82 : continue;
762 :
763 42 : outer_pk = make_canonical_pathkey(root,
764 : outer_ec,
765 : sub_pathkey->pk_opfamily,
766 : sub_pathkey->pk_strategy,
767 42 : sub_pathkey->pk_nulls_first);
768 : /* score = # of equivalence peers */
769 42 : score = list_length(outer_ec->ec_members) - 1;
770 : /* +1 if it matches the proper query_pathkeys item */
771 72 : if (retvallen < outer_query_keys &&
772 30 : list_nth(root->query_pathkeys, retvallen) == outer_pk)
773 19 : score++;
774 42 : if (score > best_score)
775 : {
776 42 : best_pathkey = outer_pk;
777 42 : best_score = score;
778 : }
779 : }
780 : }
781 : }
782 :
783 : /*
784 : * If we couldn't find a representation of this sub_pathkey, we're
785 : * done (we can't use the ones to its right, either).
786 : */
787 122 : if (!best_pathkey)
788 80 : break;
789 :
790 : /*
791 : * Eliminate redundant ordering info; could happen if outer query
792 : * equivalences subquery keys...
793 : */
794 42 : if (!pathkey_is_redundant(best_pathkey, retval))
795 : {
796 41 : retval = lappend(retval, best_pathkey);
797 41 : retvallen++;
798 : }
799 : }
800 :
801 823 : return retval;
802 : }
803 :
804 : /*
805 : * build_join_pathkeys
806 : * Build the path keys for a join relation constructed by mergejoin or
807 : * nestloop join. This is normally the same as the outer path's keys.
808 : *
809 : * EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
810 : * having the outer path's path keys, because null lefthand rows may be
811 : * inserted at random points. It must be treated as unsorted.
812 : *
813 : * We truncate away any pathkeys that are uninteresting for higher joins.
814 : *
815 : * 'joinrel' is the join relation that paths are being formed for
816 : * 'jointype' is the join type (inner, left, full, etc)
817 : * 'outer_pathkeys' is the list of the current outer path's path keys
818 : *
819 : * Returns the list of new path keys.
820 : */
821 : List *
822 34737 : build_join_pathkeys(PlannerInfo *root,
823 : RelOptInfo *joinrel,
824 : JoinType jointype,
825 : List *outer_pathkeys)
826 : {
827 34737 : if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
828 2938 : return NIL;
829 :
830 : /*
831 : * This used to be quite a complex bit of code, but now that all pathkey
832 : * sublists start out life canonicalized, we don't have to do a darn thing
833 : * here!
834 : *
835 : * We do, however, need to truncate the pathkeys list, since it may
836 : * contain pathkeys that were useful for forming this joinrel but are
837 : * uninteresting to higher levels.
838 : */
839 31799 : return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
840 : }
841 :
842 : /****************************************************************************
843 : * PATHKEYS AND SORT CLAUSES
844 : ****************************************************************************/
845 :
846 : /*
847 : * make_pathkeys_for_sortclauses
848 : * Generate a pathkeys list that represents the sort order specified
849 : * by a list of SortGroupClauses
850 : *
851 : * The resulting PathKeys are always in canonical form. (Actually, there
852 : * is no longer any code anywhere that creates non-canonical PathKeys.)
853 : *
854 : * We assume that root->nullable_baserels is the set of base relids that could
855 : * have gone to NULL below the SortGroupClause expressions. This is okay if
856 : * the expressions came from the query's top level (ORDER BY, DISTINCT, etc)
857 : * and if this function is only invoked after deconstruct_jointree. In the
858 : * future we might have to make callers pass in the appropriate
859 : * nullable-relids set, but for now it seems unnecessary.
860 : *
861 : * 'sortclauses' is a list of SortGroupClause nodes
862 : * 'tlist' is the targetlist to find the referenced tlist entries in
863 : */
864 : List *
865 26122 : make_pathkeys_for_sortclauses(PlannerInfo *root,
866 : List *sortclauses,
867 : List *tlist)
868 : {
869 26122 : List *pathkeys = NIL;
870 : ListCell *l;
871 :
872 30711 : foreach(l, sortclauses)
873 : {
874 4589 : SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
875 : Expr *sortkey;
876 : PathKey *pathkey;
877 :
878 4589 : sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
879 4589 : Assert(OidIsValid(sortcl->sortop));
880 9178 : pathkey = make_pathkey_from_sortop(root,
881 : sortkey,
882 : root->nullable_baserels,
883 : sortcl->sortop,
884 4589 : sortcl->nulls_first,
885 : sortcl->tleSortGroupRef,
886 : true);
887 :
888 : /* Canonical form eliminates redundant ordering keys */
889 4589 : if (!pathkey_is_redundant(pathkey, pathkeys))
890 4504 : pathkeys = lappend(pathkeys, pathkey);
891 : }
892 26122 : return pathkeys;
893 : }
894 :
895 : /****************************************************************************
896 : * PATHKEYS AND MERGECLAUSES
897 : ****************************************************************************/
898 :
899 : /*
900 : * initialize_mergeclause_eclasses
901 : * Set the EquivalenceClass links in a mergeclause restrictinfo.
902 : *
903 : * RestrictInfo contains fields in which we may cache pointers to
904 : * EquivalenceClasses for the left and right inputs of the mergeclause.
905 : * (If the mergeclause is a true equivalence clause these will be the
906 : * same EquivalenceClass, otherwise not.) If the mergeclause is either
907 : * used to generate an EquivalenceClass, or derived from an EquivalenceClass,
908 : * then it's easy to set up the left_ec and right_ec members --- otherwise,
909 : * this function should be called to set them up. We will generate new
910 : * EquivalenceClauses if necessary to represent the mergeclause's left and
911 : * right sides.
912 : *
913 : * Note this is called before EC merging is complete, so the links won't
914 : * necessarily point to canonical ECs. Before they are actually used for
915 : * anything, update_mergeclause_eclasses must be called to ensure that
916 : * they've been updated to point to canonical ECs.
917 : */
918 : void
919 2428 : initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
920 : {
921 2428 : Expr *clause = restrictinfo->clause;
922 : Oid lefttype,
923 : righttype;
924 :
925 : /* Should be a mergeclause ... */
926 2428 : Assert(restrictinfo->mergeopfamilies != NIL);
927 : /* ... with links not yet set */
928 2428 : Assert(restrictinfo->left_ec == NULL);
929 2428 : Assert(restrictinfo->right_ec == NULL);
930 :
931 : /* Need the declared input types of the operator */
932 2428 : op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
933 :
934 : /* Find or create a matching EquivalenceClass for each side */
935 2428 : restrictinfo->left_ec =
936 4856 : get_eclass_for_sort_expr(root,
937 2428 : (Expr *) get_leftop(clause),
938 : restrictinfo->nullable_relids,
939 : restrictinfo->mergeopfamilies,
940 : lefttype,
941 : ((OpExpr *) clause)->inputcollid,
942 : 0,
943 : NULL,
944 : true);
945 2428 : restrictinfo->right_ec =
946 4856 : get_eclass_for_sort_expr(root,
947 2428 : (Expr *) get_rightop(clause),
948 : restrictinfo->nullable_relids,
949 : restrictinfo->mergeopfamilies,
950 : righttype,
951 : ((OpExpr *) clause)->inputcollid,
952 : 0,
953 : NULL,
954 : true);
955 2428 : }
956 :
957 : /*
958 : * update_mergeclause_eclasses
959 : * Make the cached EquivalenceClass links valid in a mergeclause
960 : * restrictinfo.
961 : *
962 : * These pointers should have been set by process_equivalence or
963 : * initialize_mergeclause_eclasses, but they might have been set to
964 : * non-canonical ECs that got merged later. Chase up to the canonical
965 : * merged parent if so.
966 : */
967 : void
968 92615 : update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
969 : {
970 : /* Should be a merge clause ... */
971 92615 : Assert(restrictinfo->mergeopfamilies != NIL);
972 : /* ... with pointers already set */
973 92615 : Assert(restrictinfo->left_ec != NULL);
974 92615 : Assert(restrictinfo->right_ec != NULL);
975 :
976 : /* Chase up to the top as needed */
977 185230 : while (restrictinfo->left_ec->ec_merged)
978 0 : restrictinfo->left_ec = restrictinfo->left_ec->ec_merged;
979 185257 : while (restrictinfo->right_ec->ec_merged)
980 27 : restrictinfo->right_ec = restrictinfo->right_ec->ec_merged;
981 92615 : }
982 :
983 : /*
984 : * find_mergeclauses_for_pathkeys
985 : * This routine attempts to find a set of mergeclauses that can be
986 : * used with a specified ordering for one of the input relations.
987 : * If successful, it returns a list of mergeclauses.
988 : *
989 : * 'pathkeys' is a pathkeys list showing the ordering of an input path.
990 : * 'outer_keys' is TRUE if these keys are for the outer input path,
991 : * FALSE if for inner.
992 : * 'restrictinfos' is a list of mergejoinable restriction clauses for the
993 : * join relation being formed.
994 : *
995 : * The restrictinfos must be marked (via outer_is_left) to show which side
996 : * of each clause is associated with the current outer path. (See
997 : * select_mergejoin_clauses())
998 : *
999 : * The result is NIL if no merge can be done, else a maximal list of
1000 : * usable mergeclauses (represented as a list of their restrictinfo nodes).
1001 : */
1002 : List *
1003 34155 : find_mergeclauses_for_pathkeys(PlannerInfo *root,
1004 : List *pathkeys,
1005 : bool outer_keys,
1006 : List *restrictinfos)
1007 : {
1008 34155 : List *mergeclauses = NIL;
1009 : ListCell *i;
1010 :
1011 : /* make sure we have eclasses cached in the clauses */
1012 67064 : foreach(i, restrictinfos)
1013 : {
1014 32909 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1015 :
1016 32909 : update_mergeclause_eclasses(root, rinfo);
1017 : }
1018 :
1019 53171 : foreach(i, pathkeys)
1020 : {
1021 22097 : PathKey *pathkey = (PathKey *) lfirst(i);
1022 22097 : EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
1023 22097 : List *matched_restrictinfos = NIL;
1024 : ListCell *j;
1025 :
1026 : /*----------
1027 : * A mergejoin clause matches a pathkey if it has the same EC.
1028 : * If there are multiple matching clauses, take them all. In plain
1029 : * inner-join scenarios we expect only one match, because
1030 : * equivalence-class processing will have removed any redundant
1031 : * mergeclauses. However, in outer-join scenarios there might be
1032 : * multiple matches. An example is
1033 : *
1034 : * select * from a full join b
1035 : * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
1036 : *
1037 : * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
1038 : * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
1039 : * we *must* do so or we will be unable to form a valid plan.
1040 : *
1041 : * We expect that the given pathkeys list is canonical, which means
1042 : * no two members have the same EC, so it's not possible for this
1043 : * code to enter the same mergeclause into the result list twice.
1044 : *
1045 : * It's possible that multiple matching clauses might have different
1046 : * ECs on the other side, in which case the order we put them into our
1047 : * result makes a difference in the pathkeys required for the other
1048 : * input path. However this routine hasn't got any info about which
1049 : * order would be best, so we don't worry about that.
1050 : *
1051 : * It's also possible that the selected mergejoin clauses produce
1052 : * a noncanonical ordering of pathkeys for the other side, ie, we
1053 : * might select clauses that reference b.v1, b.v2, b.v1 in that
1054 : * order. This is not harmful in itself, though it suggests that
1055 : * the clauses are partially redundant. Since it happens only with
1056 : * redundant query conditions, we don't bother to eliminate it.
1057 : * make_inner_pathkeys_for_merge() has to delete duplicates when
1058 : * it constructs the canonical pathkeys list, and we also have to
1059 : * deal with the case in create_mergejoin_plan().
1060 : *----------
1061 : */
1062 47818 : foreach(j, restrictinfos)
1063 : {
1064 25721 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
1065 : EquivalenceClass *clause_ec;
1066 :
1067 25721 : if (outer_keys)
1068 51346 : clause_ec = rinfo->outer_is_left ?
1069 25673 : rinfo->left_ec : rinfo->right_ec;
1070 : else
1071 96 : clause_ec = rinfo->outer_is_left ?
1072 48 : rinfo->right_ec : rinfo->left_ec;
1073 25721 : if (clause_ec == pathkey_ec)
1074 19025 : matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
1075 : }
1076 :
1077 : /*
1078 : * If we didn't find a mergeclause, we're done --- any additional
1079 : * sort-key positions in the pathkeys are useless. (But we can still
1080 : * mergejoin if we found at least one mergeclause.)
1081 : */
1082 22097 : if (matched_restrictinfos == NIL)
1083 3081 : break;
1084 :
1085 : /*
1086 : * If we did find usable mergeclause(s) for this sort-key position,
1087 : * add them to result list.
1088 : */
1089 19016 : mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
1090 : }
1091 :
1092 34155 : return mergeclauses;
1093 : }
1094 :
1095 : /*
1096 : * select_outer_pathkeys_for_merge
1097 : * Builds a pathkey list representing a possible sort ordering
1098 : * that can be used with the given mergeclauses.
1099 : *
1100 : * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1101 : * that will be used in a merge join.
1102 : * 'joinrel' is the join relation we are trying to construct.
1103 : *
1104 : * The restrictinfos must be marked (via outer_is_left) to show which side
1105 : * of each clause is associated with the current outer path. (See
1106 : * select_mergejoin_clauses())
1107 : *
1108 : * Returns a pathkeys list that can be applied to the outer relation.
1109 : *
1110 : * Since we assume here that a sort is required, there is no particular use
1111 : * in matching any available ordering of the outerrel. (joinpath.c has an
1112 : * entirely separate code path for considering sort-free mergejoins.) Rather,
1113 : * it's interesting to try to match the requested query_pathkeys so that a
1114 : * second output sort may be avoided; and failing that, we try to list "more
1115 : * popular" keys (those with the most unmatched EquivalenceClass peers)
1116 : * earlier, in hopes of making the resulting ordering useful for as many
1117 : * higher-level mergejoins as possible.
1118 : */
1119 : List *
1120 14261 : select_outer_pathkeys_for_merge(PlannerInfo *root,
1121 : List *mergeclauses,
1122 : RelOptInfo *joinrel)
1123 : {
1124 14261 : List *pathkeys = NIL;
1125 14261 : int nClauses = list_length(mergeclauses);
1126 : EquivalenceClass **ecs;
1127 : int *scores;
1128 : int necs;
1129 : ListCell *lc;
1130 : int j;
1131 :
1132 : /* Might have no mergeclauses */
1133 14261 : if (nClauses == 0)
1134 3731 : return NIL;
1135 :
1136 : /*
1137 : * Make arrays of the ECs used by the mergeclauses (dropping any
1138 : * duplicates) and their "popularity" scores.
1139 : */
1140 10530 : ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
1141 10530 : scores = (int *) palloc(nClauses * sizeof(int));
1142 10530 : necs = 0;
1143 :
1144 21720 : foreach(lc, mergeclauses)
1145 : {
1146 11190 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1147 : EquivalenceClass *oeclass;
1148 : int score;
1149 : ListCell *lc2;
1150 :
1151 : /* get the outer eclass */
1152 11190 : update_mergeclause_eclasses(root, rinfo);
1153 :
1154 11190 : if (rinfo->outer_is_left)
1155 5519 : oeclass = rinfo->left_ec;
1156 : else
1157 5671 : oeclass = rinfo->right_ec;
1158 :
1159 : /* reject duplicates */
1160 11895 : for (j = 0; j < necs; j++)
1161 : {
1162 711 : if (ecs[j] == oeclass)
1163 6 : break;
1164 : }
1165 11190 : if (j < necs)
1166 6 : continue;
1167 :
1168 : /* compute score */
1169 11184 : score = 0;
1170 32406 : foreach(lc2, oeclass->ec_members)
1171 : {
1172 21222 : EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
1173 :
1174 : /* Potential future join partner? */
1175 41628 : if (!em->em_is_const && !em->em_is_child &&
1176 20406 : !bms_overlap(em->em_relids, joinrel->relids))
1177 340 : score++;
1178 : }
1179 :
1180 11184 : ecs[necs] = oeclass;
1181 11184 : scores[necs] = score;
1182 11184 : necs++;
1183 : }
1184 :
1185 : /*
1186 : * Find out if we have all the ECs mentioned in query_pathkeys; if so we
1187 : * can generate a sort order that's also useful for final output. There is
1188 : * no percentage in a partial match, though, so we have to have 'em all.
1189 : */
1190 10530 : if (root->query_pathkeys)
1191 : {
1192 5542 : foreach(lc, root->query_pathkeys)
1193 : {
1194 5473 : PathKey *query_pathkey = (PathKey *) lfirst(lc);
1195 5473 : EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1196 :
1197 11088 : for (j = 0; j < necs; j++)
1198 : {
1199 5729 : if (ecs[j] == query_ec)
1200 114 : break; /* found match */
1201 : }
1202 5473 : if (j >= necs)
1203 5359 : break; /* didn't find match */
1204 : }
1205 : /* if we got to the end of the list, we have them all */
1206 5428 : if (lc == NULL)
1207 : {
1208 : /* copy query_pathkeys as starting point for our output */
1209 69 : pathkeys = list_copy(root->query_pathkeys);
1210 : /* mark their ECs as already-emitted */
1211 142 : foreach(lc, root->query_pathkeys)
1212 : {
1213 73 : PathKey *query_pathkey = (PathKey *) lfirst(lc);
1214 73 : EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1215 :
1216 77 : for (j = 0; j < necs; j++)
1217 : {
1218 77 : if (ecs[j] == query_ec)
1219 : {
1220 73 : scores[j] = -1;
1221 73 : break;
1222 : }
1223 : }
1224 : }
1225 : }
1226 : }
1227 :
1228 : /*
1229 : * Add remaining ECs to the list in popularity order, using a default sort
1230 : * ordering. (We could use qsort() here, but the list length is usually
1231 : * so small it's not worth it.)
1232 : */
1233 : for (;;)
1234 : {
1235 : int best_j;
1236 : int best_score;
1237 : EquivalenceClass *ec;
1238 : PathKey *pathkey;
1239 :
1240 21641 : best_j = 0;
1241 21641 : best_score = scores[0];
1242 23694 : for (j = 1; j < necs; j++)
1243 : {
1244 2053 : if (scores[j] > best_score)
1245 : {
1246 650 : best_j = j;
1247 650 : best_score = scores[j];
1248 : }
1249 : }
1250 21641 : if (best_score < 0)
1251 10530 : break; /* all done */
1252 11111 : ec = ecs[best_j];
1253 11111 : scores[best_j] = -1;
1254 11111 : pathkey = make_canonical_pathkey(root,
1255 : ec,
1256 11111 : linitial_oid(ec->ec_opfamilies),
1257 : BTLessStrategyNumber,
1258 : false);
1259 : /* can't be redundant because no duplicate ECs */
1260 11111 : Assert(!pathkey_is_redundant(pathkey, pathkeys));
1261 11111 : pathkeys = lappend(pathkeys, pathkey);
1262 11111 : }
1263 :
1264 10530 : pfree(ecs);
1265 10530 : pfree(scores);
1266 :
1267 10530 : return pathkeys;
1268 : }
1269 :
1270 : /*
1271 : * make_inner_pathkeys_for_merge
1272 : * Builds a pathkey list representing the explicit sort order that
1273 : * must be applied to an inner path to make it usable with the
1274 : * given mergeclauses.
1275 : *
1276 : * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1277 : * that will be used in a merge join.
1278 : * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
1279 : * side of the join.
1280 : *
1281 : * The restrictinfos must be marked (via outer_is_left) to show which side
1282 : * of each clause is associated with the current outer path. (See
1283 : * select_mergejoin_clauses())
1284 : *
1285 : * Returns a pathkeys list that can be applied to the inner relation.
1286 : *
1287 : * Note that it is not this routine's job to decide whether sorting is
1288 : * actually needed for a particular input path. Assume a sort is necessary;
1289 : * just make the keys, eh?
1290 : */
1291 : List *
1292 17352 : make_inner_pathkeys_for_merge(PlannerInfo *root,
1293 : List *mergeclauses,
1294 : List *outer_pathkeys)
1295 : {
1296 17352 : List *pathkeys = NIL;
1297 : EquivalenceClass *lastoeclass;
1298 : PathKey *opathkey;
1299 : ListCell *lc;
1300 : ListCell *lop;
1301 :
1302 17352 : lastoeclass = NULL;
1303 17352 : opathkey = NULL;
1304 17352 : lop = list_head(outer_pathkeys);
1305 :
1306 36345 : foreach(lc, mergeclauses)
1307 : {
1308 18993 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1309 : EquivalenceClass *oeclass;
1310 : EquivalenceClass *ieclass;
1311 : PathKey *pathkey;
1312 :
1313 18993 : update_mergeclause_eclasses(root, rinfo);
1314 :
1315 18993 : if (rinfo->outer_is_left)
1316 : {
1317 8694 : oeclass = rinfo->left_ec;
1318 8694 : ieclass = rinfo->right_ec;
1319 : }
1320 : else
1321 : {
1322 10299 : oeclass = rinfo->right_ec;
1323 10299 : ieclass = rinfo->left_ec;
1324 : }
1325 :
1326 : /* outer eclass should match current or next pathkeys */
1327 : /* we check this carefully for debugging reasons */
1328 18993 : if (oeclass != lastoeclass)
1329 : {
1330 18984 : if (!lop)
1331 0 : elog(ERROR, "too few pathkeys for mergeclauses");
1332 18984 : opathkey = (PathKey *) lfirst(lop);
1333 18984 : lop = lnext(lop);
1334 18984 : lastoeclass = opathkey->pk_eclass;
1335 18984 : if (oeclass != lastoeclass)
1336 0 : elog(ERROR, "outer pathkeys do not match mergeclause");
1337 : }
1338 :
1339 : /*
1340 : * Often, we'll have same EC on both sides, in which case the outer
1341 : * pathkey is also canonical for the inner side, and we can skip a
1342 : * useless search.
1343 : */
1344 18993 : if (ieclass == oeclass)
1345 14134 : pathkey = opathkey;
1346 : else
1347 4859 : pathkey = make_canonical_pathkey(root,
1348 : ieclass,
1349 : opathkey->pk_opfamily,
1350 : opathkey->pk_strategy,
1351 4859 : opathkey->pk_nulls_first);
1352 :
1353 : /*
1354 : * Don't generate redundant pathkeys (can happen if multiple
1355 : * mergeclauses refer to same EC).
1356 : */
1357 18993 : if (!pathkey_is_redundant(pathkey, pathkeys))
1358 18982 : pathkeys = lappend(pathkeys, pathkey);
1359 : }
1360 :
1361 17352 : return pathkeys;
1362 : }
1363 :
1364 : /****************************************************************************
1365 : * PATHKEY USEFULNESS CHECKS
1366 : *
1367 : * We only want to remember as many of the pathkeys of a path as have some
1368 : * potential use, either for subsequent mergejoins or for meeting the query's
1369 : * requested output ordering. This ensures that add_path() won't consider
1370 : * a path to have a usefully different ordering unless it really is useful.
1371 : * These routines check for usefulness of given pathkeys.
1372 : ****************************************************************************/
1373 :
1374 : /*
1375 : * pathkeys_useful_for_merging
1376 : * Count the number of pathkeys that may be useful for mergejoins
1377 : * above the given relation.
1378 : *
1379 : * We consider a pathkey potentially useful if it corresponds to the merge
1380 : * ordering of either side of any joinclause for the rel. This might be
1381 : * overoptimistic, since joinclauses that require different other relations
1382 : * might never be usable at the same time, but trying to be exact is likely
1383 : * to be more trouble than it's worth.
1384 : *
1385 : * To avoid doubling the number of mergejoin paths considered, we would like
1386 : * to consider only one of the two scan directions (ASC or DESC) as useful
1387 : * for merging for any given target column. The choice is arbitrary unless
1388 : * one of the directions happens to match an ORDER BY key, in which case
1389 : * that direction should be preferred, in hopes of avoiding a final sort step.
1390 : * right_merge_direction() implements this heuristic.
1391 : */
1392 : static int
1393 67507 : pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
1394 : {
1395 67507 : int useful = 0;
1396 : ListCell *i;
1397 :
1398 79647 : foreach(i, pathkeys)
1399 : {
1400 38016 : PathKey *pathkey = (PathKey *) lfirst(i);
1401 38016 : bool matched = false;
1402 : ListCell *j;
1403 :
1404 : /* If "wrong" direction, not useful for merging */
1405 38016 : if (!right_merge_direction(root, pathkey))
1406 9129 : break;
1407 :
1408 : /*
1409 : * First look into the EquivalenceClass of the pathkey, to see if
1410 : * there are any members not yet joined to the rel. If so, it's
1411 : * surely possible to generate a mergejoin clause using them.
1412 : */
1413 43794 : if (rel->has_eclass_joins &&
1414 14907 : eclass_useful_for_merging(root, pathkey->pk_eclass, rel))
1415 6796 : matched = true;
1416 : else
1417 : {
1418 : /*
1419 : * Otherwise search the rel's joininfo list, which contains
1420 : * non-EquivalenceClass-derivable join clauses that might
1421 : * nonetheless be mergejoinable.
1422 : */
1423 36352 : foreach(j, rel->joininfo)
1424 : {
1425 19605 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
1426 :
1427 19605 : if (restrictinfo->mergeopfamilies == NIL)
1428 1865 : continue;
1429 17740 : update_mergeclause_eclasses(root, restrictinfo);
1430 :
1431 33927 : if (pathkey->pk_eclass == restrictinfo->left_ec ||
1432 16187 : pathkey->pk_eclass == restrictinfo->right_ec)
1433 : {
1434 5344 : matched = true;
1435 5344 : break;
1436 : }
1437 : }
1438 : }
1439 :
1440 : /*
1441 : * If we didn't find a mergeclause, we're done --- any additional
1442 : * sort-key positions in the pathkeys are useless. (But we can still
1443 : * mergejoin if we found at least one mergeclause.)
1444 : */
1445 28887 : if (matched)
1446 12140 : useful++;
1447 : else
1448 16747 : break;
1449 : }
1450 :
1451 67507 : return useful;
1452 : }
1453 :
1454 : /*
1455 : * right_merge_direction
1456 : * Check whether the pathkey embodies the preferred sort direction
1457 : * for merging its target column.
1458 : */
1459 : static bool
1460 38016 : right_merge_direction(PlannerInfo *root, PathKey *pathkey)
1461 : {
1462 : ListCell *l;
1463 :
1464 68850 : foreach(l, root->query_pathkeys)
1465 : {
1466 36004 : PathKey *query_pathkey = (PathKey *) lfirst(l);
1467 :
1468 41174 : if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
1469 5170 : pathkey->pk_opfamily == query_pathkey->pk_opfamily)
1470 : {
1471 : /*
1472 : * Found a matching query sort column. Prefer this pathkey's
1473 : * direction iff it matches. Note that we ignore pk_nulls_first,
1474 : * which means that a sort might be needed anyway ... but we still
1475 : * want to prefer only one of the two possible directions, and we
1476 : * might as well use this one.
1477 : */
1478 5170 : return (pathkey->pk_strategy == query_pathkey->pk_strategy);
1479 : }
1480 : }
1481 :
1482 : /* If no matching ORDER BY request, prefer the ASC direction */
1483 32846 : return (pathkey->pk_strategy == BTLessStrategyNumber);
1484 : }
1485 :
1486 : /*
1487 : * pathkeys_useful_for_ordering
1488 : * Count the number of pathkeys that are useful for meeting the
1489 : * query's requested output ordering.
1490 : *
1491 : * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
1492 : * no good to order by just the first key(s) of the requested ordering.
1493 : * So the result is always either 0 or list_length(root->query_pathkeys).
1494 : */
1495 : static int
1496 67507 : pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
1497 : {
1498 67507 : if (root->query_pathkeys == NIL)
1499 31513 : return 0; /* no special ordering requested */
1500 :
1501 35994 : if (pathkeys == NIL)
1502 14125 : return 0; /* unordered path */
1503 :
1504 21869 : if (pathkeys_contained_in(root->query_pathkeys, pathkeys))
1505 : {
1506 : /* It's useful ... or at least the first N keys are */
1507 1986 : return list_length(root->query_pathkeys);
1508 : }
1509 :
1510 19883 : return 0; /* path ordering not useful */
1511 : }
1512 :
1513 : /*
1514 : * truncate_useless_pathkeys
1515 : * Shorten the given pathkey list to just the useful pathkeys.
1516 : */
1517 : List *
1518 67507 : truncate_useless_pathkeys(PlannerInfo *root,
1519 : RelOptInfo *rel,
1520 : List *pathkeys)
1521 : {
1522 : int nuseful;
1523 : int nuseful2;
1524 :
1525 67507 : nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
1526 67507 : nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
1527 67507 : if (nuseful2 > nuseful)
1528 1896 : nuseful = nuseful2;
1529 :
1530 : /*
1531 : * Note: not safe to modify input list destructively, but we can avoid
1532 : * copying the list if we're not actually going to change it
1533 : */
1534 67507 : if (nuseful == 0)
1535 54781 : return NIL;
1536 12726 : else if (nuseful == list_length(pathkeys))
1537 12504 : return pathkeys;
1538 : else
1539 222 : return list_truncate(list_copy(pathkeys), nuseful);
1540 : }
1541 :
1542 : /*
1543 : * has_useful_pathkeys
1544 : * Detect whether the specified rel could have any pathkeys that are
1545 : * useful according to truncate_useless_pathkeys().
1546 : *
1547 : * This is a cheap test that lets us skip building pathkeys at all in very
1548 : * simple queries. It's OK to err in the direction of returning "true" when
1549 : * there really aren't any usable pathkeys, but erring in the other direction
1550 : * is bad --- so keep this in sync with the routines above!
1551 : *
1552 : * We could make the test more complex, for example checking to see if any of
1553 : * the joinclauses are really mergejoinable, but that likely wouldn't win
1554 : * often enough to repay the extra cycles. Queries with neither a join nor
1555 : * a sort are reasonably common, though, so this much work seems worthwhile.
1556 : */
1557 : bool
1558 26734 : has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
1559 : {
1560 26734 : if (rel->joininfo != NIL || rel->has_eclass_joins)
1561 14408 : return true; /* might be able to use pathkeys for merging */
1562 12326 : if (root->query_pathkeys != NIL)
1563 4164 : return true; /* might be able to use them for ordering */
1564 8162 : return false; /* definitely useless */
1565 : }
|