Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * analyzejoins.c
4 : * Routines for simplifying joins after initial query analysis
5 : *
6 : * While we do a great deal of join simplification in prep/prepjointree.c,
7 : * certain optimizations cannot be performed at that stage for lack of
8 : * detailed information about the query. The routines here are invoked
9 : * after initsplan.c has done its work, and can do additional join removal
10 : * and simplification steps based on the information extracted. The penalty
11 : * is that we have to work harder to clean up after ourselves when we modify
12 : * the query, since the derived data structures have to be updated too.
13 : *
14 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : *
18 : * IDENTIFICATION
19 : * src/backend/optimizer/plan/analyzejoins.c
20 : *
21 : *-------------------------------------------------------------------------
22 : */
23 : #include "postgres.h"
24 :
25 : #include "nodes/nodeFuncs.h"
26 : #include "optimizer/clauses.h"
27 : #include "optimizer/joininfo.h"
28 : #include "optimizer/pathnode.h"
29 : #include "optimizer/paths.h"
30 : #include "optimizer/planmain.h"
31 : #include "optimizer/tlist.h"
32 : #include "optimizer/var.h"
33 : #include "utils/lsyscache.h"
34 :
35 : /* local functions */
36 : static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
37 : static void remove_rel_from_query(PlannerInfo *root, int relid,
38 : Relids joinrelids);
39 : static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
40 : static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
41 : static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
42 : List *clause_list);
43 : static Oid distinct_col_search(int colno, List *colnos, List *opids);
44 : static bool is_innerrel_unique_for(PlannerInfo *root,
45 : Relids outerrelids,
46 : RelOptInfo *innerrel,
47 : JoinType jointype,
48 : List *restrictlist);
49 :
50 :
51 : /*
52 : * remove_useless_joins
53 : * Check for relations that don't actually need to be joined at all,
54 : * and remove them from the query.
55 : *
56 : * We are passed the current joinlist and return the updated list. Other
57 : * data structures that have to be updated are accessible via "root".
58 : */
59 : List *
60 14189 : remove_useless_joins(PlannerInfo *root, List *joinlist)
61 : {
62 : ListCell *lc;
63 :
64 : /*
65 : * We are only interested in relations that are left-joined to, so we can
66 : * scan the join_info_list to find them easily.
67 : */
68 : restart:
69 30598 : foreach(lc, root->join_info_list)
70 : {
71 1522 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
72 : int innerrelid;
73 : int nremoved;
74 :
75 : /* Skip if not removable */
76 1522 : if (!join_is_removable(root, sjinfo))
77 1110 : continue;
78 :
79 : /*
80 : * Currently, join_is_removable can only succeed when the sjinfo's
81 : * righthand is a single baserel. Remove that rel from the query and
82 : * joinlist.
83 : */
84 412 : innerrelid = bms_singleton_member(sjinfo->min_righthand);
85 :
86 412 : remove_rel_from_query(root, innerrelid,
87 412 : bms_union(sjinfo->min_lefthand,
88 412 : sjinfo->min_righthand));
89 :
90 : /* We verify that exactly one reference gets removed from joinlist */
91 412 : nremoved = 0;
92 412 : joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
93 412 : if (nremoved != 1)
94 0 : elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
95 :
96 : /*
97 : * We can delete this SpecialJoinInfo from the list too, since it's no
98 : * longer of interest.
99 : */
100 412 : root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
101 :
102 : /*
103 : * Restart the scan. This is necessary to ensure we find all
104 : * removable joins independently of ordering of the join_info_list
105 : * (note that removal of attr_needed bits may make a join appear
106 : * removable that did not before). Also, since we just deleted the
107 : * current list cell, we'd have to have some kluge to continue the
108 : * list scan anyway.
109 : */
110 412 : goto restart;
111 : }
112 :
113 13777 : return joinlist;
114 : }
115 :
116 : /*
117 : * clause_sides_match_join
118 : * Determine whether a join clause is of the right form to use in this join.
119 : *
120 : * We already know that the clause is a binary opclause referencing only the
121 : * rels in the current join. The point here is to check whether it has the
122 : * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
123 : * rather than mixing outer and inner vars on either side. If it matches,
124 : * we set the transient flag outer_is_left to identify which side is which.
125 : */
126 : static inline bool
127 4838 : clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
128 : Relids innerrelids)
129 : {
130 7602 : if (bms_is_subset(rinfo->left_relids, outerrelids) &&
131 2764 : bms_is_subset(rinfo->right_relids, innerrelids))
132 : {
133 : /* lefthand side is outer */
134 2764 : rinfo->outer_is_left = true;
135 2764 : return true;
136 : }
137 4148 : else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
138 2074 : bms_is_subset(rinfo->right_relids, outerrelids))
139 : {
140 : /* righthand side is outer */
141 2074 : rinfo->outer_is_left = false;
142 2074 : return true;
143 : }
144 0 : return false; /* no good for these input relations */
145 : }
146 :
147 : /*
148 : * join_is_removable
149 : * Check whether we need not perform this special join at all, because
150 : * it will just duplicate its left input.
151 : *
152 : * This is true for a left join for which the join condition cannot match
153 : * more than one inner-side row. (There are other possibly interesting
154 : * cases, but we don't have the infrastructure to prove them.) We also
155 : * have to check that the inner side doesn't generate any variables needed
156 : * above the join.
157 : */
158 : static bool
159 1522 : join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
160 : {
161 : int innerrelid;
162 : RelOptInfo *innerrel;
163 : Relids joinrelids;
164 1522 : List *clause_list = NIL;
165 : ListCell *l;
166 : int attroff;
167 :
168 : /*
169 : * Must be a non-delaying left join to a single baserel, else we aren't
170 : * going to be able to do anything with it.
171 : */
172 2713 : if (sjinfo->jointype != JOIN_LEFT ||
173 1191 : sjinfo->delay_upper_joins)
174 363 : return false;
175 :
176 1159 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
177 109 : return false;
178 :
179 1050 : innerrel = find_base_rel(root, innerrelid);
180 :
181 : /*
182 : * Before we go to the effort of checking whether any innerrel variables
183 : * are needed above the join, make a quick check to eliminate cases in
184 : * which we will surely be unable to prove uniqueness of the innerrel.
185 : */
186 1050 : if (!rel_supports_distinctness(root, innerrel))
187 121 : return false;
188 :
189 : /* Compute the relid set for the join we are considering */
190 929 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
191 :
192 : /*
193 : * We can't remove the join if any inner-rel attributes are used above the
194 : * join.
195 : *
196 : * Note that this test only detects use of inner-rel attributes in higher
197 : * join conditions and the target list. There might be such attributes in
198 : * pushed-down conditions at this join, too. We check that case below.
199 : *
200 : * As a micro-optimization, it seems better to start with max_attr and
201 : * count down rather than starting with min_attr and counting up, on the
202 : * theory that the system attributes are somewhat less likely to be wanted
203 : * and should be tested last.
204 : */
205 11190 : for (attroff = innerrel->max_attr - innerrel->min_attr;
206 : attroff >= 0;
207 9332 : attroff--)
208 : {
209 9839 : if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids))
210 507 : return false;
211 : }
212 :
213 : /*
214 : * Similarly check that the inner rel isn't needed by any PlaceHolderVars
215 : * that will be used above the join. We only need to fail if such a PHV
216 : * actually references some inner-rel attributes; but the correct check
217 : * for that is relatively expensive, so we first check against ph_eval_at,
218 : * which must mention the inner rel if the PHV uses any inner-rel attrs as
219 : * non-lateral references. Note that if the PHV's syntactic scope is just
220 : * the inner rel, we can't drop the rel even if the PHV is variable-free.
221 : */
222 426 : foreach(l, root->placeholder_list)
223 : {
224 9 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
225 :
226 9 : if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
227 0 : return false; /* it references innerrel laterally */
228 9 : if (bms_is_subset(phinfo->ph_needed, joinrelids))
229 4 : continue; /* PHV is not used above the join */
230 5 : if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
231 0 : continue; /* it definitely doesn't reference innerrel */
232 5 : if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids))
233 3 : return false; /* there isn't any other place to eval PHV */
234 2 : if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr),
235 2 : innerrel->relids))
236 2 : return false; /* it does reference innerrel */
237 : }
238 :
239 : /*
240 : * Search for mergejoinable clauses that constrain the inner rel against
241 : * either the outer rel or a pseudoconstant. If an operator is
242 : * mergejoinable then it behaves like equality for some btree opclass, so
243 : * it's what we want. The mergejoinability test also eliminates clauses
244 : * containing volatile functions, which we couldn't depend on.
245 : */
246 858 : foreach(l, innerrel->joininfo)
247 : {
248 441 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
249 :
250 : /*
251 : * If it's not a join clause for this outer join, we can't use it.
252 : * Note that if the clause is pushed-down, then it is logically from
253 : * above the outer join, even if it references no other rels (it might
254 : * be from WHERE, for example).
255 : */
256 878 : if (restrictinfo->is_pushed_down ||
257 437 : !bms_equal(restrictinfo->required_relids, joinrelids))
258 : {
259 : /*
260 : * If such a clause actually references the inner rel then join
261 : * removal has to be disallowed. We have to check this despite
262 : * the previous attr_needed checks because of the possibility of
263 : * pushed-down clauses referencing the rel.
264 : */
265 4 : if (bms_is_member(innerrelid, restrictinfo->clause_relids))
266 0 : return false;
267 4 : continue; /* else, ignore; not useful here */
268 : }
269 :
270 : /* Ignore if it's not a mergejoinable clause */
271 874 : if (!restrictinfo->can_join ||
272 437 : restrictinfo->mergeopfamilies == NIL)
273 0 : continue; /* not mergejoinable */
274 :
275 : /*
276 : * Check if clause has the form "outer op inner" or "inner op outer",
277 : * and if so mark which side is inner.
278 : */
279 437 : if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
280 : innerrel->relids))
281 0 : continue; /* no good for these input relations */
282 :
283 : /* OK, add to list */
284 437 : clause_list = lappend(clause_list, restrictinfo);
285 : }
286 :
287 : /*
288 : * Now that we have the relevant equality join clauses, try to prove the
289 : * innerrel distinct.
290 : */
291 417 : if (rel_is_distinct_for(root, innerrel, clause_list))
292 412 : return true;
293 :
294 : /*
295 : * Some day it would be nice to check for other methods of establishing
296 : * distinctness.
297 : */
298 5 : return false;
299 : }
300 :
301 :
302 : /*
303 : * Remove the target relid from the planner's data structures, having
304 : * determined that there is no need to include it in the query.
305 : *
306 : * We are not terribly thorough here. We must make sure that the rel is
307 : * no longer treated as a baserel, and that attributes of other baserels
308 : * are no longer marked as being needed at joins involving this rel.
309 : * Also, join quals involving the rel have to be removed from the joininfo
310 : * lists, but only if they belong to the outer join identified by joinrelids.
311 : */
312 : static void
313 412 : remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
314 : {
315 412 : RelOptInfo *rel = find_base_rel(root, relid);
316 : List *joininfos;
317 : Index rti;
318 : ListCell *l;
319 : ListCell *nextl;
320 :
321 : /*
322 : * Mark the rel as "dead" to show it is no longer part of the join tree.
323 : * (Removing it from the baserel array altogether seems too risky.)
324 : */
325 412 : rel->reloptkind = RELOPT_DEADREL;
326 :
327 : /*
328 : * Remove references to the rel from other baserels' attr_needed arrays.
329 : */
330 3185 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
331 : {
332 2773 : RelOptInfo *otherrel = root->simple_rel_array[rti];
333 : int attroff;
334 :
335 : /* there may be empty slots corresponding to non-baserel RTEs */
336 2773 : if (otherrel == NULL)
337 1644 : continue;
338 :
339 1129 : Assert(otherrel->relid == rti); /* sanity check on array */
340 :
341 : /* no point in processing target rel itself */
342 1129 : if (otherrel == rel)
343 412 : continue;
344 :
345 17324 : for (attroff = otherrel->max_attr - otherrel->min_attr;
346 : attroff >= 0;
347 15890 : attroff--)
348 : {
349 31780 : otherrel->attr_needed[attroff] =
350 15890 : bms_del_member(otherrel->attr_needed[attroff], relid);
351 : }
352 : }
353 :
354 : /*
355 : * Likewise remove references from SpecialJoinInfo data structures.
356 : *
357 : * This is relevant in case the outer join we're deleting is nested inside
358 : * other outer joins: the upper joins' relid sets have to be adjusted. The
359 : * RHS of the target outer join will be made empty here, but that's OK
360 : * since caller will delete that SpecialJoinInfo entirely.
361 : */
362 922 : foreach(l, root->join_info_list)
363 : {
364 510 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
365 :
366 510 : sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid);
367 510 : sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid);
368 510 : sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid);
369 510 : sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid);
370 : }
371 :
372 : /*
373 : * Likewise remove references from PlaceHolderVar data structures,
374 : * removing any no-longer-needed placeholders entirely.
375 : *
376 : * Removal is a bit tricker than it might seem: we can remove PHVs that
377 : * are used at the target rel and/or in the join qual, but not those that
378 : * are used at join partner rels or above the join. It's not that easy to
379 : * distinguish PHVs used at partner rels from those used in the join qual,
380 : * since they will both have ph_needed sets that are subsets of
381 : * joinrelids. However, a PHV used at a partner rel could not have the
382 : * target rel in ph_eval_at, so we check that while deciding whether to
383 : * remove or just update the PHV. There is no corresponding test in
384 : * join_is_removable because it doesn't need to distinguish those cases.
385 : */
386 416 : for (l = list_head(root->placeholder_list); l != NULL; l = nextl)
387 : {
388 4 : PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
389 :
390 4 : nextl = lnext(l);
391 4 : Assert(!bms_is_member(relid, phinfo->ph_lateral));
392 8 : if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
393 4 : bms_is_member(relid, phinfo->ph_eval_at))
394 1 : root->placeholder_list = list_delete_ptr(root->placeholder_list,
395 : phinfo);
396 : else
397 : {
398 3 : phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
399 3 : Assert(!bms_is_empty(phinfo->ph_eval_at));
400 3 : phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
401 : }
402 : }
403 :
404 : /*
405 : * Remove any joinquals referencing the rel from the joininfo lists.
406 : *
407 : * In some cases, a joinqual has to be put back after deleting its
408 : * reference to the target rel. This can occur for pseudoconstant and
409 : * outerjoin-delayed quals, which can get marked as requiring the rel in
410 : * order to force them to be evaluated at or above the join. We can't
411 : * just discard them, though. Only quals that logically belonged to the
412 : * outer join being discarded should be removed from the query.
413 : *
414 : * We must make a copy of the rel's old joininfo list before starting the
415 : * loop, because otherwise remove_join_clause_from_rels would destroy the
416 : * list while we're scanning it.
417 : */
418 412 : joininfos = list_copy(rel->joininfo);
419 848 : foreach(l, joininfos)
420 : {
421 436 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
422 :
423 436 : remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
424 :
425 868 : if (rinfo->is_pushed_down ||
426 432 : !bms_equal(rinfo->required_relids, joinrelids))
427 : {
428 : /* Recheck that qual doesn't actually reference the target rel */
429 4 : Assert(!bms_is_member(relid, rinfo->clause_relids));
430 :
431 : /*
432 : * The required_relids probably aren't shared with anything else,
433 : * but let's copy them just to be sure.
434 : */
435 4 : rinfo->required_relids = bms_copy(rinfo->required_relids);
436 4 : rinfo->required_relids = bms_del_member(rinfo->required_relids,
437 : relid);
438 4 : distribute_restrictinfo_to_rels(root, rinfo);
439 : }
440 : }
441 :
442 : /*
443 : * There may be references to the rel in root->fkey_list, but if so,
444 : * match_foreign_keys_to_quals() will get rid of them.
445 : */
446 412 : }
447 :
448 : /*
449 : * Remove any occurrences of the target relid from a joinlist structure.
450 : *
451 : * It's easiest to build a whole new list structure, so we handle it that
452 : * way. Efficiency is not a big deal here.
453 : *
454 : * *nremoved is incremented by the number of occurrences removed (there
455 : * should be exactly one, but the caller checks that).
456 : */
457 : static List *
458 446 : remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
459 : {
460 446 : List *result = NIL;
461 : ListCell *jl;
462 :
463 1591 : foreach(jl, joinlist)
464 : {
465 1145 : Node *jlnode = (Node *) lfirst(jl);
466 :
467 1145 : if (IsA(jlnode, RangeTblRef))
468 : {
469 1111 : int varno = ((RangeTblRef *) jlnode)->rtindex;
470 :
471 1111 : if (varno == relid)
472 412 : (*nremoved)++;
473 : else
474 699 : result = lappend(result, jlnode);
475 : }
476 34 : else if (IsA(jlnode, List))
477 : {
478 : /* Recurse to handle subproblem */
479 : List *sublist;
480 :
481 34 : sublist = remove_rel_from_joinlist((List *) jlnode,
482 : relid, nremoved);
483 : /* Avoid including empty sub-lists in the result */
484 34 : if (sublist)
485 34 : result = lappend(result, sublist);
486 : }
487 : else
488 : {
489 0 : elog(ERROR, "unrecognized joinlist node type: %d",
490 : (int) nodeTag(jlnode));
491 : }
492 : }
493 :
494 446 : return result;
495 : }
496 :
497 :
498 : /*
499 : * reduce_unique_semijoins
500 : * Check for semijoins that can be simplified to plain inner joins
501 : * because the inner relation is provably unique for the join clauses.
502 : *
503 : * Ideally this would happen during reduce_outer_joins, but we don't have
504 : * enough information at that point.
505 : *
506 : * To perform the strength reduction when applicable, we need only delete
507 : * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
508 : * bother fixing the join type attributed to it in the query jointree,
509 : * since that won't be consulted again.)
510 : */
511 : void
512 13777 : reduce_unique_semijoins(PlannerInfo *root)
513 : {
514 : ListCell *lc;
515 : ListCell *next;
516 :
517 : /*
518 : * Scan the join_info_list to find semijoins. We can't use foreach
519 : * because we may delete the current cell.
520 : */
521 14886 : for (lc = list_head(root->join_info_list); lc != NULL; lc = next)
522 : {
523 1109 : SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
524 : int innerrelid;
525 : RelOptInfo *innerrel;
526 : Relids joinrelids;
527 : List *restrictlist;
528 :
529 1109 : next = lnext(lc);
530 :
531 : /*
532 : * Must be a non-delaying semijoin to a single baserel, else we aren't
533 : * going to be able to do anything with it. (It's probably not
534 : * possible for delay_upper_joins to be set on a semijoin, but we
535 : * might as well check.)
536 : */
537 1182 : if (sjinfo->jointype != JOIN_SEMI ||
538 73 : sjinfo->delay_upper_joins)
539 2131 : continue;
540 :
541 73 : if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
542 8 : continue;
543 :
544 65 : innerrel = find_base_rel(root, innerrelid);
545 :
546 : /*
547 : * Before we trouble to run generate_join_implied_equalities, make a
548 : * quick check to eliminate cases in which we will surely be unable to
549 : * prove uniqueness of the innerrel.
550 : */
551 65 : if (!rel_supports_distinctness(root, innerrel))
552 48 : continue;
553 :
554 : /* Compute the relid set for the join we are considering */
555 17 : joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
556 :
557 : /*
558 : * Since we're only considering a single-rel RHS, any join clauses it
559 : * has must be clauses linking it to the semijoin's min_lefthand. We
560 : * can also consider EC-derived join clauses.
561 : */
562 17 : restrictlist =
563 17 : list_concat(generate_join_implied_equalities(root,
564 : joinrelids,
565 : sjinfo->min_lefthand,
566 : innerrel),
567 : innerrel->joininfo);
568 :
569 : /* Test whether the innerrel is unique for those clauses. */
570 17 : if (!innerrel_is_unique(root, sjinfo->min_lefthand, innerrel,
571 : JOIN_SEMI, restrictlist, true))
572 3 : continue;
573 :
574 : /* OK, remove the SpecialJoinInfo from the list. */
575 14 : root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
576 : }
577 13777 : }
578 :
579 :
580 : /*
581 : * rel_supports_distinctness
582 : * Could the relation possibly be proven distinct on some set of columns?
583 : *
584 : * This is effectively a pre-checking function for rel_is_distinct_for().
585 : * It must return TRUE if rel_is_distinct_for() could possibly return TRUE
586 : * with this rel, but it should not expend a lot of cycles. The idea is
587 : * that callers can avoid doing possibly-expensive processing to compute
588 : * rel_is_distinct_for()'s argument lists if the call could not possibly
589 : * succeed.
590 : */
591 : static bool
592 11993 : rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
593 : {
594 : /* We only know about baserels ... */
595 11993 : if (rel->reloptkind != RELOPT_BASEREL)
596 3299 : return false;
597 8694 : if (rel->rtekind == RTE_RELATION)
598 : {
599 : /*
600 : * For a plain relation, we only know how to prove uniqueness by
601 : * reference to unique indexes. Make sure there's at least one
602 : * suitable unique index. It must be immediately enforced, and if
603 : * it's a partial index, it must match the query. (Keep these
604 : * conditions in sync with relation_has_unique_index_for!)
605 : */
606 : ListCell *lc;
607 :
608 10748 : foreach(lc, rel->indexlist)
609 : {
610 8921 : IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
611 :
612 15097 : if (ind->unique && ind->immediate &&
613 6176 : (ind->indpred == NIL || ind->predOK))
614 6176 : return true;
615 : }
616 : }
617 691 : else if (rel->rtekind == RTE_SUBQUERY)
618 : {
619 332 : Query *subquery = root->simple_rte_array[rel->relid]->subquery;
620 :
621 : /* Check if the subquery has any qualities that support distinctness */
622 332 : if (query_supports_distinctness(subquery))
623 154 : return true;
624 : }
625 : /* We have no proof rules for any other rtekinds. */
626 2364 : return false;
627 : }
628 :
629 : /*
630 : * rel_is_distinct_for
631 : * Does the relation return only distinct rows according to clause_list?
632 : *
633 : * clause_list is a list of join restriction clauses involving this rel and
634 : * some other one. Return true if no two rows emitted by this rel could
635 : * possibly join to the same row of the other rel.
636 : *
637 : * The caller must have already determined that each condition is a
638 : * mergejoinable equality with an expression in this relation on one side, and
639 : * an expression not involving this relation on the other. The transient
640 : * outer_is_left flag is used to identify which side references this relation:
641 : * left side if outer_is_left is false, right side if it is true.
642 : *
643 : * Note that the passed-in clause_list may be destructively modified! This
644 : * is OK for current uses, because the clause_list is built by the caller for
645 : * the sole purpose of passing to this function.
646 : */
647 : static bool
648 4597 : rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
649 : {
650 : /*
651 : * We could skip a couple of tests here if we assume all callers checked
652 : * rel_supports_distinctness first, but it doesn't seem worth taking any
653 : * risk for.
654 : */
655 4597 : if (rel->reloptkind != RELOPT_BASEREL)
656 0 : return false;
657 4597 : if (rel->rtekind == RTE_RELATION)
658 : {
659 : /*
660 : * Examine the indexes to see if we have a matching unique index.
661 : * relation_has_unique_index_for automatically adds any usable
662 : * restriction clauses for the rel, so we needn't do that here.
663 : */
664 4471 : if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
665 2927 : return true;
666 : }
667 126 : else if (rel->rtekind == RTE_SUBQUERY)
668 : {
669 126 : Index relid = rel->relid;
670 126 : Query *subquery = root->simple_rte_array[relid]->subquery;
671 126 : List *colnos = NIL;
672 126 : List *opids = NIL;
673 : ListCell *l;
674 :
675 : /*
676 : * Build the argument lists for query_is_distinct_for: a list of
677 : * output column numbers that the query needs to be distinct over, and
678 : * a list of equality operators that the output columns need to be
679 : * distinct according to.
680 : *
681 : * (XXX we are not considering restriction clauses attached to the
682 : * subquery; is that worth doing?)
683 : */
684 232 : foreach(l, clause_list)
685 : {
686 106 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
687 : Oid op;
688 : Var *var;
689 :
690 : /*
691 : * Get the equality operator we need uniqueness according to.
692 : * (This might be a cross-type operator and thus not exactly the
693 : * same operator the subquery would consider; that's all right
694 : * since query_is_distinct_for can resolve such cases.) The
695 : * caller's mergejoinability test should have selected only
696 : * OpExprs.
697 : */
698 106 : op = castNode(OpExpr, rinfo->clause)->opno;
699 :
700 : /* caller identified the inner side for us */
701 106 : if (rinfo->outer_is_left)
702 88 : var = (Var *) get_rightop(rinfo->clause);
703 : else
704 18 : var = (Var *) get_leftop(rinfo->clause);
705 :
706 : /*
707 : * If inner side isn't a Var referencing a subquery output column,
708 : * this clause doesn't help us.
709 : */
710 210 : if (!var || !IsA(var, Var) ||
711 208 : var->varno != relid || var->varlevelsup != 0)
712 2 : continue;
713 :
714 104 : colnos = lappend_int(colnos, var->varattno);
715 104 : opids = lappend_oid(opids, op);
716 : }
717 :
718 126 : if (query_is_distinct_for(subquery, colnos, opids))
719 22 : return true;
720 : }
721 1648 : return false;
722 : }
723 :
724 :
725 : /*
726 : * query_supports_distinctness - could the query possibly be proven distinct
727 : * on some set of output columns?
728 : *
729 : * This is effectively a pre-checking function for query_is_distinct_for().
730 : * It must return TRUE if query_is_distinct_for() could possibly return TRUE
731 : * with this query, but it should not expend a lot of cycles. The idea is
732 : * that callers can avoid doing possibly-expensive processing to compute
733 : * query_is_distinct_for()'s argument lists if the call could not possibly
734 : * succeed.
735 : */
736 : bool
737 340 : query_supports_distinctness(Query *query)
738 : {
739 : /* we don't cope with SRFs, see comment below */
740 340 : if (query->hasTargetSRFs)
741 105 : return false;
742 :
743 : /* check for features we can prove distinctness with */
744 454 : if (query->distinctClause != NIL ||
745 417 : query->groupClause != NIL ||
746 396 : query->groupingSets != NIL ||
747 380 : query->hasAggs ||
748 364 : query->havingQual ||
749 182 : query->setOperations)
750 157 : return true;
751 :
752 78 : return false;
753 : }
754 :
755 : /*
756 : * query_is_distinct_for - does query never return duplicates of the
757 : * specified columns?
758 : *
759 : * query is a not-yet-planned subquery (in current usage, it's always from
760 : * a subquery RTE, which the planner avoids scribbling on).
761 : *
762 : * colnos is an integer list of output column numbers (resno's). We are
763 : * interested in whether rows consisting of just these columns are certain
764 : * to be distinct. "Distinctness" is defined according to whether the
765 : * corresponding upper-level equality operators listed in opids would think
766 : * the values are distinct. (Note: the opids entries could be cross-type
767 : * operators, and thus not exactly the equality operators that the subquery
768 : * would use itself. We use equality_ops_are_compatible() to check
769 : * compatibility. That looks at btree or hash opfamily membership, and so
770 : * should give trustworthy answers for all operators that we might need
771 : * to deal with here.)
772 : */
773 : bool
774 129 : query_is_distinct_for(Query *query, List *colnos, List *opids)
775 : {
776 : ListCell *l;
777 : Oid opid;
778 :
779 129 : Assert(list_length(colnos) == list_length(opids));
780 :
781 : /*
782 : * A set-returning function in the query's targetlist can result in
783 : * returning duplicate rows, if the SRF is evaluated after the
784 : * de-duplication step; so we play it safe and say "no" if there are any
785 : * SRFs. (We could be certain that it's okay if SRFs appear only in the
786 : * specified columns, since those must be evaluated before de-duplication;
787 : * but it doesn't presently seem worth the complication to check that.)
788 : */
789 129 : if (query->hasTargetSRFs)
790 0 : return false;
791 :
792 : /*
793 : * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
794 : * columns in the DISTINCT clause appear in colnos and operator semantics
795 : * match.
796 : */
797 129 : if (query->distinctClause)
798 : {
799 17 : foreach(l, query->distinctClause)
800 : {
801 12 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
802 12 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
803 : query->targetList);
804 :
805 12 : opid = distinct_col_search(tle->resno, colnos, opids);
806 20 : if (!OidIsValid(opid) ||
807 8 : !equality_ops_are_compatible(opid, sgc->eqop))
808 : break; /* exit early if no match */
809 : }
810 9 : if (l == NULL) /* had matches for all? */
811 5 : return true;
812 : }
813 :
814 : /*
815 : * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
816 : * the grouped columns appear in colnos and operator semantics match.
817 : */
818 124 : if (query->groupClause && !query->groupingSets)
819 : {
820 21 : foreach(l, query->groupClause)
821 : {
822 14 : SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
823 14 : TargetEntry *tle = get_sortgroupclause_tle(sgc,
824 : query->targetList);
825 :
826 14 : opid = distinct_col_search(tle->resno, colnos, opids);
827 24 : if (!OidIsValid(opid) ||
828 10 : !equality_ops_are_compatible(opid, sgc->eqop))
829 : break; /* exit early if no match */
830 : }
831 15 : if (l == NULL) /* had matches for all? */
832 7 : return true;
833 : }
834 113 : else if (query->groupingSets)
835 : {
836 : /*
837 : * If we have grouping sets with expressions, we probably don't have
838 : * uniqueness and analysis would be hard. Punt.
839 : */
840 0 : if (query->groupClause)
841 0 : return false;
842 :
843 : /*
844 : * If we have no groupClause (therefore no grouping expressions), we
845 : * might have one or many empty grouping sets. If there's just one,
846 : * then we're returning only one row and are certainly unique. But
847 : * otherwise, we know we're certainly not unique.
848 : */
849 0 : if (list_length(query->groupingSets) == 1 &&
850 0 : ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
851 0 : return true;
852 : else
853 0 : return false;
854 : }
855 : else
856 : {
857 : /*
858 : * If we have no GROUP BY, but do have aggregates or HAVING, then the
859 : * result is at most one row so it's surely unique, for any operators.
860 : */
861 113 : if (query->hasAggs || query->havingQual)
862 8 : return true;
863 : }
864 :
865 : /*
866 : * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
867 : * except with ALL.
868 : */
869 109 : if (query->setOperations)
870 : {
871 101 : SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
872 :
873 101 : Assert(topop->op != SETOP_NONE);
874 :
875 101 : if (!topop->all)
876 : {
877 : ListCell *lg;
878 :
879 : /* We're good if all the nonjunk output columns are in colnos */
880 4 : lg = list_head(topop->groupClauses);
881 6 : foreach(l, query->targetList)
882 : {
883 4 : TargetEntry *tle = (TargetEntry *) lfirst(l);
884 : SortGroupClause *sgc;
885 :
886 4 : if (tle->resjunk)
887 0 : continue; /* ignore resjunk columns */
888 :
889 : /* non-resjunk columns should have grouping clauses */
890 4 : Assert(lg != NULL);
891 4 : sgc = (SortGroupClause *) lfirst(lg);
892 4 : lg = lnext(lg);
893 :
894 4 : opid = distinct_col_search(tle->resno, colnos, opids);
895 6 : if (!OidIsValid(opid) ||
896 2 : !equality_ops_are_compatible(opid, sgc->eqop))
897 : break; /* exit early if no match */
898 : }
899 4 : if (l == NULL) /* had matches for all? */
900 2 : return true;
901 : }
902 : }
903 :
904 : /*
905 : * XXX Are there any other cases in which we can easily see the result
906 : * must be distinct?
907 : *
908 : * If you do add more smarts to this function, be sure to update
909 : * query_supports_distinctness() to match.
910 : */
911 :
912 107 : return false;
913 : }
914 :
915 : /*
916 : * distinct_col_search - subroutine for query_is_distinct_for
917 : *
918 : * If colno is in colnos, return the corresponding element of opids,
919 : * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
920 : * but if it does, we arbitrarily select the first match.)
921 : */
922 : static Oid
923 30 : distinct_col_search(int colno, List *colnos, List *opids)
924 : {
925 : ListCell *lc1,
926 : *lc2;
927 :
928 42 : forboth(lc1, colnos, lc2, opids)
929 : {
930 32 : if (colno == lfirst_int(lc1))
931 20 : return lfirst_oid(lc2);
932 : }
933 10 : return InvalidOid;
934 : }
935 :
936 :
937 : /*
938 : * innerrel_is_unique
939 : * Check if the innerrel provably contains at most one tuple matching any
940 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
941 : *
942 : * We need an actual RelOptInfo for the innerrel, but it's sufficient to
943 : * identify the outerrel by its Relids. This asymmetry supports use of this
944 : * function before joinrels have been built.
945 : *
946 : * The proof must be made based only on clauses that will be "joinquals"
947 : * rather than "otherquals" at execution. For an inner join there's no
948 : * difference; but if the join is outer, we must ignore pushed-down quals,
949 : * as those will become "otherquals". Note that this means the answer might
950 : * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
951 : * answer without regard to that, callers must take care not to call this
952 : * with jointypes that would be classified differently by IS_OUTER_JOIN().
953 : *
954 : * The actual proof is undertaken by is_innerrel_unique_for(); this function
955 : * is a frontend that is mainly concerned with caching the answers.
956 : * In particular, the force_cache argument allows overriding the internal
957 : * heuristic about whether to cache negative answers; it should be "true"
958 : * if making an inquiry that is not part of the normal bottom-up join search
959 : * sequence.
960 : */
961 : bool
962 14174 : innerrel_is_unique(PlannerInfo *root,
963 : Relids outerrelids,
964 : RelOptInfo *innerrel,
965 : JoinType jointype,
966 : List *restrictlist,
967 : bool force_cache)
968 : {
969 : MemoryContext old_context;
970 : ListCell *lc;
971 :
972 : /* Certainly can't prove uniqueness when there are no joinclauses */
973 14174 : if (restrictlist == NIL)
974 3296 : return false;
975 :
976 : /*
977 : * Make a quick check to eliminate cases in which we will surely be unable
978 : * to prove uniqueness of the innerrel.
979 : */
980 10878 : if (!rel_supports_distinctness(root, innerrel))
981 5494 : return false;
982 :
983 : /*
984 : * Query the cache to see if we've managed to prove that innerrel is
985 : * unique for any subset of this outerrel. We don't need an exact match,
986 : * as extra outerrels can't make the innerrel any less unique (or more
987 : * formally, the restrictlist for a join to a superset outerrel must be a
988 : * superset of the conditions we successfully used before).
989 : */
990 5616 : foreach(lc, innerrel->unique_for_rels)
991 : {
992 1436 : Relids unique_for_rels = (Relids) lfirst(lc);
993 :
994 1436 : if (bms_is_subset(unique_for_rels, outerrelids))
995 1204 : return true; /* Success! */
996 : }
997 :
998 : /*
999 : * Conversely, we may have already determined that this outerrel, or some
1000 : * superset thereof, cannot prove this innerrel to be unique.
1001 : */
1002 4180 : foreach(lc, innerrel->non_unique_for_rels)
1003 : {
1004 0 : Relids unique_for_rels = (Relids) lfirst(lc);
1005 :
1006 0 : if (bms_is_subset(outerrelids, unique_for_rels))
1007 0 : return false;
1008 : }
1009 :
1010 : /* No cached information, so try to make the proof. */
1011 4180 : if (is_innerrel_unique_for(root, outerrelids, innerrel,
1012 : jointype, restrictlist))
1013 : {
1014 : /*
1015 : * Cache the positive result for future probes, being sure to keep it
1016 : * in the planner_cxt even if we are working in GEQO.
1017 : *
1018 : * Note: one might consider trying to isolate the minimal subset of
1019 : * the outerrels that proved the innerrel unique. But it's not worth
1020 : * the trouble, because the planner builds up joinrels incrementally
1021 : * and so we'll see the minimally sufficient outerrels before any
1022 : * supersets of them anyway.
1023 : */
1024 2537 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1025 2537 : innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1026 2537 : bms_copy(outerrelids));
1027 2537 : MemoryContextSwitchTo(old_context);
1028 :
1029 2537 : return true; /* Success! */
1030 : }
1031 : else
1032 : {
1033 : /*
1034 : * None of the join conditions for outerrel proved innerrel unique, so
1035 : * we can safely reject this outerrel or any subset of it in future
1036 : * checks.
1037 : *
1038 : * However, in normal planning mode, caching this knowledge is totally
1039 : * pointless; it won't be queried again, because we build up joinrels
1040 : * from smaller to larger. It is useful in GEQO mode, where the
1041 : * knowledge can be carried across successive planning attempts; and
1042 : * it's likely to be useful when using join-search plugins, too. Hence
1043 : * cache when join_search_private is non-NULL. (Yeah, that's a hack,
1044 : * but it seems reasonable.)
1045 : *
1046 : * Also, allow callers to override that heuristic and force caching;
1047 : * that's useful for reduce_unique_semijoins, which calls here before
1048 : * the normal join search starts.
1049 : */
1050 1643 : if (force_cache || root->join_search_private)
1051 : {
1052 3 : old_context = MemoryContextSwitchTo(root->planner_cxt);
1053 3 : innerrel->non_unique_for_rels =
1054 3 : lappend(innerrel->non_unique_for_rels,
1055 3 : bms_copy(outerrelids));
1056 3 : MemoryContextSwitchTo(old_context);
1057 : }
1058 :
1059 1643 : return false;
1060 : }
1061 : }
1062 :
1063 : /*
1064 : * is_innerrel_unique_for
1065 : * Check if the innerrel provably contains at most one tuple matching any
1066 : * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1067 : */
1068 : static bool
1069 4180 : is_innerrel_unique_for(PlannerInfo *root,
1070 : Relids outerrelids,
1071 : RelOptInfo *innerrel,
1072 : JoinType jointype,
1073 : List *restrictlist)
1074 : {
1075 4180 : List *clause_list = NIL;
1076 : ListCell *lc;
1077 :
1078 : /*
1079 : * Search for mergejoinable clauses that constrain the inner rel against
1080 : * the outer rel. If an operator is mergejoinable then it behaves like
1081 : * equality for some btree opclass, so it's what we want. The
1082 : * mergejoinability test also eliminates clauses containing volatile
1083 : * functions, which we couldn't depend on.
1084 : */
1085 8966 : foreach(lc, restrictlist)
1086 : {
1087 4786 : RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1088 :
1089 : /*
1090 : * As noted above, if it's a pushed-down clause and we're at an outer
1091 : * join, we can't use it.
1092 : */
1093 4786 : if (restrictinfo->is_pushed_down && IS_OUTER_JOIN(jointype))
1094 25 : continue;
1095 :
1096 : /* Ignore if it's not a mergejoinable clause */
1097 9253 : if (!restrictinfo->can_join ||
1098 4492 : restrictinfo->mergeopfamilies == NIL)
1099 360 : continue; /* not mergejoinable */
1100 :
1101 : /*
1102 : * Check if clause has the form "outer op inner" or "inner op outer",
1103 : * and if so mark which side is inner.
1104 : */
1105 4401 : if (!clause_sides_match_join(restrictinfo, outerrelids,
1106 : innerrel->relids))
1107 0 : continue; /* no good for these input relations */
1108 :
1109 : /* OK, add to list */
1110 4401 : clause_list = lappend(clause_list, restrictinfo);
1111 : }
1112 :
1113 : /* Let rel_is_distinct_for() do the hard work */
1114 4180 : return rel_is_distinct_for(root, innerrel, clause_list);
1115 : }
|