Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * clausesel.c
4 : * Routines to compute clause selectivities
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/path/clausesel.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "nodes/makefuncs.h"
18 : #include "optimizer/clauses.h"
19 : #include "optimizer/cost.h"
20 : #include "optimizer/pathnode.h"
21 : #include "optimizer/plancat.h"
22 : #include "utils/fmgroids.h"
23 : #include "utils/lsyscache.h"
24 : #include "utils/selfuncs.h"
25 : #include "statistics/statistics.h"
26 :
27 :
28 : /*
29 : * Data structure for accumulating info about possible range-query
30 : * clause pairs in clauselist_selectivity.
31 : */
32 : typedef struct RangeQueryClause
33 : {
34 : struct RangeQueryClause *next; /* next in linked list */
35 : Node *var; /* The common variable of the clauses */
36 : bool have_lobound; /* found a low-bound clause yet? */
37 : bool have_hibound; /* found a high-bound clause yet? */
38 : Selectivity lobound; /* Selectivity of a var > something clause */
39 : Selectivity hibound; /* Selectivity of a var < something clause */
40 : } RangeQueryClause;
41 :
42 : static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
43 : bool varonleft, bool isLTsel, Selectivity s2);
44 : static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
45 : List *clauses);
46 :
47 : /****************************************************************************
48 : * ROUTINES TO COMPUTE SELECTIVITIES
49 : ****************************************************************************/
50 :
51 : /*
52 : * clauselist_selectivity -
53 : * Compute the selectivity of an implicitly-ANDed list of boolean
54 : * expression clauses. The list can be empty, in which case 1.0
55 : * must be returned. List elements may be either RestrictInfos
56 : * or bare expression clauses --- the former is preferred since
57 : * it allows caching of results.
58 : *
59 : * See clause_selectivity() for the meaning of the additional parameters.
60 : *
61 : * Our basic approach is to take the product of the selectivities of the
62 : * subclauses. However, that's only right if the subclauses have independent
63 : * probabilities, and in reality they are often NOT independent. So,
64 : * we want to be smarter where we can.
65 : *
66 : * If the clauses taken together refer to just one relation, we'll try to
67 : * apply selectivity estimates using any extended statistics for that rel.
68 : * Currently we only have (soft) functional dependencies, so apply these in as
69 : * many cases as possible, and fall back on normal estimates for remaining
70 : * clauses.
71 : *
72 : * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
73 : * are recognized as possible range query components if they are restriction
74 : * opclauses whose operators have scalarltsel() or scalargtsel() as their
75 : * restriction selectivity estimator. We pair up clauses of this form that
76 : * refer to the same variable. An unpairable clause of this kind is simply
77 : * multiplied into the selectivity product in the normal way. But when we
78 : * find a pair, we know that the selectivities represent the relative
79 : * positions of the low and high bounds within the column's range, so instead
80 : * of figuring the selectivity as hisel * losel, we can figure it as hisel +
81 : * losel - 1. (To visualize this, see that hisel is the fraction of the range
82 : * below the high bound, while losel is the fraction above the low bound; so
83 : * hisel can be interpreted directly as a 0..1 value but we need to convert
84 : * losel to 1-losel before interpreting it as a value. Then the available
85 : * range is 1-losel to hisel. However, this calculation double-excludes
86 : * nulls, so really we need hisel + losel + null_frac - 1.)
87 : *
88 : * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
89 : * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
90 : * yields an impossible (negative) result.
91 : *
92 : * A free side-effect is that we can recognize redundant inequalities such
93 : * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
94 : *
95 : * Of course this is all very dependent on the behavior of
96 : * scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
97 : */
98 : Selectivity
99 75097 : clauselist_selectivity(PlannerInfo *root,
100 : List *clauses,
101 : int varRelid,
102 : JoinType jointype,
103 : SpecialJoinInfo *sjinfo)
104 : {
105 75097 : Selectivity s1 = 1.0;
106 : RelOptInfo *rel;
107 75097 : Bitmapset *estimatedclauses = NULL;
108 75097 : RangeQueryClause *rqlist = NULL;
109 : ListCell *l;
110 : int listidx;
111 :
112 : /*
113 : * If there's exactly one clause, just go directly to
114 : * clause_selectivity(). None of what we might do below is relevant.
115 : */
116 75097 : if (list_length(clauses) == 1)
117 38790 : return clause_selectivity(root, (Node *) linitial(clauses),
118 : varRelid, jointype, sjinfo);
119 :
120 : /*
121 : * Determine if these clauses reference a single relation. If so, and if
122 : * it has extended statistics, try to apply those.
123 : */
124 36307 : rel = find_single_rel_for_clauses(root, clauses);
125 36307 : if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
126 : {
127 : /*
128 : * Perform selectivity estimations on any clauses found applicable by
129 : * dependencies_clauselist_selectivity. 'estimatedclauses' will be
130 : * filled with the 0-based list positions of clauses used that way, so
131 : * that we can ignore them below.
132 : */
133 20 : s1 *= dependencies_clauselist_selectivity(root, clauses, varRelid,
134 : jointype, sjinfo, rel,
135 : &estimatedclauses);
136 :
137 : /*
138 : * This would be the place to apply any other types of extended
139 : * statistics selectivity estimations for remaining clauses.
140 : */
141 : }
142 :
143 : /*
144 : * Apply normal selectivity estimates for remaining clauses. We'll be
145 : * careful to skip any clauses which were already estimated above.
146 : *
147 : * Anything that doesn't look like a potential rangequery clause gets
148 : * multiplied into s1 and forgotten. Anything that does gets inserted into
149 : * an rqlist entry.
150 : */
151 36307 : listidx = -1;
152 57254 : foreach(l, clauses)
153 : {
154 20947 : Node *clause = (Node *) lfirst(l);
155 : RestrictInfo *rinfo;
156 : Selectivity s2;
157 :
158 20947 : listidx++;
159 :
160 : /*
161 : * Skip this clause if it's already been estimated by some other
162 : * statistics above.
163 : */
164 20947 : if (bms_is_member(listidx, estimatedclauses))
165 29 : continue;
166 :
167 : /* Always compute the selectivity using clause_selectivity */
168 20918 : s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
169 :
170 : /*
171 : * Check for being passed a RestrictInfo.
172 : *
173 : * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
174 : * 0.0; just use that rather than looking for range pairs.
175 : */
176 20918 : if (IsA(clause, RestrictInfo))
177 : {
178 20840 : rinfo = (RestrictInfo *) clause;
179 20840 : if (rinfo->pseudoconstant)
180 : {
181 501 : s1 = s1 * s2;
182 501 : continue;
183 : }
184 20339 : clause = (Node *) rinfo->clause;
185 : }
186 : else
187 78 : rinfo = NULL;
188 :
189 : /*
190 : * See if it looks like a restriction clause with a pseudoconstant on
191 : * one side. (Anything more complicated than that might not behave in
192 : * the simple way we are expecting.) Most of the tests here can be
193 : * done more efficiently with rinfo than without.
194 : */
195 20417 : if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
196 : {
197 17037 : OpExpr *expr = (OpExpr *) clause;
198 17037 : bool varonleft = true;
199 : bool ok;
200 :
201 17037 : if (rinfo)
202 : {
203 40757 : ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
204 11913 : (is_pseudo_constant_clause_relids(lsecond(expr->args),
205 877 : rinfo->right_relids) ||
206 877 : (varonleft = false,
207 877 : is_pseudo_constant_clause_relids(linitial(expr->args),
208 : rinfo->left_relids)));
209 : }
210 : else
211 : {
212 234 : ok = (NumRelids(clause) == 1) &&
213 78 : (is_pseudo_constant_clause(lsecond(expr->args)) ||
214 0 : (varonleft = false,
215 0 : is_pseudo_constant_clause(linitial(expr->args))));
216 : }
217 :
218 17037 : if (ok)
219 : {
220 : /*
221 : * If it's not a "<" or ">" operator, just merge the
222 : * selectivity in generically. But if it's the right oprrest,
223 : * add the clause to rqlist for later processing.
224 : */
225 11963 : switch (get_oprrest(expr->opno))
226 : {
227 : case F_SCALARLTSEL:
228 657 : addRangeClause(&rqlist, clause,
229 : varonleft, true, s2);
230 657 : break;
231 : case F_SCALARGTSEL:
232 1924 : addRangeClause(&rqlist, clause,
233 : varonleft, false, s2);
234 1924 : break;
235 : default:
236 : /* Just merge the selectivity in generically */
237 9382 : s1 = s1 * s2;
238 9382 : break;
239 : }
240 11963 : continue; /* drop to loop bottom */
241 : }
242 : }
243 :
244 : /* Not the right form, so treat it generically. */
245 8454 : s1 = s1 * s2;
246 : }
247 :
248 : /*
249 : * Now scan the rangequery pair list.
250 : */
251 74772 : while (rqlist != NULL)
252 : {
253 : RangeQueryClause *rqnext;
254 :
255 2158 : if (rqlist->have_lobound && rqlist->have_hibound)
256 406 : {
257 : /* Successfully matched a pair of range clauses */
258 : Selectivity s2;
259 :
260 : /*
261 : * Exact equality to the default value probably means the
262 : * selectivity function punted. This is not airtight but should
263 : * be good enough.
264 : */
265 812 : if (rqlist->hibound == DEFAULT_INEQ_SEL ||
266 406 : rqlist->lobound == DEFAULT_INEQ_SEL)
267 : {
268 0 : s2 = DEFAULT_RANGE_INEQ_SEL;
269 : }
270 : else
271 : {
272 406 : s2 = rqlist->hibound + rqlist->lobound - 1.0;
273 :
274 : /* Adjust for double-exclusion of NULLs */
275 406 : s2 += nulltestsel(root, IS_NULL, rqlist->var,
276 : varRelid, jointype, sjinfo);
277 :
278 : /*
279 : * A zero or slightly negative s2 should be converted into a
280 : * small positive value; we probably are dealing with a very
281 : * tight range and got a bogus result due to roundoff errors.
282 : * However, if s2 is very negative, then we probably have
283 : * default selectivity estimates on one or both sides of the
284 : * range that we failed to recognize above for some reason.
285 : */
286 406 : if (s2 <= 0.0)
287 : {
288 166 : if (s2 < -0.01)
289 : {
290 : /*
291 : * No data available --- use a default estimate that
292 : * is small, but not real small.
293 : */
294 131 : s2 = DEFAULT_RANGE_INEQ_SEL;
295 : }
296 : else
297 : {
298 : /*
299 : * It's just roundoff error; use a small positive
300 : * value
301 : */
302 35 : s2 = 1.0e-10;
303 : }
304 : }
305 : }
306 : /* Merge in the selectivity of the pair of clauses */
307 406 : s1 *= s2;
308 : }
309 : else
310 : {
311 : /* Only found one of a pair, merge it in generically */
312 1752 : if (rqlist->have_lobound)
313 1515 : s1 *= rqlist->lobound;
314 : else
315 237 : s1 *= rqlist->hibound;
316 : }
317 : /* release storage and advance */
318 2158 : rqnext = rqlist->next;
319 2158 : pfree(rqlist);
320 2158 : rqlist = rqnext;
321 : }
322 :
323 36307 : return s1;
324 : }
325 :
326 : /*
327 : * addRangeClause --- add a new range clause for clauselist_selectivity
328 : *
329 : * Here is where we try to match up pairs of range-query clauses
330 : */
331 : static void
332 2581 : addRangeClause(RangeQueryClause **rqlist, Node *clause,
333 : bool varonleft, bool isLTsel, Selectivity s2)
334 : {
335 : RangeQueryClause *rqelem;
336 : Node *var;
337 : bool is_lobound;
338 :
339 2581 : if (varonleft)
340 : {
341 2578 : var = get_leftop((Expr *) clause);
342 2578 : is_lobound = !isLTsel; /* x < something is high bound */
343 : }
344 : else
345 : {
346 3 : var = get_rightop((Expr *) clause);
347 3 : is_lobound = isLTsel; /* something < x is low bound */
348 : }
349 :
350 5218 : for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
351 : {
352 : /*
353 : * We use full equal() here because the "var" might be a function of
354 : * one or more attributes of the same relation...
355 : */
356 451 : if (!equal(var, rqelem->var))
357 28 : continue;
358 : /* Found the right group to put this clause in */
359 423 : if (is_lobound)
360 : {
361 20 : if (!rqelem->have_lobound)
362 : {
363 14 : rqelem->have_lobound = true;
364 14 : rqelem->lobound = s2;
365 : }
366 : else
367 : {
368 :
369 : /*------
370 : * We have found two similar clauses, such as
371 : * x < y AND x < z.
372 : * Keep only the more restrictive one.
373 : *------
374 : */
375 6 : if (rqelem->lobound > s2)
376 0 : rqelem->lobound = s2;
377 : }
378 : }
379 : else
380 : {
381 403 : if (!rqelem->have_hibound)
382 : {
383 392 : rqelem->have_hibound = true;
384 392 : rqelem->hibound = s2;
385 : }
386 : else
387 : {
388 :
389 : /*------
390 : * We have found two similar clauses, such as
391 : * x > y AND x > z.
392 : * Keep only the more restrictive one.
393 : *------
394 : */
395 11 : if (rqelem->hibound > s2)
396 6 : rqelem->hibound = s2;
397 : }
398 : }
399 3004 : return;
400 : }
401 :
402 : /* No matching var found, so make a new clause-pair data structure */
403 2158 : rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
404 2158 : rqelem->var = var;
405 2158 : if (is_lobound)
406 : {
407 1907 : rqelem->have_lobound = true;
408 1907 : rqelem->have_hibound = false;
409 1907 : rqelem->lobound = s2;
410 : }
411 : else
412 : {
413 251 : rqelem->have_lobound = false;
414 251 : rqelem->have_hibound = true;
415 251 : rqelem->hibound = s2;
416 : }
417 2158 : rqelem->next = *rqlist;
418 2158 : *rqlist = rqelem;
419 : }
420 :
421 : /*
422 : * find_single_rel_for_clauses
423 : * Examine each clause in 'clauses' and determine if all clauses
424 : * reference only a single relation. If so return that relation,
425 : * otherwise return NULL.
426 : */
427 : static RelOptInfo *
428 36307 : find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
429 : {
430 36307 : int lastrelid = 0;
431 : ListCell *l;
432 :
433 49045 : foreach(l, clauses)
434 : {
435 16673 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
436 : int relid;
437 :
438 : /*
439 : * If we have a list of bare clauses rather than RestrictInfos, we
440 : * could pull out their relids the hard way with pull_varnos().
441 : * However, currently the extended-stats machinery won't do anything
442 : * with non-RestrictInfo clauses anyway, so there's no point in
443 : * spending extra cycles; just fail if that's what we have.
444 : */
445 16673 : if (!IsA(rinfo, RestrictInfo))
446 4006 : return NULL;
447 :
448 16602 : if (bms_is_empty(rinfo->clause_relids))
449 508 : continue; /* we can ignore variable-free clauses */
450 16094 : if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
451 3857 : return NULL; /* multiple relations in this clause */
452 12237 : if (lastrelid == 0)
453 5949 : lastrelid = relid; /* first clause referencing a relation */
454 6288 : else if (relid != lastrelid)
455 7 : return NULL; /* relation not same as last one */
456 : }
457 :
458 32372 : if (lastrelid != 0)
459 5376 : return find_base_rel(root, lastrelid);
460 :
461 26996 : return NULL; /* no clauses */
462 : }
463 :
464 : /*
465 : * bms_is_subset_singleton
466 : *
467 : * Same result as bms_is_subset(s, bms_make_singleton(x)),
468 : * but a little faster and doesn't leak memory.
469 : *
470 : * Is this of use anywhere else? If so move to bitmapset.c ...
471 : */
472 : static bool
473 31390 : bms_is_subset_singleton(const Bitmapset *s, int x)
474 : {
475 31390 : switch (bms_membership(s))
476 : {
477 : case BMS_EMPTY_SET:
478 0 : return true;
479 : case BMS_SINGLETON:
480 23018 : return bms_is_member(x, s);
481 : case BMS_MULTIPLE:
482 8372 : return false;
483 : }
484 : /* can't get here... */
485 0 : return false;
486 : }
487 :
488 : /*
489 : * treat_as_join_clause -
490 : * Decide whether an operator clause is to be handled by the
491 : * restriction or join estimator. Subroutine for clause_selectivity().
492 : */
493 : static inline bool
494 27457 : treat_as_join_clause(Node *clause, RestrictInfo *rinfo,
495 : int varRelid, SpecialJoinInfo *sjinfo)
496 : {
497 27457 : if (varRelid != 0)
498 : {
499 : /*
500 : * Caller is forcing restriction mode (eg, because we are examining an
501 : * inner indexscan qual).
502 : */
503 9228 : return false;
504 : }
505 18229 : else if (sjinfo == NULL)
506 : {
507 : /*
508 : * It must be a restriction clause, since it's being evaluated at a
509 : * scan node.
510 : */
511 11901 : return false;
512 : }
513 : else
514 : {
515 : /*
516 : * Otherwise, it's a join if there's more than one relation used. We
517 : * can optimize this calculation if an rinfo was passed.
518 : *
519 : * XXX Since we know the clause is being evaluated at a join, the
520 : * only way it could be single-relation is if it was delayed by outer
521 : * joins. Although we can make use of the restriction qual estimators
522 : * anyway, it seems likely that we ought to account for the
523 : * probability of injected nulls somehow.
524 : */
525 6328 : if (rinfo)
526 6325 : return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
527 : else
528 3 : return (NumRelids(clause) > 1);
529 : }
530 : }
531 :
532 :
533 : /*
534 : * clause_selectivity -
535 : * Compute the selectivity of a general boolean expression clause.
536 : *
537 : * The clause can be either a RestrictInfo or a plain expression. If it's
538 : * a RestrictInfo, we try to cache the selectivity for possible re-use,
539 : * so passing RestrictInfos is preferred.
540 : *
541 : * varRelid is either 0 or a rangetable index.
542 : *
543 : * When varRelid is not 0, only variables belonging to that relation are
544 : * considered in computing selectivity; other vars are treated as constants
545 : * of unknown values. This is appropriate for estimating the selectivity of
546 : * a join clause that is being used as a restriction clause in a scan of a
547 : * nestloop join's inner relation --- varRelid should then be the ID of the
548 : * inner relation.
549 : *
550 : * When varRelid is 0, all variables are treated as variables. This
551 : * is appropriate for ordinary join clauses and restriction clauses.
552 : *
553 : * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
554 : * if the clause isn't a join clause.
555 : *
556 : * sjinfo is NULL for a non-join clause, otherwise it provides additional
557 : * context information about the join being performed. There are some
558 : * special cases:
559 : * 1. For a special (not INNER) join, sjinfo is always a member of
560 : * root->join_info_list.
561 : * 2. For an INNER join, sjinfo is just a transient struct, and only the
562 : * relids and jointype fields in it can be trusted.
563 : * It is possible for jointype to be different from sjinfo->jointype.
564 : * This indicates we are considering a variant join: either with
565 : * the LHS and RHS switched, or with one input unique-ified.
566 : *
567 : * Note: when passing nonzero varRelid, it's normally appropriate to set
568 : * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
569 : * join clause; because we aren't treating it as a join clause.
570 : */
571 : Selectivity
572 73531 : clause_selectivity(PlannerInfo *root,
573 : Node *clause,
574 : int varRelid,
575 : JoinType jointype,
576 : SpecialJoinInfo *sjinfo)
577 : {
578 73531 : Selectivity s1 = 0.5; /* default for any unhandled clause type */
579 73531 : RestrictInfo *rinfo = NULL;
580 73531 : bool cacheable = false;
581 :
582 73531 : if (clause == NULL) /* can this still happen? */
583 0 : return s1;
584 :
585 73531 : if (IsA(clause, RestrictInfo))
586 : {
587 72597 : rinfo = (RestrictInfo *) clause;
588 :
589 : /*
590 : * If the clause is marked pseudoconstant, then it will be used as a
591 : * gating qual and should not affect selectivity estimates; hence
592 : * return 1.0. The only exception is that a constant FALSE may be
593 : * taken as having selectivity 0.0, since it will surely mean no rows
594 : * out of the plan. This case is simple enough that we need not
595 : * bother caching the result.
596 : */
597 72597 : if (rinfo->pseudoconstant)
598 : {
599 514 : if (!IsA(rinfo->clause, Const))
600 508 : return (Selectivity) 1.0;
601 : }
602 :
603 : /*
604 : * If the clause is marked redundant, always return 1.0.
605 : */
606 72089 : if (rinfo->norm_selec > 1)
607 563 : return (Selectivity) 1.0;
608 :
609 : /*
610 : * If possible, cache the result of the selectivity calculation for
611 : * the clause. We can cache if varRelid is zero or the clause
612 : * contains only vars of that relid --- otherwise varRelid will affect
613 : * the result, so mustn't cache. Outer join quals might be examined
614 : * with either their join's actual jointype or JOIN_INNER, so we need
615 : * two cache variables to remember both cases. Note: we assume the
616 : * result won't change if we are switching the input relations or
617 : * considering a unique-ified case, so we only need one cache variable
618 : * for all non-JOIN_INNER cases.
619 : */
620 102916 : if (varRelid == 0 ||
621 31390 : bms_is_subset_singleton(rinfo->clause_relids, varRelid))
622 : {
623 : /* Cacheable --- do we already have the result? */
624 63082 : if (jointype == JOIN_INNER)
625 : {
626 56879 : if (rinfo->norm_selec >= 0)
627 37896 : return rinfo->norm_selec;
628 : }
629 : else
630 : {
631 6203 : if (rinfo->outer_selec >= 0)
632 2980 : return rinfo->outer_selec;
633 : }
634 22206 : cacheable = true;
635 : }
636 :
637 : /*
638 : * Proceed with examination of contained clause. If the clause is an
639 : * OR-clause, we want to look at the variant with sub-RestrictInfos,
640 : * so that per-subclause selectivities can be cached.
641 : */
642 30650 : if (rinfo->orclause)
643 523 : clause = (Node *) rinfo->orclause;
644 : else
645 30127 : clause = (Node *) rinfo->clause;
646 : }
647 :
648 31584 : if (IsA(clause, Var))
649 : {
650 1053 : Var *var = (Var *) clause;
651 :
652 : /*
653 : * We probably shouldn't ever see an uplevel Var here, but if we do,
654 : * return the default selectivity...
655 : */
656 1053 : if (var->varlevelsup == 0 &&
657 12 : (varRelid == 0 || varRelid == (int) var->varno))
658 : {
659 : /* Use the restriction selectivity function for a bool Var */
660 1049 : s1 = boolvarsel(root, (Node *) var, varRelid);
661 : }
662 : }
663 30531 : else if (IsA(clause, Const))
664 : {
665 : /* bool constant is pretty easy... */
666 8 : Const *con = (Const *) clause;
667 :
668 16 : s1 = con->constisnull ? 0.0 :
669 8 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
670 : }
671 30523 : else if (IsA(clause, Param))
672 : {
673 : /* see if we can replace the Param */
674 0 : Node *subst = estimate_expression_value(root, clause);
675 :
676 0 : if (IsA(subst, Const))
677 : {
678 : /* bool constant is pretty easy... */
679 0 : Const *con = (Const *) subst;
680 :
681 0 : s1 = con->constisnull ? 0.0 :
682 0 : DatumGetBool(con->constvalue) ? 1.0 : 0.0;
683 : }
684 : else
685 : {
686 : /* XXX any way to do better than default? */
687 : }
688 : }
689 30523 : else if (not_clause(clause))
690 : {
691 : /* inverse of the selectivity of the underlying clause */
692 671 : s1 = 1.0 - clause_selectivity(root,
693 671 : (Node *) get_notclausearg((Expr *) clause),
694 : varRelid,
695 : jointype,
696 : sjinfo);
697 : }
698 29852 : else if (and_clause(clause))
699 : {
700 : /* share code with clauselist_selectivity() */
701 98 : s1 = clauselist_selectivity(root,
702 : ((BoolExpr *) clause)->args,
703 : varRelid,
704 : jointype,
705 : sjinfo);
706 : }
707 29754 : else if (or_clause(clause))
708 : {
709 : /*
710 : * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to
711 : * account for the probable overlap of selected tuple sets.
712 : *
713 : * XXX is this too conservative?
714 : */
715 : ListCell *arg;
716 :
717 523 : s1 = 0.0;
718 1949 : foreach(arg, ((BoolExpr *) clause)->args)
719 : {
720 1426 : Selectivity s2 = clause_selectivity(root,
721 1426 : (Node *) lfirst(arg),
722 : varRelid,
723 : jointype,
724 : sjinfo);
725 :
726 1426 : s1 = s1 + s2 - s1 * s2;
727 : }
728 : }
729 29231 : else if (is_opclause(clause) || IsA(clause, DistinctExpr))
730 26959 : {
731 26959 : OpExpr *opclause = (OpExpr *) clause;
732 26959 : Oid opno = opclause->opno;
733 :
734 26959 : if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
735 : {
736 : /* Estimate selectivity for a join clause. */
737 6118 : s1 = join_selectivity(root, opno,
738 : opclause->args,
739 : opclause->inputcollid,
740 : jointype,
741 : sjinfo);
742 : }
743 : else
744 : {
745 : /* Estimate selectivity for a restriction clause. */
746 20841 : s1 = restriction_selectivity(root, opno,
747 : opclause->args,
748 : opclause->inputcollid,
749 : varRelid);
750 : }
751 :
752 : /*
753 : * DistinctExpr has the same representation as OpExpr, but the
754 : * contained operator is "=" not "<>", so we must negate the result.
755 : * This estimation method doesn't give the right behavior for nulls,
756 : * but it's better than doing nothing.
757 : */
758 26959 : if (IsA(clause, DistinctExpr))
759 8 : s1 = 1.0 - s1;
760 : }
761 2272 : else if (IsA(clause, ScalarArrayOpExpr))
762 : {
763 : /* Use node specific selectivity calculation function */
764 498 : s1 = scalararraysel(root,
765 : (ScalarArrayOpExpr *) clause,
766 498 : treat_as_join_clause(clause, rinfo,
767 : varRelid, sjinfo),
768 : varRelid,
769 : jointype,
770 : sjinfo);
771 : }
772 1774 : else if (IsA(clause, RowCompareExpr))
773 : {
774 : /* Use node specific selectivity calculation function */
775 9 : s1 = rowcomparesel(root,
776 : (RowCompareExpr *) clause,
777 : varRelid,
778 : jointype,
779 : sjinfo);
780 : }
781 1765 : else if (IsA(clause, NullTest))
782 : {
783 : /* Use node specific selectivity calculation function */
784 482 : s1 = nulltestsel(root,
785 : ((NullTest *) clause)->nulltesttype,
786 482 : (Node *) ((NullTest *) clause)->arg,
787 : varRelid,
788 : jointype,
789 : sjinfo);
790 : }
791 1283 : else if (IsA(clause, BooleanTest))
792 : {
793 : /* Use node specific selectivity calculation function */
794 20 : s1 = booltestsel(root,
795 : ((BooleanTest *) clause)->booltesttype,
796 20 : (Node *) ((BooleanTest *) clause)->arg,
797 : varRelid,
798 : jointype,
799 : sjinfo);
800 : }
801 1263 : else if (IsA(clause, CurrentOfExpr))
802 : {
803 : /* CURRENT OF selects at most one row of its table */
804 57 : CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
805 57 : RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
806 :
807 57 : if (crel->tuples > 0)
808 57 : s1 = 1.0 / crel->tuples;
809 : }
810 1206 : else if (IsA(clause, RelabelType))
811 : {
812 : /* Not sure this case is needed, but it can't hurt */
813 0 : s1 = clause_selectivity(root,
814 0 : (Node *) ((RelabelType *) clause)->arg,
815 : varRelid,
816 : jointype,
817 : sjinfo);
818 : }
819 1206 : else if (IsA(clause, CoerceToDomain))
820 : {
821 : /* Not sure this case is needed, but it can't hurt */
822 0 : s1 = clause_selectivity(root,
823 0 : (Node *) ((CoerceToDomain *) clause)->arg,
824 : varRelid,
825 : jointype,
826 : sjinfo);
827 : }
828 : else
829 : {
830 : /*
831 : * For anything else, see if we can consider it as a boolean variable.
832 : * This only works if it's an immutable expression in Vars of a single
833 : * relation; but there's no point in us checking that here because
834 : * boolvarsel() will do it internally, and return a suitable default
835 : * selectivity if not.
836 : */
837 1206 : s1 = boolvarsel(root, clause, varRelid);
838 : }
839 :
840 : /* Cache the result if possible */
841 31584 : if (cacheable)
842 : {
843 22206 : if (jointype == JOIN_INNER)
844 18983 : rinfo->norm_selec = s1;
845 : else
846 3223 : rinfo->outer_selec = s1;
847 : }
848 :
849 : #ifdef SELECTIVITY_DEBUG
850 : elog(DEBUG4, "clause_selectivity: s1 %f", s1);
851 : #endif /* SELECTIVITY_DEBUG */
852 :
853 31584 : return s1;
854 : }
|