Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * orclauses.c
4 : * Routines to extract restriction OR clauses from join OR clauses
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/optimizer/util/orclauses.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "optimizer/clauses.h"
19 : #include "optimizer/cost.h"
20 : #include "optimizer/orclauses.h"
21 : #include "optimizer/restrictinfo.h"
22 :
23 :
24 : static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel);
25 : static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
26 : static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
27 : Expr *orclause, RestrictInfo *join_or_rinfo);
28 :
29 :
30 : /*
31 : * extract_restriction_or_clauses
32 : * Examine join OR-of-AND clauses to see if any useful restriction OR
33 : * clauses can be extracted. If so, add them to the query.
34 : *
35 : * Although a join clause must reference multiple relations overall,
36 : * an OR of ANDs clause might contain sub-clauses that reference just one
37 : * relation and can be used to build a restriction clause for that rel.
38 : * For example consider
39 : * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
40 : * We can transform this into
41 : * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
42 : * AND (a.x = 42 OR a.x = 44)
43 : * AND (b.y = 43 OR b.z = 45);
44 : * which allows the latter clauses to be applied during the scans of a and b,
45 : * perhaps as index qualifications, and in any case reducing the number of
46 : * rows arriving at the join. In essence this is a partial transformation to
47 : * CNF (AND of ORs format). It is not complete, however, because we do not
48 : * unravel the original OR --- doing so would usually bloat the qualification
49 : * expression to little gain.
50 : *
51 : * The added quals are partially redundant with the original OR, and therefore
52 : * would cause the size of the joinrel to be underestimated when it is finally
53 : * formed. (This would be true of a full transformation to CNF as well; the
54 : * fault is not really in the transformation, but in clauselist_selectivity's
55 : * inability to recognize redundant conditions.) We can compensate for this
56 : * redundancy by changing the cached selectivity of the original OR clause,
57 : * canceling out the (valid) reduction in the estimated sizes of the base
58 : * relations so that the estimated joinrel size remains the same. This is
59 : * a MAJOR HACK: it depends on the fact that clause selectivities are cached
60 : * and on the fact that the same RestrictInfo node will appear in every
61 : * joininfo list that might be used when the joinrel is formed.
62 : * And it doesn't work in cases where the size estimation is nonlinear
63 : * (i.e., outer and IN joins). But it beats not doing anything.
64 : *
65 : * We examine each base relation to see if join clauses associated with it
66 : * contain extractable restriction conditions. If so, add those conditions
67 : * to the rel's baserestrictinfo and update the cached selectivities of the
68 : * join clauses. Note that the same join clause will be examined afresh
69 : * from the point of view of each baserel that participates in it, so its
70 : * cached selectivity may get updated multiple times.
71 : */
72 : void
73 13818 : extract_restriction_or_clauses(PlannerInfo *root)
74 : {
75 : Index rti;
76 :
77 : /* Examine each baserel for potential join OR clauses */
78 40855 : for (rti = 1; rti < root->simple_rel_array_size; rti++)
79 : {
80 27037 : RelOptInfo *rel = root->simple_rel_array[rti];
81 : ListCell *lc;
82 :
83 : /* there may be empty slots corresponding to non-baserel RTEs */
84 27037 : if (rel == NULL)
85 7514 : continue;
86 :
87 19523 : Assert(rel->relid == rti); /* sanity check on array */
88 :
89 : /* ignore RTEs that are "other rels" */
90 19523 : if (rel->reloptkind != RELOPT_BASEREL)
91 1983 : continue;
92 :
93 : /*
94 : * Find potentially interesting OR joinclauses. We can use any
95 : * joinclause that is considered safe to move to this rel by the
96 : * parameterized-path machinery, even though what we are going to do
97 : * with it is not exactly a parameterized path.
98 : *
99 : * However, it seems best to ignore clauses that have been marked
100 : * redundant (by setting norm_selec > 1). That likely can't happen
101 : * for OR clauses, but let's be safe.
102 : */
103 20641 : foreach(lc, rel->joininfo)
104 : {
105 3101 : RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
106 :
107 3310 : if (restriction_is_or_clause(rinfo) &&
108 397 : join_clause_is_movable_to(rinfo, rel) &&
109 188 : rinfo->norm_selec <= 1)
110 : {
111 : /* Try to extract a qual for this rel only */
112 188 : Expr *orclause = extract_or_clause(rinfo, rel);
113 :
114 : /*
115 : * If successful, decide whether we want to use the clause,
116 : * and insert it into the rel's restrictinfo list if so.
117 : */
118 188 : if (orclause)
119 6 : consider_new_or_clause(root, rel, orclause, rinfo);
120 : }
121 : }
122 : }
123 13818 : }
124 :
125 : /*
126 : * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
127 : */
128 : static bool
129 295 : is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
130 : {
131 : /*
132 : * We want clauses that mention the rel, and only the rel. So in
133 : * particular pseudoconstant clauses can be rejected quickly. Then check
134 : * the clause's Var membership.
135 : */
136 295 : if (rinfo->pseudoconstant)
137 0 : return false;
138 295 : if (!bms_equal(rinfo->clause_relids, rel->relids))
139 199 : return false;
140 :
141 : /* We don't want extra evaluations of any volatile functions */
142 96 : if (contain_volatile_functions((Node *) rinfo->clause))
143 0 : return false;
144 :
145 96 : return true;
146 : }
147 :
148 : /*
149 : * Try to extract a restriction clause mentioning only "rel" from the given
150 : * join OR-clause.
151 : *
152 : * We must be able to extract at least one qual for this rel from each of
153 : * the arms of the OR, else we can't use it.
154 : *
155 : * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
156 : * if no OR clause could be extracted.
157 : */
158 : static Expr *
159 194 : extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
160 : {
161 194 : List *clauselist = NIL;
162 : ListCell *lc;
163 :
164 : /*
165 : * Scan each arm of the input OR clause. Notice we descend into
166 : * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
167 : * toplevel OR/AND structure. This is useful because we can use the info
168 : * in those nodes to make is_safe_restriction_clause_for()'s checks
169 : * cheaper. We'll strip those nodes from the returned tree, though,
170 : * meaning that fresh ones will be built if the clause is accepted as a
171 : * restriction clause. This might seem wasteful --- couldn't we re-use
172 : * the existing RestrictInfos? But that'd require assuming that
173 : * selectivity and other cached data is computed exactly the same way for
174 : * a restriction clause as for a join clause, which seems undesirable.
175 : */
176 194 : Assert(or_clause((Node *) or_rinfo->orclause));
177 291 : foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
178 : {
179 284 : Node *orarg = (Node *) lfirst(lc);
180 284 : List *subclauses = NIL;
181 : Node *subclause;
182 :
183 : /* OR arguments should be ANDs or sub-RestrictInfos */
184 284 : if (and_clause(orarg))
185 : {
186 17 : List *andargs = ((BoolExpr *) orarg)->args;
187 : ListCell *lc2;
188 :
189 51 : foreach(lc2, andargs)
190 : {
191 34 : RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
192 :
193 34 : if (restriction_is_or_clause(rinfo))
194 : {
195 : /*
196 : * Recurse to deal with nested OR. Note we *must* recurse
197 : * here, this isn't just overly-tense optimization: we
198 : * have to descend far enough to find and strip all
199 : * RestrictInfos in the expression.
200 : */
201 : Expr *suborclause;
202 :
203 6 : suborclause = extract_or_clause(rinfo, rel);
204 6 : if (suborclause)
205 1 : subclauses = lappend(subclauses, suborclause);
206 : }
207 28 : else if (is_safe_restriction_clause_for(rinfo, rel))
208 12 : subclauses = lappend(subclauses, rinfo->clause);
209 : }
210 : }
211 : else
212 : {
213 267 : RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
214 :
215 267 : Assert(!restriction_is_or_clause(rinfo));
216 267 : if (is_safe_restriction_clause_for(rinfo, rel))
217 84 : subclauses = lappend(subclauses, rinfo->clause);
218 : }
219 :
220 : /*
221 : * If nothing could be extracted from this arm, we can't do anything
222 : * with this OR clause.
223 : */
224 284 : if (subclauses == NIL)
225 187 : return NULL;
226 :
227 : /*
228 : * OK, add subclause(s) to the result OR. If we found more than one,
229 : * we need an AND node. But if we found only one, and it is itself an
230 : * OR node, add its subclauses to the result instead; this is needed
231 : * to preserve AND/OR flatness (ie, no OR directly underneath OR).
232 : */
233 97 : subclause = (Node *) make_ands_explicit(subclauses);
234 97 : if (or_clause(subclause))
235 1 : clauselist = list_concat(clauselist,
236 1 : list_copy(((BoolExpr *) subclause)->args));
237 : else
238 96 : clauselist = lappend(clauselist, subclause);
239 : }
240 :
241 : /*
242 : * If we got a restriction clause from every arm, wrap them up in an OR
243 : * node. (In theory the OR node might be unnecessary, if there was only
244 : * one arm --- but then the input OR node was also redundant.)
245 : */
246 7 : if (clauselist != NIL)
247 7 : return make_orclause(clauselist);
248 0 : return NULL;
249 : }
250 :
251 : /*
252 : * Consider whether a successfully-extracted restriction OR clause is
253 : * actually worth using. If so, add it to the planner's data structures,
254 : * and adjust the original join clause (join_or_rinfo) to compensate.
255 : */
256 : static void
257 6 : consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
258 : Expr *orclause, RestrictInfo *join_or_rinfo)
259 : {
260 : RestrictInfo *or_rinfo;
261 : Selectivity or_selec,
262 : orig_selec;
263 :
264 : /*
265 : * Build a RestrictInfo from the new OR clause. We can assume it's valid
266 : * as a base restriction clause.
267 : */
268 6 : or_rinfo = make_restrictinfo(orclause,
269 : true,
270 : false,
271 : false,
272 : join_or_rinfo->security_level,
273 : NULL,
274 : NULL,
275 : NULL);
276 :
277 : /*
278 : * Estimate its selectivity. (We could have done this earlier, but doing
279 : * it on the RestrictInfo representation allows the result to get cached,
280 : * saving work later.)
281 : */
282 6 : or_selec = clause_selectivity(root, (Node *) or_rinfo,
283 : 0, JOIN_INNER, NULL);
284 :
285 : /*
286 : * The clause is only worth adding to the query if it rejects a useful
287 : * fraction of the base relation's rows; otherwise, it's just going to
288 : * cause duplicate computation (since we will still have to check the
289 : * original OR clause when the join is formed). Somewhat arbitrarily, we
290 : * set the selectivity threshold at 0.9.
291 : */
292 6 : if (or_selec > 0.9)
293 6 : return; /* forget it */
294 :
295 : /*
296 : * OK, add it to the rel's restriction-clause list.
297 : */
298 6 : rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
299 6 : rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
300 : or_rinfo->security_level);
301 :
302 : /*
303 : * Adjust the original join OR clause's cached selectivity to compensate
304 : * for the selectivity of the added (but redundant) lower-level qual. This
305 : * should result in the join rel getting approximately the same rows
306 : * estimate as it would have gotten without all these shenanigans.
307 : *
308 : * XXX major hack alert: this depends on the assumption that the
309 : * selectivity will stay cached.
310 : *
311 : * XXX another major hack: we adjust only norm_selec, the cached
312 : * selectivity for JOIN_INNER semantics, even though the join clause
313 : * might've been an outer-join clause. This is partly because we can't
314 : * easily identify the relevant SpecialJoinInfo here, and partly because
315 : * the linearity assumption we're making would fail anyway. (If it is an
316 : * outer-join clause, "rel" must be on the nullable side, else we'd not
317 : * have gotten here. So the computation of the join size is going to be
318 : * quite nonlinear with respect to the size of "rel", so it's not clear
319 : * how we ought to adjust outer_selec even if we could compute its
320 : * original value correctly.)
321 : */
322 6 : if (or_selec > 0)
323 : {
324 : SpecialJoinInfo sjinfo;
325 :
326 : /*
327 : * Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare
328 : * approx_tuple_count() in costsize.c.)
329 : */
330 6 : sjinfo.type = T_SpecialJoinInfo;
331 6 : sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids,
332 6 : rel->relids);
333 6 : sjinfo.min_righthand = rel->relids;
334 6 : sjinfo.syn_lefthand = sjinfo.min_lefthand;
335 6 : sjinfo.syn_righthand = sjinfo.min_righthand;
336 6 : sjinfo.jointype = JOIN_INNER;
337 : /* we don't bother trying to make the remaining fields valid */
338 6 : sjinfo.lhs_strict = false;
339 6 : sjinfo.delay_upper_joins = false;
340 6 : sjinfo.semi_can_btree = false;
341 6 : sjinfo.semi_can_hash = false;
342 6 : sjinfo.semi_operators = NIL;
343 6 : sjinfo.semi_rhs_exprs = NIL;
344 :
345 : /* Compute inner-join size */
346 6 : orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
347 : 0, JOIN_INNER, &sjinfo);
348 :
349 : /* And hack cached selectivity so join size remains the same */
350 6 : join_or_rinfo->norm_selec = orig_selec / or_selec;
351 : /* ensure result stays in sane range, in particular not "redundant" */
352 6 : if (join_or_rinfo->norm_selec > 1)
353 0 : join_or_rinfo->norm_selec = 1;
354 : /* as explained above, we don't touch outer_selec */
355 : }
356 : }
|