Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * prepqual.c
4 : * Routines for preprocessing qualification expressions
5 : *
6 : *
7 : * While the parser will produce flattened (N-argument) AND/OR trees from
8 : * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
9 : * directly underneath another AND, or OR underneath OR, if the input was
10 : * oddly parenthesized. Also, rule expansion and subquery flattening could
11 : * produce such parsetrees. The planner wants to flatten all such cases
12 : * to ensure consistent optimization behavior.
13 : *
14 : * Formerly, this module was responsible for doing the initial flattening,
15 : * but now we leave it to eval_const_expressions to do that since it has to
16 : * make a complete pass over the expression tree anyway. Instead, we just
17 : * have to ensure that our manipulations preserve AND/OR flatness.
18 : * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
19 : * tree after local transformations that might introduce nested AND/ORs.
20 : *
21 : *
22 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
23 : * Portions Copyright (c) 1994, Regents of the University of California
24 : *
25 : *
26 : * IDENTIFICATION
27 : * src/backend/optimizer/prep/prepqual.c
28 : *
29 : *-------------------------------------------------------------------------
30 : */
31 :
32 : #include "postgres.h"
33 :
34 : #include "nodes/makefuncs.h"
35 : #include "optimizer/clauses.h"
36 : #include "optimizer/prep.h"
37 : #include "utils/lsyscache.h"
38 :
39 :
40 : static List *pull_ands(List *andlist);
41 : static List *pull_ors(List *orlist);
42 : static Expr *find_duplicate_ors(Expr *qual);
43 : static Expr *process_duplicate_ors(List *orlist);
44 :
45 :
46 : /*
47 : * negate_clause
48 : * Negate a Boolean expression.
49 : *
50 : * Input is a clause to be negated (e.g., the argument of a NOT clause).
51 : * Returns a new clause equivalent to the negation of the given clause.
52 : *
53 : * Although this can be invoked on its own, it's mainly intended as a helper
54 : * for eval_const_expressions(), and that context drives several design
55 : * decisions. In particular, if the input is already AND/OR flat, we must
56 : * preserve that property. We also don't bother to recurse in situations
57 : * where we can assume that lower-level executions of eval_const_expressions
58 : * would already have simplified sub-clauses of the input.
59 : *
60 : * The difference between this and a simple make_notclause() is that this
61 : * tries to get rid of the NOT node by logical simplification. It's clearly
62 : * always a win if the NOT node can be eliminated altogether. However, our
63 : * use of DeMorgan's laws could result in having more NOT nodes rather than
64 : * fewer. We do that unconditionally anyway, because in WHERE clauses it's
65 : * important to expose as much top-level AND/OR structure as possible.
66 : * Also, eliminating an intermediate NOT may allow us to flatten two levels
67 : * of AND or OR together that we couldn't have otherwise. Finally, one of
68 : * the motivations for doing this is to ensure that logically equivalent
69 : * expressions will be seen as physically equal(), so we should always apply
70 : * the same transformations.
71 : */
72 : Node *
73 957 : negate_clause(Node *node)
74 : {
75 957 : if (node == NULL) /* should not happen */
76 0 : elog(ERROR, "can't negate an empty subexpression");
77 957 : switch (nodeTag(node))
78 : {
79 : case T_Const:
80 : {
81 8 : Const *c = (Const *) node;
82 :
83 : /* NOT NULL is still NULL */
84 8 : if (c->constisnull)
85 0 : return makeBoolConst(false, true);
86 : /* otherwise pretty easy */
87 8 : return makeBoolConst(!DatumGetBool(c->constvalue), false);
88 : }
89 : break;
90 : case T_OpExpr:
91 : {
92 : /*
93 : * Negate operator if possible: (NOT (< A B)) => (>= A B)
94 : */
95 130 : OpExpr *opexpr = (OpExpr *) node;
96 130 : Oid negator = get_negator(opexpr->opno);
97 :
98 130 : if (negator)
99 : {
100 88 : OpExpr *newopexpr = makeNode(OpExpr);
101 :
102 88 : newopexpr->opno = negator;
103 88 : newopexpr->opfuncid = InvalidOid;
104 88 : newopexpr->opresulttype = opexpr->opresulttype;
105 88 : newopexpr->opretset = opexpr->opretset;
106 88 : newopexpr->opcollid = opexpr->opcollid;
107 88 : newopexpr->inputcollid = opexpr->inputcollid;
108 88 : newopexpr->args = opexpr->args;
109 88 : newopexpr->location = opexpr->location;
110 88 : return (Node *) newopexpr;
111 : }
112 : }
113 42 : break;
114 : case T_ScalarArrayOpExpr:
115 : {
116 : /*
117 : * Negate a ScalarArrayOpExpr if its operator has a negator;
118 : * for example x = ANY (list) becomes x <> ALL (list)
119 : */
120 6 : ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
121 6 : Oid negator = get_negator(saopexpr->opno);
122 :
123 6 : if (negator)
124 : {
125 6 : ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
126 :
127 6 : newopexpr->opno = negator;
128 6 : newopexpr->opfuncid = InvalidOid;
129 6 : newopexpr->useOr = !saopexpr->useOr;
130 6 : newopexpr->inputcollid = saopexpr->inputcollid;
131 6 : newopexpr->args = saopexpr->args;
132 6 : newopexpr->location = saopexpr->location;
133 6 : return (Node *) newopexpr;
134 : }
135 : }
136 0 : break;
137 : case T_BoolExpr:
138 : {
139 60 : BoolExpr *expr = (BoolExpr *) node;
140 :
141 60 : switch (expr->boolop)
142 : {
143 : /*--------------------
144 : * Apply DeMorgan's Laws:
145 : * (NOT (AND A B)) => (OR (NOT A) (NOT B))
146 : * (NOT (OR A B)) => (AND (NOT A) (NOT B))
147 : * i.e., swap AND for OR and negate each subclause.
148 : *
149 : * If the input is already AND/OR flat and has no NOT
150 : * directly above AND or OR, this transformation preserves
151 : * those properties. For example, if no direct child of
152 : * the given AND clause is an AND or a NOT-above-OR, then
153 : * the recursive calls of negate_clause() can't return any
154 : * OR clauses. So we needn't call pull_ors() before
155 : * building a new OR clause. Similarly for the OR case.
156 : *--------------------
157 : */
158 : case AND_EXPR:
159 : {
160 46 : List *nargs = NIL;
161 : ListCell *lc;
162 :
163 172 : foreach(lc, expr->args)
164 : {
165 126 : nargs = lappend(nargs,
166 126 : negate_clause(lfirst(lc)));
167 : }
168 46 : return (Node *) make_orclause(nargs);
169 : }
170 : break;
171 : case OR_EXPR:
172 : {
173 7 : List *nargs = NIL;
174 : ListCell *lc;
175 :
176 26 : foreach(lc, expr->args)
177 : {
178 19 : nargs = lappend(nargs,
179 19 : negate_clause(lfirst(lc)));
180 : }
181 7 : return (Node *) make_andclause(nargs);
182 : }
183 : break;
184 : case NOT_EXPR:
185 :
186 : /*
187 : * NOT underneath NOT: they cancel. We assume the
188 : * input is already simplified, so no need to recurse.
189 : */
190 7 : return (Node *) linitial(expr->args);
191 : default:
192 0 : elog(ERROR, "unrecognized boolop: %d",
193 : (int) expr->boolop);
194 : break;
195 : }
196 : }
197 : break;
198 : case T_NullTest:
199 : {
200 0 : NullTest *expr = (NullTest *) node;
201 :
202 : /*
203 : * In the rowtype case, the two flavors of NullTest are *not*
204 : * logical inverses, so we can't simplify. But it does work
205 : * for scalar datatypes.
206 : */
207 0 : if (!expr->argisrow)
208 : {
209 0 : NullTest *newexpr = makeNode(NullTest);
210 :
211 0 : newexpr->arg = expr->arg;
212 0 : newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
213 0 : IS_NOT_NULL : IS_NULL);
214 0 : newexpr->argisrow = expr->argisrow;
215 0 : newexpr->location = expr->location;
216 0 : return (Node *) newexpr;
217 : }
218 : }
219 0 : break;
220 : case T_BooleanTest:
221 : {
222 0 : BooleanTest *expr = (BooleanTest *) node;
223 0 : BooleanTest *newexpr = makeNode(BooleanTest);
224 :
225 0 : newexpr->arg = expr->arg;
226 0 : switch (expr->booltesttype)
227 : {
228 : case IS_TRUE:
229 0 : newexpr->booltesttype = IS_NOT_TRUE;
230 0 : break;
231 : case IS_NOT_TRUE:
232 0 : newexpr->booltesttype = IS_TRUE;
233 0 : break;
234 : case IS_FALSE:
235 0 : newexpr->booltesttype = IS_NOT_FALSE;
236 0 : break;
237 : case IS_NOT_FALSE:
238 0 : newexpr->booltesttype = IS_FALSE;
239 0 : break;
240 : case IS_UNKNOWN:
241 0 : newexpr->booltesttype = IS_NOT_UNKNOWN;
242 0 : break;
243 : case IS_NOT_UNKNOWN:
244 0 : newexpr->booltesttype = IS_UNKNOWN;
245 0 : break;
246 : default:
247 0 : elog(ERROR, "unrecognized booltesttype: %d",
248 : (int) expr->booltesttype);
249 : break;
250 : }
251 0 : newexpr->location = expr->location;
252 0 : return (Node *) newexpr;
253 : }
254 : break;
255 : default:
256 : /* else fall through */
257 753 : break;
258 : }
259 :
260 : /*
261 : * Otherwise we don't know how to simplify this, so just tack on an
262 : * explicit NOT node.
263 : */
264 795 : return (Node *) make_notclause((Expr *) node);
265 : }
266 :
267 :
268 : /*
269 : * canonicalize_qual
270 : * Convert a qualification expression to the most useful form.
271 : *
272 : * The name of this routine is a holdover from a time when it would try to
273 : * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
274 : * Eventually, we recognized that that had more theoretical purity than
275 : * actual usefulness, and so now the transformation doesn't involve any
276 : * notion of reaching a canonical form.
277 : *
278 : * NOTE: we assume the input has already been through eval_const_expressions
279 : * and therefore possesses AND/OR flatness. Formerly this function included
280 : * its own flattening logic, but that requires a useless extra pass over the
281 : * tree.
282 : *
283 : * Returns the modified qualification.
284 : */
285 : Expr *
286 12485 : canonicalize_qual(Expr *qual)
287 : {
288 : Expr *newqual;
289 :
290 : /* Quick exit for empty qual */
291 12485 : if (qual == NULL)
292 0 : return NULL;
293 :
294 : /*
295 : * Pull up redundant subclauses in OR-of-AND trees. We do this only
296 : * within the top-level AND/OR structure; there's no point in looking
297 : * deeper. Also remove any NULL constants in the top-level structure.
298 : */
299 12485 : newqual = find_duplicate_ors(qual);
300 :
301 12485 : return newqual;
302 : }
303 :
304 :
305 : /*
306 : * pull_ands
307 : * Recursively flatten nested AND clauses into a single and-clause list.
308 : *
309 : * Input is the arglist of an AND clause.
310 : * Returns the rebuilt arglist (note original list structure is not touched).
311 : */
312 : static List *
313 3977 : pull_ands(List *andlist)
314 : {
315 3977 : List *out_list = NIL;
316 : ListCell *arg;
317 :
318 14125 : foreach(arg, andlist)
319 : {
320 10148 : Node *subexpr = (Node *) lfirst(arg);
321 :
322 : /*
323 : * Note: we can destructively concat the subexpression's arglist
324 : * because we know the recursive invocation of pull_ands will have
325 : * built a new arglist not shared with any other expr. Otherwise we'd
326 : * need a list_copy here.
327 : */
328 10148 : if (and_clause(subexpr))
329 0 : out_list = list_concat(out_list,
330 : pull_ands(((BoolExpr *) subexpr)->args));
331 : else
332 10148 : out_list = lappend(out_list, subexpr);
333 : }
334 3977 : return out_list;
335 : }
336 :
337 : /*
338 : * pull_ors
339 : * Recursively flatten nested OR clauses into a single or-clause list.
340 : *
341 : * Input is the arglist of an OR clause.
342 : * Returns the rebuilt arglist (note original list structure is not touched).
343 : */
344 : static List *
345 328 : pull_ors(List *orlist)
346 : {
347 328 : List *out_list = NIL;
348 : ListCell *arg;
349 :
350 1224 : foreach(arg, orlist)
351 : {
352 896 : Node *subexpr = (Node *) lfirst(arg);
353 :
354 : /*
355 : * Note: we can destructively concat the subexpression's arglist
356 : * because we know the recursive invocation of pull_ors will have
357 : * built a new arglist not shared with any other expr. Otherwise we'd
358 : * need a list_copy here.
359 : */
360 896 : if (or_clause(subexpr))
361 0 : out_list = list_concat(out_list,
362 : pull_ors(((BoolExpr *) subexpr)->args));
363 : else
364 896 : out_list = lappend(out_list, subexpr);
365 : }
366 328 : return out_list;
367 : }
368 :
369 :
370 : /*--------------------
371 : * The following code attempts to apply the inverse OR distributive law:
372 : * ((A AND B) OR (A AND C)) => (A AND (B OR C))
373 : * That is, locate OR clauses in which every subclause contains an
374 : * identical term, and pull out the duplicated terms.
375 : *
376 : * This may seem like a fairly useless activity, but it turns out to be
377 : * applicable to many machine-generated queries, and there are also queries
378 : * in some of the TPC benchmarks that need it. This was in fact almost the
379 : * sole useful side-effect of the old prepqual code that tried to force
380 : * the query into canonical AND-of-ORs form: the canonical equivalent of
381 : * ((A AND B) OR (A AND C))
382 : * is
383 : * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
384 : * which the code was able to simplify to
385 : * (A AND (A OR C) AND (B OR A) AND (B OR C))
386 : * thus successfully extracting the common condition A --- but at the cost
387 : * of cluttering the qual with many redundant clauses.
388 : *--------------------
389 : */
390 :
391 : /*
392 : * find_duplicate_ors
393 : * Given a qualification tree with the NOTs pushed down, search for
394 : * OR clauses to which the inverse OR distributive law might apply.
395 : * Only the top-level AND/OR structure is searched.
396 : *
397 : * While at it, we remove any NULL constants within the top-level AND/OR
398 : * structure, eg "x OR NULL::boolean" is reduced to "x". In general that
399 : * would change the result, so eval_const_expressions can't do it; but at
400 : * top level of WHERE, we don't need to distinguish between FALSE and NULL
401 : * results, so it's valid to treat NULL::boolean the same as FALSE and then
402 : * simplify AND/OR accordingly.
403 : *
404 : * Returns the modified qualification. AND/OR flatness is preserved.
405 : */
406 : static Expr *
407 23530 : find_duplicate_ors(Expr *qual)
408 : {
409 23530 : if (or_clause((Node *) qual))
410 : {
411 328 : List *orlist = NIL;
412 : ListCell *temp;
413 :
414 : /* Recurse */
415 1225 : foreach(temp, ((BoolExpr *) qual)->args)
416 : {
417 897 : Expr *arg = (Expr *) lfirst(temp);
418 :
419 897 : arg = find_duplicate_ors(arg);
420 :
421 : /* Get rid of any constant inputs */
422 897 : if (arg && IsA(arg, Const))
423 : {
424 1 : Const *carg = (Const *) arg;
425 :
426 : /* Drop constant FALSE or NULL */
427 1 : if (carg->constisnull || !DatumGetBool(carg->constvalue))
428 1 : continue;
429 : /* constant TRUE, so OR reduces to TRUE */
430 0 : return arg;
431 : }
432 :
433 896 : orlist = lappend(orlist, arg);
434 : }
435 :
436 : /* Flatten any ORs pulled up to just below here */
437 328 : orlist = pull_ors(orlist);
438 :
439 : /* Now we can look for duplicate ORs */
440 328 : return process_duplicate_ors(orlist);
441 : }
442 23202 : else if (and_clause((Node *) qual))
443 : {
444 3977 : List *andlist = NIL;
445 : ListCell *temp;
446 :
447 : /* Recurse */
448 14125 : foreach(temp, ((BoolExpr *) qual)->args)
449 : {
450 10148 : Expr *arg = (Expr *) lfirst(temp);
451 :
452 10148 : arg = find_duplicate_ors(arg);
453 :
454 : /* Get rid of any constant inputs */
455 10148 : if (arg && IsA(arg, Const))
456 : {
457 0 : Const *carg = (Const *) arg;
458 :
459 : /* Drop constant TRUE */
460 0 : if (!carg->constisnull && DatumGetBool(carg->constvalue))
461 0 : continue;
462 : /* constant FALSE or NULL, so AND reduces to FALSE */
463 0 : return (Expr *) makeBoolConst(false, false);
464 : }
465 :
466 10148 : andlist = lappend(andlist, arg);
467 : }
468 :
469 : /* Flatten any ANDs introduced just below here */
470 3977 : andlist = pull_ands(andlist);
471 :
472 : /* AND of no inputs reduces to TRUE */
473 3977 : if (andlist == NIL)
474 0 : return (Expr *) makeBoolConst(true, false);
475 :
476 : /* Single-expression AND just reduces to that expression */
477 3977 : if (list_length(andlist) == 1)
478 0 : return (Expr *) linitial(andlist);
479 :
480 : /* Else we still need an AND node */
481 3977 : return make_andclause(andlist);
482 : }
483 : else
484 19225 : return qual;
485 : }
486 :
487 : /*
488 : * process_duplicate_ors
489 : * Given a list of exprs which are ORed together, try to apply
490 : * the inverse OR distributive law.
491 : *
492 : * Returns the resulting expression (could be an AND clause, an OR
493 : * clause, or maybe even a single subexpression).
494 : */
495 : static Expr *
496 328 : process_duplicate_ors(List *orlist)
497 : {
498 328 : List *reference = NIL;
499 328 : int num_subclauses = 0;
500 : List *winners;
501 : List *neworlist;
502 : ListCell *temp;
503 :
504 : /* OR of no inputs reduces to FALSE */
505 328 : if (orlist == NIL)
506 0 : return (Expr *) makeBoolConst(false, false);
507 :
508 : /* Single-expression OR just reduces to that expression */
509 328 : if (list_length(orlist) == 1)
510 1 : return (Expr *) linitial(orlist);
511 :
512 : /*
513 : * Choose the shortest AND clause as the reference list --- obviously, any
514 : * subclause not in this clause isn't in all the clauses. If we find a
515 : * clause that's not an AND, we can treat it as a one-element AND clause,
516 : * which necessarily wins as shortest.
517 : */
518 355 : foreach(temp, orlist)
519 : {
520 344 : Expr *clause = (Expr *) lfirst(temp);
521 :
522 344 : if (and_clause((Node *) clause))
523 : {
524 28 : List *subclauses = ((BoolExpr *) clause)->args;
525 28 : int nclauses = list_length(subclauses);
526 :
527 28 : if (reference == NIL || nclauses < num_subclauses)
528 : {
529 15 : reference = subclauses;
530 15 : num_subclauses = nclauses;
531 : }
532 : }
533 : else
534 : {
535 316 : reference = list_make1(clause);
536 316 : break;
537 : }
538 : }
539 :
540 : /*
541 : * Just in case, eliminate any duplicates in the reference list.
542 : */
543 327 : reference = list_union(NIL, reference);
544 :
545 : /*
546 : * Check each element of the reference list to see if it's in all the OR
547 : * clauses. Build a new list of winning clauses.
548 : */
549 327 : winners = NIL;
550 665 : foreach(temp, reference)
551 : {
552 338 : Expr *refclause = (Expr *) lfirst(temp);
553 338 : bool win = true;
554 : ListCell *temp2;
555 :
556 672 : foreach(temp2, orlist)
557 : {
558 672 : Expr *clause = (Expr *) lfirst(temp2);
559 :
560 672 : if (and_clause((Node *) clause))
561 : {
562 65 : if (!list_member(((BoolExpr *) clause)->args, refclause))
563 : {
564 43 : win = false;
565 43 : break;
566 : }
567 : }
568 : else
569 : {
570 607 : if (!equal(refclause, clause))
571 : {
572 295 : win = false;
573 295 : break;
574 : }
575 : }
576 : }
577 :
578 338 : if (win)
579 0 : winners = lappend(winners, refclause);
580 : }
581 :
582 : /*
583 : * If no winners, we can't transform the OR
584 : */
585 327 : if (winners == NIL)
586 327 : return make_orclause(orlist);
587 :
588 : /*
589 : * Generate new OR list consisting of the remaining sub-clauses.
590 : *
591 : * If any clause degenerates to empty, then we have a situation like (A
592 : * AND B) OR (A), which can be reduced to just A --- that is, the
593 : * additional conditions in other arms of the OR are irrelevant.
594 : *
595 : * Note that because we use list_difference, any multiple occurrences of a
596 : * winning clause in an AND sub-clause will be removed automatically.
597 : */
598 0 : neworlist = NIL;
599 0 : foreach(temp, orlist)
600 : {
601 0 : Expr *clause = (Expr *) lfirst(temp);
602 :
603 0 : if (and_clause((Node *) clause))
604 : {
605 0 : List *subclauses = ((BoolExpr *) clause)->args;
606 :
607 0 : subclauses = list_difference(subclauses, winners);
608 0 : if (subclauses != NIL)
609 : {
610 0 : if (list_length(subclauses) == 1)
611 0 : neworlist = lappend(neworlist, linitial(subclauses));
612 : else
613 0 : neworlist = lappend(neworlist, make_andclause(subclauses));
614 : }
615 : else
616 : {
617 0 : neworlist = NIL; /* degenerate case, see above */
618 0 : break;
619 : }
620 : }
621 : else
622 : {
623 0 : if (!list_member(winners, clause))
624 0 : neworlist = lappend(neworlist, clause);
625 : else
626 : {
627 0 : neworlist = NIL; /* degenerate case, see above */
628 0 : break;
629 : }
630 : }
631 : }
632 :
633 : /*
634 : * Append reduced OR to the winners list, if it's not degenerate, handling
635 : * the special case of one element correctly (can that really happen?).
636 : * Also be careful to maintain AND/OR flatness in case we pulled up a
637 : * sub-sub-OR-clause.
638 : */
639 0 : if (neworlist != NIL)
640 : {
641 0 : if (list_length(neworlist) == 1)
642 0 : winners = lappend(winners, linitial(neworlist));
643 : else
644 0 : winners = lappend(winners, make_orclause(pull_ors(neworlist)));
645 : }
646 :
647 : /*
648 : * And return the constructed AND clause, again being wary of a single
649 : * element and AND/OR flatness.
650 : */
651 0 : if (list_length(winners) == 1)
652 0 : return (Expr *) linitial(winners);
653 : else
654 0 : return make_andclause(pull_ands(winners));
655 : }
|