Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeSetOp.c
4 : * Routines to handle INTERSECT and EXCEPT selection
5 : *
6 : * The input of a SetOp node consists of tuples from two relations,
7 : * which have been combined into one dataset, with a junk attribute added
8 : * that shows which relation each tuple came from. In SETOP_SORTED mode,
9 : * the input has furthermore been sorted according to all the grouping
10 : * columns (ie, all the non-junk attributes). The SetOp node scans each
11 : * group of identical tuples to determine how many came from each input
12 : * relation. Then it is a simple matter to emit the output demanded by the
13 : * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
14 : *
15 : * In SETOP_HASHED mode, the input is delivered in no particular order,
16 : * except that we know all the tuples from one input relation will come before
17 : * all the tuples of the other. The planner guarantees that the first input
18 : * relation is the left-hand one for EXCEPT, and tries to make the smaller
19 : * input relation come first for INTERSECT. We build a hash table in memory
20 : * with one entry for each group of identical tuples, and count the number of
21 : * tuples in the group from each relation. After seeing all the input, we
22 : * scan the hashtable and generate the correct output using those counts.
23 : * We can avoid making hashtable entries for any tuples appearing only in the
24 : * second input relation, since they cannot result in any output.
25 : *
26 : * This node type is not used for UNION or UNION ALL, since those can be
27 : * implemented more cheaply (there's no need for the junk attribute to
28 : * identify the source relation).
29 : *
30 : * Note that SetOp does no qual checking nor projection. The delivered
31 : * output tuples are just copies of the first-to-arrive tuple in each
32 : * input group.
33 : *
34 : *
35 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
36 : * Portions Copyright (c) 1994, Regents of the University of California
37 : *
38 : *
39 : * IDENTIFICATION
40 : * src/backend/executor/nodeSetOp.c
41 : *
42 : *-------------------------------------------------------------------------
43 : */
44 :
45 : #include "postgres.h"
46 :
47 : #include "access/htup_details.h"
48 : #include "executor/executor.h"
49 : #include "executor/nodeSetOp.h"
50 : #include "miscadmin.h"
51 : #include "utils/memutils.h"
52 :
53 :
/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group (hung off each entry's "additional" field).
 */
typedef struct SetOpStatePerGroupData
{
	long		numLeft;		/* number of left-input dups in group */
	long		numRight;		/* number of right-input dups in group */

	/*
	 * NOTE(review): "long" is only 32 bits on 64-bit Windows; if a group
	 * could ever hold more than ~2G duplicates these would overflow --
	 * confirm whether int64 is warranted before changing.
	 */
} SetOpStatePerGroupData;
69 :
70 :
71 : static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
72 : static void setop_fill_hash_table(SetOpState *setopstate);
73 : static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
74 :
75 :
76 : /*
77 : * Initialize state for a new group of input values.
78 : */
79 : static inline void
80 35412 : initialize_counts(SetOpStatePerGroup pergroup)
81 : {
82 35412 : pergroup->numLeft = pergroup->numRight = 0;
83 35412 : }
84 :
85 : /*
86 : * Advance the appropriate counter for one input tuple.
87 : */
88 : static inline void
89 75854 : advance_counts(SetOpStatePerGroup pergroup, int flag)
90 : {
91 75854 : if (flag)
92 40403 : pergroup->numRight++;
93 : else
94 35451 : pergroup->numLeft++;
95 75854 : }
96 :
97 : /*
98 : * Fetch the "flag" column from an input tuple.
99 : * This is an integer column with value 0 for left side, 1 for right side.
100 : */
101 : static int
102 80883 : fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
103 : {
104 80883 : SetOp *node = (SetOp *) setopstate->ps.plan;
105 : int flag;
106 : bool isNull;
107 :
108 80883 : flag = DatumGetInt32(slot_getattr(inputslot,
109 : node->flagColIdx,
110 : &isNull));
111 80883 : Assert(!isNull);
112 80883 : Assert(flag == 0 || flag == 1);
113 80883 : return flag;
114 : }
115 :
116 : /*
117 : * Initialize the hash table to empty.
118 : */
119 : static void
120 33 : build_hash_table(SetOpState *setopstate)
121 : {
122 33 : SetOp *node = (SetOp *) setopstate->ps.plan;
123 :
124 33 : Assert(node->strategy == SETOP_HASHED);
125 33 : Assert(node->numGroups > 0);
126 :
127 33 : setopstate->hashtable = BuildTupleHashTable(node->numCols,
128 : node->dupColIdx,
129 : setopstate->eqfunctions,
130 : setopstate->hashfunctions,
131 : node->numGroups,
132 : 0,
133 : setopstate->tableContext,
134 : setopstate->tempContext,
135 : false);
136 33 : }
137 :
138 : /*
139 : * We've completed processing a tuple group. Decide how many copies (if any)
140 : * of its representative row to emit, and store the count into numOutput.
141 : * This logic is straight from the SQL92 specification.
142 : */
143 : static void
144 35412 : set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
145 : {
146 35412 : SetOp *plannode = (SetOp *) setopstate->ps.plan;
147 :
148 35412 : switch (plannode->cmd)
149 : {
150 : case SETOPCMD_INTERSECT:
151 15022 : if (pergroup->numLeft > 0 && pergroup->numRight > 0)
152 10013 : setopstate->numOutput = 1;
153 : else
154 5009 : setopstate->numOutput = 0;
155 15022 : break;
156 : case SETOPCMD_INTERSECT_ALL:
157 4 : setopstate->numOutput =
158 : (pergroup->numLeft < pergroup->numRight) ?
159 4 : pergroup->numLeft : pergroup->numRight;
160 4 : break;
161 : case SETOPCMD_EXCEPT:
162 20374 : if (pergroup->numLeft > 0 && pergroup->numRight == 0)
163 23 : setopstate->numOutput = 1;
164 : else
165 20351 : setopstate->numOutput = 0;
166 20374 : break;
167 : case SETOPCMD_EXCEPT_ALL:
168 12 : setopstate->numOutput =
169 12 : (pergroup->numLeft < pergroup->numRight) ?
170 12 : 0 : (pergroup->numLeft - pergroup->numRight);
171 12 : break;
172 : default:
173 0 : elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
174 : break;
175 : }
176 35412 : }
177 :
178 :
179 : /* ----------------------------------------------------------------
180 : * ExecSetOp
181 : * ----------------------------------------------------------------
182 : */
183 : static TupleTableSlot * /* return: a tuple or NULL */
184 10082 : ExecSetOp(PlanState *pstate)
185 : {
186 10082 : SetOpState *node = castNode(SetOpState, pstate);
187 10082 : SetOp *plannode = (SetOp *) node->ps.plan;
188 10082 : TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
189 :
190 10082 : CHECK_FOR_INTERRUPTS();
191 :
192 : /*
193 : * If the previously-returned tuple needs to be returned more than once,
194 : * keep returning it.
195 : */
196 10082 : if (node->numOutput > 0)
197 : {
198 2 : node->numOutput--;
199 2 : return resultTupleSlot;
200 : }
201 :
202 : /* Otherwise, we're done if we are out of groups */
203 10080 : if (node->setop_done)
204 0 : return NULL;
205 :
206 : /* Fetch the next tuple group according to the correct strategy */
207 10080 : if (plannode->strategy == SETOP_HASHED)
208 : {
209 5077 : if (!node->table_filled)
210 31 : setop_fill_hash_table(node);
211 5077 : return setop_retrieve_hash_table(node);
212 : }
213 : else
214 5003 : return setop_retrieve_direct(node);
215 : }
216 :
/*
 * ExecSetOp for non-hashed (SETOP_SORTED) case.
 *
 * Input arrives sorted on the grouping columns, so each group is a
 * contiguous run of tuples.  We scan one run at a time, tallying left/right
 * counts, then decide how many copies of the group's first tuple to emit.
 * Returns the next output tuple, or NULL when input is exhausted.
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	SetOpStatePerGroup pergroup;
	TupleTableSlot *outerslot;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	pergroup = (SetOpStatePerGroup) setopstate->pergroup;
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * If we don't already have the first tuple of the new group, fetch it
		 * from the outer plan.  (It may already be saved from the previous
		 * iteration's group-boundary detection below.)
		 */
		if (setopstate->grp_firstTuple == NULL)
		{
			outerslot = ExecProcNode(outerPlan);
			if (!TupIsNull(outerslot))
			{
				/* Make a copy of the first input tuple */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
			}
			else
			{
				/* outer plan produced no tuples at all */
				setopstate->setop_done = true;
				return NULL;
			}
		}

		/*
		 * Store the copied first input tuple in the tuple table slot reserved
		 * for it.  The tuple will be deleted when it is cleared from the
		 * slot.  (shouldFree = true: the slot takes ownership.)
		 */
		ExecStoreTuple(setopstate->grp_firstTuple,
					   resultTupleSlot,
					   InvalidBuffer,
					   true);
		setopstate->grp_firstTuple = NULL;	/* don't keep two pointers */

		/* Initialize working state for a new input tuple group */
		initialize_counts(pergroup);

		/* Count the first input tuple */
		advance_counts(pergroup,
					   fetch_tuple_flag(setopstate, resultTupleSlot));

		/*
		 * Scan the outer plan until we exhaust it or cross a group boundary.
		 */
		for (;;)
		{
			outerslot = ExecProcNode(outerPlan);
			if (TupIsNull(outerslot))
			{
				/* no more outer-plan tuples available */
				setopstate->setop_done = true;
				break;
			}

			/*
			 * Check whether we've crossed a group boundary.
			 */
			if (!execTuplesMatch(resultTupleSlot,
								 outerslot,
								 node->numCols, node->dupColIdx,
								 setopstate->eqfunctions,
								 setopstate->tempContext))
			{
				/*
				 * Save the first input tuple of the next group for the next
				 * outer-loop iteration.
				 */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
				break;
			}

			/* Still in same group, so count this tuple */
			advance_counts(pergroup,
						   fetch_tuple_flag(setopstate, outerslot));
		}

		/*
		 * Done scanning input tuple group.  See if we should emit any copies
		 * of result tuple, and if so return the first copy.  (Additional
		 * copies, if any, are doled out by ExecSetOp from numOutput.)
		 */
		set_output_count(setopstate, pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return resultTupleSlot;
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}
330 :
/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table.
 *
 * The planner guarantees all tuples of the first input relation arrive
 * before any of the second (see file header).  Tuples from the first
 * relation create hash entries; tuples from the second only update entries
 * that already exist, since groups appearing solely in the second input
 * can never produce output.
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	int			firstFlag;
	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	firstFlag = node->firstFlag;
	/*
	 * verify planner didn't mess up: the right input may come first only for
	 * INTERSECT variants (where the smaller input is preferred first)
	 */
	Assert(firstFlag == 0 ||
		   (firstFlag == 1 &&
			(node->cmd == SETOPCMD_INTERSECT ||
			 node->cmd == SETOPCMD_INTERSECT_ALL)));

	/*
	 * Process each outer-plan tuple, and then fetch the next one, until we
	 * exhaust the outer plan.
	 */
	in_first_rel = true;
	for (;;)
	{
		TupleTableSlot *outerslot;
		int			flag;
		TupleHashEntryData *entry;
		bool		isnew;

		outerslot = ExecProcNode(outerPlan);
		if (TupIsNull(outerslot))
			break;

		/* Identify whether it's left or right input */
		flag = fetch_tuple_flag(setopstate, outerslot);

		if (flag == firstFlag)
		{
			/* (still) in first input relation */
			Assert(in_first_rel);

			/* Find or build hashtable entry for this tuple's group */
			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
										 &isnew);

			/* If new tuple group, initialize counts */
			if (isnew)
			{
				/* counters live in the table's context so they survive
				 * tempContext resets */
				entry->additional = (SetOpStatePerGroup)
					MemoryContextAlloc(setopstate->hashtable->tablecxt,
									   sizeof(SetOpStatePerGroupData));
				initialize_counts((SetOpStatePerGroup) entry->additional);
			}

			/* Advance the counts */
			advance_counts((SetOpStatePerGroup) entry->additional, flag);
		}
		else
		{
			/* reached second relation */
			in_first_rel = false;

			/* For tuples not seen previously, do not make hashtable entry */
			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
										 NULL);

			/* Advance the counts if entry is already present */
			if (entry)
				advance_counts((SetOpStatePerGroup) entry->additional, flag);
		}

		/* Must reset temp context after each hashtable lookup */
		MemoryContextReset(setopstate->tempContext);
	}

	setopstate->table_filled = true;
	/* Initialize to walk the hash table */
	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}
415 :
416 : /*
417 : * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
418 : */
419 : static TupleTableSlot *
420 5077 : setop_retrieve_hash_table(SetOpState *setopstate)
421 : {
422 : TupleHashEntryData *entry;
423 : TupleTableSlot *resultTupleSlot;
424 :
425 : /*
426 : * get state info from node
427 : */
428 5077 : resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
429 :
430 : /*
431 : * We loop retrieving groups until we find one we should return
432 : */
433 20520 : while (!setopstate->setop_done)
434 : {
435 15443 : CHECK_FOR_INTERRUPTS();
436 :
437 : /*
438 : * Find the next entry in the hash table
439 : */
440 15443 : entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
441 15443 : if (entry == NULL)
442 : {
443 : /* No more entries in hashtable, so done */
444 31 : setopstate->setop_done = true;
445 31 : return NULL;
446 : }
447 :
448 : /*
449 : * See if we should emit any copies of this tuple, and if so return
450 : * the first copy.
451 : */
452 15412 : set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);
453 :
454 15412 : if (setopstate->numOutput > 0)
455 : {
456 5046 : setopstate->numOutput--;
457 5046 : return ExecStoreMinimalTuple(entry->firstTuple,
458 : resultTupleSlot,
459 : false);
460 : }
461 : }
462 :
463 : /* No more groups */
464 0 : ExecClearTuple(resultTupleSlot);
465 0 : return NULL;
466 : }
467 :
468 : /* ----------------------------------------------------------------
469 : * ExecInitSetOp
470 : *
471 : * This initializes the setop node state structures and
472 : * the node's subplan.
473 : * ----------------------------------------------------------------
474 : */
475 : SetOpState *
476 37 : ExecInitSetOp(SetOp *node, EState *estate, int eflags)
477 : {
478 : SetOpState *setopstate;
479 :
480 : /* check for unsupported flags */
481 37 : Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
482 :
483 : /*
484 : * create state structure
485 : */
486 37 : setopstate = makeNode(SetOpState);
487 37 : setopstate->ps.plan = (Plan *) node;
488 37 : setopstate->ps.state = estate;
489 37 : setopstate->ps.ExecProcNode = ExecSetOp;
490 :
491 37 : setopstate->eqfunctions = NULL;
492 37 : setopstate->hashfunctions = NULL;
493 37 : setopstate->setop_done = false;
494 37 : setopstate->numOutput = 0;
495 37 : setopstate->pergroup = NULL;
496 37 : setopstate->grp_firstTuple = NULL;
497 37 : setopstate->hashtable = NULL;
498 37 : setopstate->tableContext = NULL;
499 :
500 : /*
501 : * Miscellaneous initialization
502 : *
503 : * SetOp nodes have no ExprContext initialization because they never call
504 : * ExecQual or ExecProject. But they do need a per-tuple memory context
505 : * anyway for calling execTuplesMatch.
506 : */
507 37 : setopstate->tempContext =
508 37 : AllocSetContextCreate(CurrentMemoryContext,
509 : "SetOp",
510 : ALLOCSET_DEFAULT_SIZES);
511 :
512 : /*
513 : * If hashing, we also need a longer-lived context to store the hash
514 : * table. The table can't just be kept in the per-query context because
515 : * we want to be able to throw it away in ExecReScanSetOp.
516 : */
517 37 : if (node->strategy == SETOP_HASHED)
518 33 : setopstate->tableContext =
519 33 : AllocSetContextCreate(CurrentMemoryContext,
520 : "SetOp hash table",
521 : ALLOCSET_DEFAULT_SIZES);
522 :
523 : /*
524 : * Tuple table initialization
525 : */
526 37 : ExecInitResultTupleSlot(estate, &setopstate->ps);
527 :
528 : /*
529 : * initialize child nodes
530 : *
531 : * If we are hashing then the child plan does not need to handle REWIND
532 : * efficiently; see ExecReScanSetOp.
533 : */
534 37 : if (node->strategy == SETOP_HASHED)
535 33 : eflags &= ~EXEC_FLAG_REWIND;
536 37 : outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
537 :
538 : /*
539 : * setop nodes do no projections, so initialize projection info for this
540 : * node appropriately
541 : */
542 37 : ExecAssignResultTypeFromTL(&setopstate->ps);
543 37 : setopstate->ps.ps_ProjInfo = NULL;
544 :
545 : /*
546 : * Precompute fmgr lookup data for inner loop. We need both equality and
547 : * hashing functions to do it by hashing, but only equality if not
548 : * hashing.
549 : */
550 37 : if (node->strategy == SETOP_HASHED)
551 33 : execTuplesHashPrepare(node->numCols,
552 : node->dupOperators,
553 : &setopstate->eqfunctions,
554 : &setopstate->hashfunctions);
555 : else
556 4 : setopstate->eqfunctions =
557 4 : execTuplesMatchPrepare(node->numCols,
558 : node->dupOperators);
559 :
560 37 : if (node->strategy == SETOP_HASHED)
561 : {
562 33 : build_hash_table(setopstate);
563 33 : setopstate->table_filled = false;
564 : }
565 : else
566 : {
567 4 : setopstate->pergroup =
568 4 : (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
569 : }
570 :
571 37 : return setopstate;
572 : }
573 :
574 : /* ----------------------------------------------------------------
575 : * ExecEndSetOp
576 : *
577 : * This shuts down the subplan and frees resources allocated
578 : * to this node.
579 : * ----------------------------------------------------------------
580 : */
581 : void
582 37 : ExecEndSetOp(SetOpState *node)
583 : {
584 : /* clean up tuple table */
585 37 : ExecClearTuple(node->ps.ps_ResultTupleSlot);
586 :
587 : /* free subsidiary stuff including hashtable */
588 37 : MemoryContextDelete(node->tempContext);
589 37 : if (node->tableContext)
590 33 : MemoryContextDelete(node->tableContext);
591 :
592 37 : ExecEndNode(outerPlanState(node));
593 37 : }
594 :
595 :
/* ----------------------------------------------------------------
 *		ExecReScanSetOp
 *
 *		Reset the node so the next ExecProcNode call restarts the
 *		set operation from scratch (or reuses the existing hash
 *		table when that is safe).
 * ----------------------------------------------------------------
 */
void
ExecReScanSetOp(SetOpState *node)
{
	/* reset output state */
	ExecClearTuple(node->ps.ps_ResultTupleSlot);
	node->setop_done = false;
	node->numOutput = 0;

	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		/*
		 * In the hashed case, if we haven't yet built the hash table then we
		 * can just return; nothing done yet, so nothing to undo.  If subnode's
		 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
		 * else no reason to re-scan it at all.
		 */
		if (!node->table_filled)
			return;

		/*
		 * If we do have the hash table and the subplan does not have any
		 * parameter changes, then we can just rescan the existing hash table;
		 * no need to build it again.
		 */
		if (node->ps.lefttree->chgParam == NULL)
		{
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
			return;
		}
	}

	/* Release first tuple of group, if we have made a copy (sorted mode) */
	if (node->grp_firstTuple != NULL)
	{
		heap_freetuple(node->grp_firstTuple);
		node->grp_firstTuple = NULL;
	}

	/* Release any hashtable storage */
	if (node->tableContext)
		MemoryContextResetAndDeleteChildren(node->tableContext);

	/* And rebuild empty hashtable if needed */
	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		build_hash_table(node);
		node->table_filled = false;
	}

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (node->ps.lefttree->chgParam == NULL)
		ExecReScan(node->ps.lefttree);
}
|