Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeHashjoin.c
4 : * Routines to handle hash join nodes
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/nodeHashjoin.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "access/htup_details.h"
19 : #include "executor/executor.h"
20 : #include "executor/hashjoin.h"
21 : #include "executor/nodeHash.h"
22 : #include "executor/nodeHashjoin.h"
23 : #include "miscadmin.h"
24 : #include "utils/memutils.h"
25 :
26 :
27 : /*
28 : * States of the ExecHashJoin state machine
29 : */
30 : #define HJ_BUILD_HASHTABLE 1
31 : #define HJ_NEED_NEW_OUTER 2
32 : #define HJ_SCAN_BUCKET 3
33 : #define HJ_FILL_OUTER_TUPLE 4
34 : #define HJ_FILL_INNER_TUPLES 5
35 : #define HJ_NEED_NEW_BATCH 6
36 :
37 : /* Returns true if doing null-fill on outer relation */
38 : #define HJ_FILL_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL)
39 : /* Returns true if doing null-fill on inner relation */
40 : #define HJ_FILL_INNER(hjstate) ((hjstate)->hj_NullOuterTupleSlot != NULL)
41 :
42 : static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
43 : HashJoinState *hjstate,
44 : uint32 *hashvalue);
45 : static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
46 : BufFile *file,
47 : uint32 *hashvalue,
48 : TupleTableSlot *tupleSlot);
49 : static bool ExecHashJoinNewBatch(HashJoinState *hjstate);
50 :
51 :
52 : /* ----------------------------------------------------------------
53 : * ExecHashJoin
54 : *
55 : * This function implements the Hybrid Hashjoin algorithm.
56 : *
57 : * Note: the relation we build hash table on is the "inner"
58 : * the other one is "outer".
59 : * ----------------------------------------------------------------
60 : */
61 : static TupleTableSlot * /* return: a tuple or NULL */
62 166839 : ExecHashJoin(PlanState *pstate)
63 : {
64 166839 : HashJoinState *node = castNode(HashJoinState, pstate);
65 : PlanState *outerNode;
66 : HashState *hashNode;
67 : ExprState *joinqual;
68 : ExprState *otherqual;
69 : ExprContext *econtext;
70 : HashJoinTable hashtable;
71 : TupleTableSlot *outerTupleSlot;
72 : uint32 hashvalue;
73 : int batchno;
74 :
75 : /*
76 : * get information from HashJoin node
77 : */
78 166839 : joinqual = node->js.joinqual;
79 166839 : otherqual = node->js.ps.qual;
80 166839 : hashNode = (HashState *) innerPlanState(node);
81 166839 : outerNode = outerPlanState(node);
82 166839 : hashtable = node->hj_HashTable;
83 166839 : econtext = node->js.ps.ps_ExprContext;
84 :
85 : /*
86 : * Reset per-tuple memory context to free any expression evaluation
87 : * storage allocated in the previous tuple cycle.
88 : */
89 166839 : ResetExprContext(econtext);
90 :
91 : /*
92 : * run the hash join state machine
93 : */
94 : for (;;)
95 : {
96 : /*
97 : * It's possible to iterate this loop many times before returning a
98 : * tuple, in some pathological cases such as needing to move much of
99 : * the current batch to a later batch. So let's check for interrupts
100 : * each time through.
101 : */
102 886637 : CHECK_FOR_INTERRUPTS();
103 :
104 886637 : switch (node->hj_JoinState)
105 : {
106 : case HJ_BUILD_HASHTABLE:
107 :
108 : /*
109 : * First time through: build hash table for inner relation.
110 : */
111 1247 : Assert(hashtable == NULL);
112 :
113 : /*
114 : * If the outer relation is completely empty, and it's not
115 : * right/full join, we can quit without building the hash
116 : * table. However, for an inner join it is only a win to
117 : * check this when the outer relation's startup cost is less
118 : * than the projected cost of building the hash table.
119 : * Otherwise it's best to build the hash table first and see
120 : * if the inner relation is empty. (When it's a left join, we
121 : * should always make this check, since we aren't going to be
122 : * able to skip the join on the strength of an empty inner
123 : * relation anyway.)
124 : *
125 : * If we are rescanning the join, we make use of information
126 : * gained on the previous scan: don't bother to try the
127 : * prefetch if the previous scan found the outer relation
128 : * nonempty. This is not 100% reliable since with new
129 : * parameters the outer relation might yield different
130 : * results, but it's a good heuristic.
131 : *
132 : * The only way to make the check is to try to fetch a tuple
133 : * from the outer plan node. If we succeed, we have to stash
134 : * it away for later consumption by ExecHashJoinOuterGetTuple.
135 : */
136 1247 : if (HJ_FILL_INNER(node))
137 : {
138 : /* no chance to not build the hash table */
139 173 : node->hj_FirstOuterTupleSlot = NULL;
140 : }
141 1904 : else if (HJ_FILL_OUTER(node) ||
142 1645 : (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
143 815 : !node->hj_OuterNotEmpty))
144 : {
145 947 : node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
146 1654 : if (TupIsNull(node->hj_FirstOuterTupleSlot))
147 : {
148 240 : node->hj_OuterNotEmpty = false;
149 240 : return NULL;
150 : }
151 : else
152 707 : node->hj_OuterNotEmpty = true;
153 : }
154 : else
155 127 : node->hj_FirstOuterTupleSlot = NULL;
156 :
157 : /*
158 : * create the hash table
159 : */
160 1007 : hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
161 : node->hj_HashOperators,
162 1007 : HJ_FILL_INNER(node));
163 1007 : node->hj_HashTable = hashtable;
164 :
165 : /*
166 : * execute the Hash node, to build the hash table
167 : */
168 1007 : hashNode->hashtable = hashtable;
169 1007 : (void) MultiExecProcNode((PlanState *) hashNode);
170 :
171 : /*
172 : * If the inner relation is completely empty, and we're not
173 : * doing a left outer join, we can quit without scanning the
174 : * outer relation.
175 : */
176 1007 : if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
177 160 : return NULL;
178 :
179 : /*
180 : * need to remember whether nbatch has increased since we
181 : * began scanning the outer relation
182 : */
183 847 : hashtable->nbatch_outstart = hashtable->nbatch;
184 :
185 : /*
186 : * Reset OuterNotEmpty for scan. (It's OK if we fetched a
187 : * tuple above, because ExecHashJoinOuterGetTuple will
188 : * immediately set it again.)
189 : */
190 847 : node->hj_OuterNotEmpty = false;
191 :
192 847 : node->hj_JoinState = HJ_NEED_NEW_OUTER;
193 :
194 : /* FALL THRU */
195 :
196 : case HJ_NEED_NEW_OUTER:
197 :
198 : /*
199 : * We don't have an outer tuple, try to get the next one
200 : */
201 408890 : outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
202 : node,
203 : &hashvalue);
204 408890 : if (TupIsNull(outerTupleSlot))
205 : {
206 : /* end of batch, or maybe whole join */
207 861 : if (HJ_FILL_INNER(node))
208 : {
209 : /* set up to scan for unmatched inner tuples */
210 173 : ExecPrepHashTableForUnmatched(node);
211 173 : node->hj_JoinState = HJ_FILL_INNER_TUPLES;
212 : }
213 : else
214 688 : node->hj_JoinState = HJ_NEED_NEW_BATCH;
215 861 : continue;
216 : }
217 :
218 408029 : econtext->ecxt_outertuple = outerTupleSlot;
219 408029 : node->hj_MatchedOuter = false;
220 :
221 : /*
222 : * Find the corresponding bucket for this tuple in the main
223 : * hash table or skew hash table.
224 : */
225 408029 : node->hj_CurHashValue = hashvalue;
226 408029 : ExecHashGetBucketAndBatch(hashtable, hashvalue,
227 : &node->hj_CurBucketNo, &batchno);
228 408029 : node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
229 : hashvalue);
230 408029 : node->hj_CurTuple = NULL;
231 :
232 : /*
233 : * The tuple might not belong to the current batch (where
234 : * "current batch" includes the skew buckets if any).
235 : */
236 416629 : if (batchno != hashtable->curbatch &&
237 8600 : node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
238 : {
239 : /*
240 : * Need to postpone this outer tuple to a later batch.
241 : * Save it in the corresponding outer-batch file.
242 : */
243 8400 : Assert(batchno > hashtable->curbatch);
244 16800 : ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
245 : hashvalue,
246 16800 : &hashtable->outerBatchFile[batchno]);
247 : /* Loop around, staying in HJ_NEED_NEW_OUTER state */
248 8400 : continue;
249 : }
250 :
251 : /* OK, let's scan the bucket for matches */
252 399629 : node->hj_JoinState = HJ_SCAN_BUCKET;
253 :
254 : /* FALL THRU */
255 :
256 : case HJ_SCAN_BUCKET:
257 :
258 : /*
259 : * Scan the selected hash bucket for matches to current outer
260 : */
261 558875 : if (!ExecScanHashBucket(node, econtext))
262 : {
263 : /* out of matches; check for possible outer-join fill */
264 316470 : node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
265 316470 : continue;
266 : }
267 :
268 : /*
269 : * We've got a match, but still need to test non-hashed quals.
270 : * ExecScanHashBucket already set up all the state needed to
271 : * call ExecQual.
272 : *
273 : * If we pass the qual, then save state for next call and have
274 : * ExecProject form the projection, store it in the tuple
275 : * table, and return the slot.
276 : *
277 : * Only the joinquals determine tuple match status, but all
278 : * quals must pass to actually return the tuple.
279 : */
280 242405 : if (joinqual == NULL || ExecQual(joinqual, econtext))
281 : {
282 223117 : node->hj_MatchedOuter = true;
283 223117 : HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
284 :
285 : /* In an antijoin, we never return a matched tuple */
286 223117 : if (node->js.jointype == JOIN_ANTI)
287 : {
288 77233 : node->hj_JoinState = HJ_NEED_NEW_OUTER;
289 77233 : continue;
290 : }
291 :
292 : /*
293 : * If we only need to join to the first matching inner
294 : * tuple, then consider returning this one, but after that
295 : * continue with next outer tuple.
296 : */
297 145884 : if (node->js.single_match)
298 5918 : node->hj_JoinState = HJ_NEED_NEW_OUTER;
299 :
300 146356 : if (otherqual == NULL || ExecQual(otherqual, econtext))
301 145412 : return ExecProject(node->js.ps.ps_ProjInfo);
302 : else
303 472 : InstrCountFiltered2(node, 1);
304 : }
305 : else
306 19288 : InstrCountFiltered1(node, 1);
307 19760 : break;
308 :
309 : case HJ_FILL_OUTER_TUPLE:
310 :
311 : /*
312 : * The current outer tuple has run out of matches, so check
313 : * whether to emit a dummy outer-join tuple. Whether we emit
314 : * one or not, the next state is NEED_NEW_OUTER.
315 : */
316 316470 : node->hj_JoinState = HJ_NEED_NEW_OUTER;
317 :
318 583528 : if (!node->hj_MatchedOuter &&
319 267058 : HJ_FILL_OUTER(node))
320 : {
321 : /*
322 : * Generate a fake join tuple with nulls for the inner
323 : * tuple, and return it if it passes the non-join quals.
324 : */
325 20038 : econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
326 :
327 20038 : if (otherqual == NULL || ExecQual(otherqual, econtext))
328 20036 : return ExecProject(node->js.ps.ps_ProjInfo);
329 : else
330 2 : InstrCountFiltered2(node, 1);
331 : }
332 296434 : break;
333 :
334 : case HJ_FILL_INNER_TUPLES:
335 :
336 : /*
337 : * We have finished a batch, but we are doing right/full join,
338 : * so any unmatched inner tuples in the hashtable have to be
339 : * emitted before we continue to the next batch.
340 : */
341 771 : if (!ExecScanHashTableForUnmatched(node, econtext))
342 : {
343 : /* no more unmatched tuples */
344 172 : node->hj_JoinState = HJ_NEED_NEW_BATCH;
345 172 : continue;
346 : }
347 :
348 : /*
349 : * Generate a fake join tuple with nulls for the outer tuple,
350 : * and return it if it passes the non-join quals.
351 : */
352 599 : econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
353 :
354 599 : if (otherqual == NULL || ExecQual(otherqual, econtext))
355 138 : return ExecProject(node->js.ps.ps_ProjInfo);
356 : else
357 461 : InstrCountFiltered2(node, 1);
358 461 : break;
359 :
360 : case HJ_NEED_NEW_BATCH:
361 :
362 : /*
363 : * Try to advance to next batch. Done if there are no more.
364 : */
365 860 : if (!ExecHashJoinNewBatch(node))
366 853 : return NULL; /* end of join */
367 7 : node->hj_JoinState = HJ_NEED_NEW_OUTER;
368 7 : break;
369 :
370 : default:
371 0 : elog(ERROR, "unrecognized hashjoin state: %d",
372 : (int) node->hj_JoinState);
373 : }
374 719798 : }
375 : }
376 :
377 : /* ----------------------------------------------------------------
378 : * ExecInitHashJoin
379 : *
380 : * Init routine for HashJoin node.
381 : * ----------------------------------------------------------------
382 : */
383 : HashJoinState *
384 1361 : ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
385 : {
386 : HashJoinState *hjstate;
387 : Plan *outerNode;
388 : Hash *hashNode;
389 : List *lclauses;
390 : List *rclauses;
391 : List *hoperators;
392 : ListCell *l;
393 :
394 : /* check for unsupported flags */
395 1361 : Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
396 :
397 : /*
398 : * create state structure
399 : */
400 1361 : hjstate = makeNode(HashJoinState);
401 1361 : hjstate->js.ps.plan = (Plan *) node;
402 1361 : hjstate->js.ps.state = estate;
403 1361 : hjstate->js.ps.ExecProcNode = ExecHashJoin;
404 :
405 : /*
406 : * Miscellaneous initialization
407 : *
408 : * create expression context for node
409 : */
410 1361 : ExecAssignExprContext(estate, &hjstate->js.ps);
411 :
412 : /*
413 : * initialize child expressions
414 : */
415 1361 : hjstate->js.ps.qual =
416 1361 : ExecInitQual(node->join.plan.qual, (PlanState *) hjstate);
417 1361 : hjstate->js.jointype = node->join.jointype;
418 1361 : hjstate->js.joinqual =
419 1361 : ExecInitQual(node->join.joinqual, (PlanState *) hjstate);
420 1361 : hjstate->hashclauses =
421 1361 : ExecInitQual(node->hashclauses, (PlanState *) hjstate);
422 :
423 : /*
424 : * initialize child nodes
425 : *
426 : * Note: we could suppress the REWIND flag for the inner input, which
427 : * would amount to betting that the hash will be a single batch. Not
428 : * clear if this would be a win or not.
429 : */
430 1361 : outerNode = outerPlan(node);
431 1361 : hashNode = (Hash *) innerPlan(node);
432 :
433 1361 : outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
434 1361 : innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
435 :
436 : /*
437 : * tuple table initialization
438 : */
439 1361 : ExecInitResultTupleSlot(estate, &hjstate->js.ps);
440 1361 : hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
441 :
442 : /*
443 : * detect whether we need only consider the first matching inner tuple
444 : */
445 2228 : hjstate->js.single_match = (node->join.inner_unique ||
446 867 : node->join.jointype == JOIN_SEMI);
447 :
448 : /* set up null tuples for outer joins, if needed */
449 1361 : switch (node->join.jointype)
450 : {
451 : case JOIN_INNER:
452 : case JOIN_SEMI:
453 921 : break;
454 : case JOIN_LEFT:
455 : case JOIN_ANTI:
456 254 : hjstate->hj_NullInnerTupleSlot =
457 254 : ExecInitNullTupleSlot(estate,
458 254 : ExecGetResultType(innerPlanState(hjstate)));
459 254 : break;
460 : case JOIN_RIGHT:
461 173 : hjstate->hj_NullOuterTupleSlot =
462 173 : ExecInitNullTupleSlot(estate,
463 173 : ExecGetResultType(outerPlanState(hjstate)));
464 173 : break;
465 : case JOIN_FULL:
466 13 : hjstate->hj_NullOuterTupleSlot =
467 13 : ExecInitNullTupleSlot(estate,
468 13 : ExecGetResultType(outerPlanState(hjstate)));
469 13 : hjstate->hj_NullInnerTupleSlot =
470 13 : ExecInitNullTupleSlot(estate,
471 13 : ExecGetResultType(innerPlanState(hjstate)));
472 13 : break;
473 : default:
474 0 : elog(ERROR, "unrecognized join type: %d",
475 : (int) node->join.jointype);
476 : }
477 :
478 : /*
479 : * now for some voodoo. our temporary tuple slot is actually the result
480 : * tuple slot of the Hash node (which is our inner plan). we can do this
481 : * because Hash nodes don't return tuples via ExecProcNode() -- instead
482 : * the hash join node uses ExecScanHashBucket() to get at the contents of
483 : * the hash table. -cim 6/9/91
484 : */
485 : {
486 1361 : HashState *hashstate = (HashState *) innerPlanState(hjstate);
487 1361 : TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
488 :
489 1361 : hjstate->hj_HashTupleSlot = slot;
490 : }
491 :
492 : /*
493 : * initialize tuple type and projection info
494 : */
495 1361 : ExecAssignResultTypeFromTL(&hjstate->js.ps);
496 1361 : ExecAssignProjectionInfo(&hjstate->js.ps, NULL);
497 :
498 1361 : ExecSetSlotDescriptor(hjstate->hj_OuterTupleSlot,
499 1361 : ExecGetResultType(outerPlanState(hjstate)));
500 :
501 : /*
502 : * initialize hash-specific info
503 : */
504 1361 : hjstate->hj_HashTable = NULL;
505 1361 : hjstate->hj_FirstOuterTupleSlot = NULL;
506 :
507 1361 : hjstate->hj_CurHashValue = 0;
508 1361 : hjstate->hj_CurBucketNo = 0;
509 1361 : hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
510 1361 : hjstate->hj_CurTuple = NULL;
511 :
512 : /*
513 : * Deconstruct the hash clauses into outer and inner argument values, so
514 : * that we can evaluate those subexpressions separately. Also make a list
515 : * of the hash operator OIDs, in preparation for looking up the hash
516 : * functions to use.
517 : */
518 1361 : lclauses = NIL;
519 1361 : rclauses = NIL;
520 1361 : hoperators = NIL;
521 2793 : foreach(l, node->hashclauses)
522 : {
523 1432 : OpExpr *hclause = lfirst_node(OpExpr, l);
524 :
525 1432 : lclauses = lappend(lclauses, ExecInitExpr(linitial(hclause->args),
526 : (PlanState *) hjstate));
527 1432 : rclauses = lappend(rclauses, ExecInitExpr(lsecond(hclause->args),
528 : (PlanState *) hjstate));
529 1432 : hoperators = lappend_oid(hoperators, hclause->opno);
530 : }
531 1361 : hjstate->hj_OuterHashKeys = lclauses;
532 1361 : hjstate->hj_InnerHashKeys = rclauses;
533 1361 : hjstate->hj_HashOperators = hoperators;
534 : /* child Hash node needs to evaluate inner hash keys, too */
535 1361 : ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
536 :
537 1361 : hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
538 1361 : hjstate->hj_MatchedOuter = false;
539 1361 : hjstate->hj_OuterNotEmpty = false;
540 :
541 1361 : return hjstate;
542 : }
543 :
544 : /* ----------------------------------------------------------------
545 : * ExecEndHashJoin
546 : *
547 : * clean up routine for HashJoin node
548 : * ----------------------------------------------------------------
549 : */
550 : void
551 1354 : ExecEndHashJoin(HashJoinState *node)
552 : {
553 : /*
554 : * Free hash table
555 : */
556 1354 : if (node->hj_HashTable)
557 : {
558 884 : ExecHashTableDestroy(node->hj_HashTable);
559 884 : node->hj_HashTable = NULL;
560 : }
561 :
562 : /*
563 : * Free the exprcontext
564 : */
565 1354 : ExecFreeExprContext(&node->js.ps);
566 :
567 : /*
568 : * clean out the tuple table
569 : */
570 1354 : ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
571 1354 : ExecClearTuple(node->hj_OuterTupleSlot);
572 1354 : ExecClearTuple(node->hj_HashTupleSlot);
573 :
574 : /*
575 : * clean up subtrees
576 : */
577 1354 : ExecEndNode(outerPlanState(node));
578 1354 : ExecEndNode(innerPlanState(node));
579 1354 : }
580 :
581 : /*
582 : * ExecHashJoinOuterGetTuple
583 : *
584 : * get the next outer tuple for hashjoin: either by
585 : * executing the outer plan node in the first pass, or from
586 : * the temp files for the hashjoin batches.
587 : *
588 : * Returns a null slot if no more outer tuples (within the current batch).
589 : *
590 : * On success, the tuple's hash value is stored at *hashvalue --- this is
591 : * either originally computed, or re-read from the temp file.
592 : */
593 : static TupleTableSlot *
594 408890 : ExecHashJoinOuterGetTuple(PlanState *outerNode,
595 : HashJoinState *hjstate,
596 : uint32 *hashvalue)
597 : {
598 408890 : HashJoinTable hashtable = hjstate->hj_HashTable;
599 408890 : int curbatch = hashtable->curbatch;
600 : TupleTableSlot *slot;
601 :
602 408890 : if (curbatch == 0) /* if it is the first pass */
603 : {
604 : /*
605 : * Check to see if first outer tuple was already fetched by
606 : * ExecHashJoin() and not used yet.
607 : */
608 400483 : slot = hjstate->hj_FirstOuterTupleSlot;
609 400483 : if (!TupIsNull(slot))
610 608 : hjstate->hj_FirstOuterTupleSlot = NULL;
611 : else
612 399875 : slot = ExecProcNode(outerNode);
613 :
614 801081 : while (!TupIsNull(slot))
615 : {
616 : /*
617 : * We have to compute the tuple's hash value.
618 : */
619 399744 : ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
620 :
621 399744 : econtext->ecxt_outertuple = slot;
622 399744 : if (ExecHashGetHashValue(hashtable, econtext,
623 : hjstate->hj_OuterHashKeys,
624 : true, /* outer tuple */
625 399744 : HJ_FILL_OUTER(hjstate),
626 : hashvalue))
627 : {
628 : /* remember outer relation is not empty for possible rescan */
629 399629 : hjstate->hj_OuterNotEmpty = true;
630 :
631 399629 : return slot;
632 : }
633 :
634 : /*
635 : * That tuple couldn't match because of a NULL, so discard it and
636 : * continue with the next one.
637 : */
638 115 : slot = ExecProcNode(outerNode);
639 : }
640 : }
641 8407 : else if (curbatch < hashtable->nbatch)
642 : {
643 8407 : BufFile *file = hashtable->outerBatchFile[curbatch];
644 :
645 : /*
646 : * In outer-join cases, we could get here even though the batch file
647 : * is empty.
648 : */
649 8407 : if (file == NULL)
650 0 : return NULL;
651 :
652 8407 : slot = ExecHashJoinGetSavedTuple(hjstate,
653 : file,
654 : hashvalue,
655 : hjstate->hj_OuterTupleSlot);
656 8407 : if (!TupIsNull(slot))
657 8400 : return slot;
658 : }
659 :
660 : /* End of this batch */
661 861 : return NULL;
662 : }
663 :
664 : /*
665 : * ExecHashJoinNewBatch
666 : * switch to a new hashjoin batch
667 : *
668 : * Returns true if successful, false if there are no more batches.
669 : */
670 : static bool
671 860 : ExecHashJoinNewBatch(HashJoinState *hjstate)
672 : {
673 860 : HashJoinTable hashtable = hjstate->hj_HashTable;
674 : int nbatch;
675 : int curbatch;
676 : BufFile *innerFile;
677 : TupleTableSlot *slot;
678 : uint32 hashvalue;
679 :
680 860 : nbatch = hashtable->nbatch;
681 860 : curbatch = hashtable->curbatch;
682 :
683 860 : if (curbatch > 0)
684 : {
685 : /*
686 : * We no longer need the previous outer batch file; close it right
687 : * away to free disk space.
688 : */
689 7 : if (hashtable->outerBatchFile[curbatch])
690 7 : BufFileClose(hashtable->outerBatchFile[curbatch]);
691 7 : hashtable->outerBatchFile[curbatch] = NULL;
692 : }
693 : else /* we just finished the first batch */
694 : {
695 : /*
696 : * Reset some of the skew optimization state variables, since we no
697 : * longer need to consider skew tuples after the first batch. The
698 : * memory context reset we are about to do will release the skew
699 : * hashtable itself.
700 : */
701 853 : hashtable->skewEnabled = false;
702 853 : hashtable->skewBucket = NULL;
703 853 : hashtable->skewBucketNums = NULL;
704 853 : hashtable->nSkewBuckets = 0;
705 853 : hashtable->spaceUsedSkew = 0;
706 : }
707 :
708 : /*
709 : * We can always skip over any batches that are completely empty on both
710 : * sides. We can sometimes skip over batches that are empty on only one
711 : * side, but there are exceptions:
712 : *
713 : * 1. In a left/full outer join, we have to process outer batches even if
714 : * the inner batch is empty. Similarly, in a right/full outer join, we
715 : * have to process inner batches even if the outer batch is empty.
716 : *
717 : * 2. If we have increased nbatch since the initial estimate, we have to
718 : * scan inner batches since they might contain tuples that need to be
719 : * reassigned to later inner batches.
720 : *
721 : * 3. Similarly, if we have increased nbatch since starting the outer
722 : * scan, we have to rescan outer batches in case they contain tuples that
723 : * need to be reassigned.
724 : */
725 860 : curbatch++;
726 1727 : while (curbatch < nbatch &&
727 14 : (hashtable->outerBatchFile[curbatch] == NULL ||
728 7 : hashtable->innerBatchFile[curbatch] == NULL))
729 : {
730 0 : if (hashtable->outerBatchFile[curbatch] &&
731 0 : HJ_FILL_OUTER(hjstate))
732 0 : break; /* must process due to rule 1 */
733 0 : if (hashtable->innerBatchFile[curbatch] &&
734 0 : HJ_FILL_INNER(hjstate))
735 0 : break; /* must process due to rule 1 */
736 0 : if (hashtable->innerBatchFile[curbatch] &&
737 0 : nbatch != hashtable->nbatch_original)
738 0 : break; /* must process due to rule 2 */
739 0 : if (hashtable->outerBatchFile[curbatch] &&
740 0 : nbatch != hashtable->nbatch_outstart)
741 0 : break; /* must process due to rule 3 */
742 : /* We can ignore this batch. */
743 : /* Release associated temp files right away. */
744 0 : if (hashtable->innerBatchFile[curbatch])
745 0 : BufFileClose(hashtable->innerBatchFile[curbatch]);
746 0 : hashtable->innerBatchFile[curbatch] = NULL;
747 0 : if (hashtable->outerBatchFile[curbatch])
748 0 : BufFileClose(hashtable->outerBatchFile[curbatch]);
749 0 : hashtable->outerBatchFile[curbatch] = NULL;
750 0 : curbatch++;
751 : }
752 :
753 860 : if (curbatch >= nbatch)
754 853 : return false; /* no more batches */
755 :
756 7 : hashtable->curbatch = curbatch;
757 :
758 : /*
759 : * Reload the hash table with the new inner batch (which could be empty)
760 : */
761 7 : ExecHashTableReset(hashtable);
762 :
763 7 : innerFile = hashtable->innerBatchFile[curbatch];
764 :
765 7 : if (innerFile != NULL)
766 : {
767 7 : if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
768 0 : ereport(ERROR,
769 : (errcode_for_file_access(),
770 : errmsg("could not rewind hash-join temporary file: %m")));
771 :
772 12345 : while ((slot = ExecHashJoinGetSavedTuple(hjstate,
773 : innerFile,
774 : &hashvalue,
775 : hjstate->hj_HashTupleSlot)))
776 : {
777 : /*
778 : * NOTE: some tuples may be sent to future batches. Also, it is
779 : * possible for hashtable->nbatch to be increased here!
780 : */
781 12331 : ExecHashTableInsert(hashtable, slot, hashvalue);
782 : }
783 :
784 : /*
785 : * after we build the hash table, the inner batch file is no longer
786 : * needed
787 : */
788 7 : BufFileClose(innerFile);
789 7 : hashtable->innerBatchFile[curbatch] = NULL;
790 : }
791 :
792 : /*
793 : * Rewind outer batch file (if present), so that we can start reading it.
794 : */
795 7 : if (hashtable->outerBatchFile[curbatch] != NULL)
796 : {
797 7 : if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
798 0 : ereport(ERROR,
799 : (errcode_for_file_access(),
800 : errmsg("could not rewind hash-join temporary file: %m")));
801 : }
802 :
803 7 : return true;
804 : }
805 :
806 : /*
807 : * ExecHashJoinSaveTuple
808 : * save a tuple to a batch file.
809 : *
810 : * The data recorded in the file for each tuple is its hash value,
811 : * then the tuple in MinimalTuple format.
812 : *
813 : * Note: it is important always to call this in the regular executor
814 : * context, not in a shorter-lived context; else the temp file buffers
815 : * will get messed up.
816 : */
817 : void
818 20731 : ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
819 : BufFile **fileptr)
820 : {
821 20731 : BufFile *file = *fileptr;
822 : size_t written;
823 :
824 20731 : if (file == NULL)
825 : {
826 : /* First write to this batch file, so open it. */
827 14 : file = BufFileCreateTemp(false);
828 14 : *fileptr = file;
829 : }
830 :
831 20731 : written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));
832 20731 : if (written != sizeof(uint32))
833 0 : ereport(ERROR,
834 : (errcode_for_file_access(),
835 : errmsg("could not write to hash-join temporary file: %m")));
836 :
837 20731 : written = BufFileWrite(file, (void *) tuple, tuple->t_len);
838 20731 : if (written != tuple->t_len)
839 0 : ereport(ERROR,
840 : (errcode_for_file_access(),
841 : errmsg("could not write to hash-join temporary file: %m")));
842 20731 : }
843 :
844 : /*
845 : * ExecHashJoinGetSavedTuple
846 : * read the next tuple from a batch file. Return NULL if no more.
847 : *
848 : * On success, *hashvalue is set to the tuple's hash value, and the tuple
849 : * itself is stored in the given slot.
850 : */
851 : static TupleTableSlot *
852 20745 : ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
853 : BufFile *file,
854 : uint32 *hashvalue,
855 : TupleTableSlot *tupleSlot)
856 : {
857 : uint32 header[2];
858 : size_t nread;
859 : MinimalTuple tuple;
860 :
861 : /*
862 : * We check for interrupts here because this is typically taken as an
863 : * alternative code path to an ExecProcNode() call, which would include
864 : * such a check.
865 : */
866 20745 : CHECK_FOR_INTERRUPTS();
867 :
868 : /*
869 : * Since both the hash value and the MinimalTuple length word are uint32,
870 : * we can read them both in one BufFileRead() call without any type
871 : * cheating.
872 : */
873 20745 : nread = BufFileRead(file, (void *) header, sizeof(header));
874 20745 : if (nread == 0) /* end of file */
875 : {
876 14 : ExecClearTuple(tupleSlot);
877 14 : return NULL;
878 : }
879 20731 : if (nread != sizeof(header))
880 0 : ereport(ERROR,
881 : (errcode_for_file_access(),
882 : errmsg("could not read from hash-join temporary file: %m")));
883 20731 : *hashvalue = header[0];
884 20731 : tuple = (MinimalTuple) palloc(header[1]);
885 20731 : tuple->t_len = header[1];
886 20731 : nread = BufFileRead(file,
887 : (void *) ((char *) tuple + sizeof(uint32)),
888 20731 : header[1] - sizeof(uint32));
889 20731 : if (nread != header[1] - sizeof(uint32))
890 0 : ereport(ERROR,
891 : (errcode_for_file_access(),
892 : errmsg("could not read from hash-join temporary file: %m")));
893 20731 : return ExecStoreMinimalTuple(tuple, tupleSlot, true);
894 : }
895 :
896 :
897 : void
898 153 : ExecReScanHashJoin(HashJoinState *node)
899 : {
900 : /*
901 : * In a multi-batch join, we currently have to do rescans the hard way,
902 : * primarily because batch temp files may have already been released. But
903 : * if it's a single-batch join, and there is no parameter change for the
904 : * inner subnode, then we can just re-use the existing hash table without
905 : * rebuilding it.
906 : */
907 153 : if (node->hj_HashTable != NULL)
908 : {
909 268 : if (node->hj_HashTable->nbatch == 1 &&
910 134 : node->js.ps.righttree->chgParam == NULL)
911 : {
912 : /*
913 : * Okay to reuse the hash table; needn't rescan inner, either.
914 : *
915 : * However, if it's a right/full join, we'd better reset the
916 : * inner-tuple match flags contained in the table.
917 : */
918 18 : if (HJ_FILL_INNER(node))
919 4 : ExecHashTableResetMatchFlags(node->hj_HashTable);
920 :
921 : /*
922 : * Also, we need to reset our state about the emptiness of the
923 : * outer relation, so that the new scan of the outer will update
924 : * it correctly if it turns out to be empty this time. (There's no
925 : * harm in clearing it now because ExecHashJoin won't need the
926 : * info. In the other cases, where the hash table doesn't exist
927 : * or we are destroying it, we leave this state alone because
928 : * ExecHashJoin will need it the first time through.)
929 : */
930 18 : node->hj_OuterNotEmpty = false;
931 :
932 : /* ExecHashJoin can skip the BUILD_HASHTABLE step */
933 18 : node->hj_JoinState = HJ_NEED_NEW_OUTER;
934 : }
935 : else
936 : {
937 : /* must destroy and rebuild hash table */
938 116 : ExecHashTableDestroy(node->hj_HashTable);
939 116 : node->hj_HashTable = NULL;
940 116 : node->hj_JoinState = HJ_BUILD_HASHTABLE;
941 :
942 : /*
943 : * if chgParam of subnode is not null then plan will be re-scanned
944 : * by first ExecProcNode.
945 : */
946 116 : if (node->js.ps.righttree->chgParam == NULL)
947 0 : ExecReScan(node->js.ps.righttree);
948 : }
949 : }
950 :
951 : /* Always reset intra-tuple state */
952 153 : node->hj_CurHashValue = 0;
953 153 : node->hj_CurBucketNo = 0;
954 153 : node->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
955 153 : node->hj_CurTuple = NULL;
956 :
957 153 : node->hj_MatchedOuter = false;
958 153 : node->hj_FirstOuterTupleSlot = NULL;
959 :
960 : /*
961 : * if chgParam of subnode is not null then plan will be re-scanned by
962 : * first ExecProcNode.
963 : */
964 153 : if (node->js.ps.lefttree->chgParam == NULL)
965 133 : ExecReScan(node->js.ps.lefttree);
966 153 : }
|