Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeBitmapHeapscan.c
4 : * Routines to support bitmapped scans of relations
5 : *
6 : * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 : * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 : * special snapshots). The reason is that since index and heap scans are
9 : * decoupled, there can be no assurance that the index tuple prompting a
10 : * visit to a particular heap TID still exists when the visit is made.
11 : * Therefore the tuple might not exist anymore either (which is OK because
12 : * heap_fetch will cope) --- but worse, the tuple slot could have been
13 : * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 : * certain to fail the time qual and so it will not be mistakenly returned,
15 : * but with anything else we might return a tuple that doesn't meet the
16 : * required index qual conditions.
17 : *
18 : *
19 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
20 : * Portions Copyright (c) 1994, Regents of the University of California
21 : *
22 : *
23 : * IDENTIFICATION
24 : * src/backend/executor/nodeBitmapHeapscan.c
25 : *
26 : *-------------------------------------------------------------------------
27 : */
28 : /*
29 : * INTERFACE ROUTINES
30 : * ExecBitmapHeapScan scans a relation using bitmap info
31 : * BitmapHeapNext workhorse for above
32 : * ExecInitBitmapHeapScan creates and initializes state info.
33 : * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 : * ExecEndBitmapHeapScan releases all storage.
35 : */
36 : #include "postgres.h"
37 :
38 : #include <math.h>
39 :
40 : #include "access/relscan.h"
41 : #include "access/transam.h"
42 : #include "executor/execdebug.h"
43 : #include "executor/nodeBitmapHeapscan.h"
44 : #include "miscadmin.h"
45 : #include "pgstat.h"
46 : #include "storage/bufmgr.h"
47 : #include "storage/predicate.h"
48 : #include "utils/memutils.h"
49 : #include "utils/rel.h"
50 : #include "utils/spccache.h"
51 : #include "utils/snapmgr.h"
52 : #include "utils/tqual.h"
53 :
54 :
55 : static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
56 : static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
57 : static inline void BitmapDoneInitializingSharedState(
58 : ParallelBitmapHeapState *pstate);
59 : static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
60 : TBMIterateResult *tbmres);
61 : static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
62 : static inline void BitmapPrefetch(BitmapHeapScanState *node,
63 : HeapScanDesc scan);
64 : static bool BitmapShouldInitializeSharedState(
65 : ParallelBitmapHeapState *pstate);
66 :
67 :
68 : /* ----------------------------------------------------------------
69 : * BitmapHeapNext
70 : *
71 : * Retrieve next tuple from the BitmapHeapScan node's currentRelation
72 : * ----------------------------------------------------------------
73 : */
74 : static TupleTableSlot *
75 385574 : BitmapHeapNext(BitmapHeapScanState *node)
76 : {
77 : ExprContext *econtext;
78 : HeapScanDesc scan;
79 : TIDBitmap *tbm;
80 385574 : TBMIterator *tbmiterator = NULL;
81 385574 : TBMSharedIterator *shared_tbmiterator = NULL;
82 : TBMIterateResult *tbmres;
83 : OffsetNumber targoffset;
84 : TupleTableSlot *slot;
85 385574 : ParallelBitmapHeapState *pstate = node->pstate;
86 385574 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
87 :
88 : /*
89 : * extract necessary information from index scan node
90 : */
91 385574 : econtext = node->ss.ps.ps_ExprContext;
92 385574 : slot = node->ss.ss_ScanTupleSlot;
93 385574 : scan = node->ss.ss_currentScanDesc;
94 385574 : tbm = node->tbm;
95 385574 : if (pstate == NULL)
96 187520 : tbmiterator = node->tbmiterator;
97 : else
98 198054 : shared_tbmiterator = node->shared_tbmiterator;
99 385574 : tbmres = node->tbmres;
100 :
101 : /*
102 : * If we haven't yet performed the underlying index scan, do it, and begin
103 : * the iteration over the bitmap.
104 : *
105 : * For prefetching, we use *two* iterators, one for the pages we are
106 : * actually scanning and another that runs ahead of the first for
107 : * prefetching. node->prefetch_pages tracks exactly how many pages ahead
108 : * the prefetch iterator is. Also, node->prefetch_target tracks the
109 : * desired prefetch distance, which starts small and increases up to the
110 : * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
111 : * a scan that stops after a few tuples because of a LIMIT.
112 : */
113 385574 : if (!node->initialized)
114 : {
115 1145 : if (!pstate)
116 : {
117 1090 : tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
118 :
119 1090 : if (!tbm || !IsA(tbm, TIDBitmap))
120 0 : elog(ERROR, "unrecognized result from subplan");
121 :
122 1090 : node->tbm = tbm;
123 1090 : node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
124 1090 : node->tbmres = tbmres = NULL;
125 :
126 : #ifdef USE_PREFETCH
127 1090 : if (node->prefetch_maximum > 0)
128 : {
129 1090 : node->prefetch_iterator = tbm_begin_iterate(tbm);
130 1090 : node->prefetch_pages = 0;
131 1090 : node->prefetch_target = -1;
132 : }
133 : #endif /* USE_PREFETCH */
134 : }
135 : else
136 : {
137 : /*
138 : * The leader will immediately come out of the function, but
139 : * others will be blocked until leader populates the TBM and wakes
140 : * them up.
141 : */
142 55 : if (BitmapShouldInitializeSharedState(pstate))
143 : {
144 11 : tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
145 11 : if (!tbm || !IsA(tbm, TIDBitmap))
146 0 : elog(ERROR, "unrecognized result from subplan");
147 :
148 11 : node->tbm = tbm;
149 :
150 : /*
151 : * Prepare to iterate over the TBM. This will return the
152 : * dsa_pointer of the iterator state which will be used by
153 : * multiple processes to iterate jointly.
154 : */
155 11 : pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
156 : #ifdef USE_PREFETCH
157 11 : if (node->prefetch_maximum > 0)
158 : {
159 11 : pstate->prefetch_iterator =
160 11 : tbm_prepare_shared_iterate(tbm);
161 :
162 : /*
163 : * We don't need the mutex here as we haven't yet woke up
164 : * others.
165 : */
166 11 : pstate->prefetch_pages = 0;
167 11 : pstate->prefetch_target = -1;
168 : }
169 : #endif
170 :
171 : /* We have initialized the shared state so wake up others. */
172 11 : BitmapDoneInitializingSharedState(pstate);
173 : }
174 :
175 : /* Allocate a private iterator and attach the shared state to it */
176 55 : node->shared_tbmiterator = shared_tbmiterator =
177 55 : tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
178 55 : node->tbmres = tbmres = NULL;
179 :
180 : #ifdef USE_PREFETCH
181 55 : if (node->prefetch_maximum > 0)
182 : {
183 55 : node->shared_prefetch_iterator =
184 55 : tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
185 : }
186 : #endif /* USE_PREFETCH */
187 : }
188 1145 : node->initialized = true;
189 : }
190 :
191 : for (;;)
192 : {
193 : Page dp;
194 : ItemId lp;
195 :
196 484442 : CHECK_FOR_INTERRUPTS();
197 :
198 : /*
199 : * Get next page of results if needed
200 : */
201 484442 : if (tbmres == NULL)
202 : {
203 29378 : if (!pstate)
204 24638 : node->tbmres = tbmres = tbm_iterate(tbmiterator);
205 : else
206 4740 : node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
207 29378 : if (tbmres == NULL)
208 : {
209 : /* no more entries in the bitmap */
210 1115 : break;
211 : }
212 :
213 28263 : BitmapAdjustPrefetchIterator(node, tbmres);
214 :
215 : /*
216 : * Ignore any claimed entries past what we think is the end of the
217 : * relation. (This is probably not necessary given that we got at
218 : * least AccessShareLock on the table before performing any of the
219 : * indexscans, but let's be safe.)
220 : */
221 28263 : if (tbmres->blockno >= scan->rs_nblocks)
222 : {
223 0 : node->tbmres = tbmres = NULL;
224 0 : continue;
225 : }
226 :
227 : /*
228 : * Fetch the current heap page and identify candidate tuples.
229 : */
230 28263 : bitgetpage(scan, tbmres);
231 :
232 28263 : if (tbmres->ntuples >= 0)
233 8041 : node->exact_pages++;
234 : else
235 20222 : node->lossy_pages++;
236 :
237 : /*
238 : * Set rs_cindex to first slot to examine
239 : */
240 28263 : scan->rs_cindex = 0;
241 :
242 : /* Adjust the prefetch target */
243 28263 : BitmapAdjustPrefetchTarget(node);
244 : }
245 : else
246 : {
247 : /*
248 : * Continuing in previously obtained page; advance rs_cindex
249 : */
250 455064 : scan->rs_cindex++;
251 :
252 : #ifdef USE_PREFETCH
253 :
254 : /*
255 : * Try to prefetch at least a few pages even before we get to the
256 : * second page if we don't stop reading after the first tuple.
257 : */
258 455064 : if (!pstate)
259 : {
260 257064 : if (node->prefetch_target < node->prefetch_maximum)
261 658 : node->prefetch_target++;
262 : }
263 198000 : else if (pstate->prefetch_target < node->prefetch_maximum)
264 : {
265 : /* take spinlock while updating shared state */
266 994 : SpinLockAcquire(&pstate->mutex);
267 994 : if (pstate->prefetch_target < node->prefetch_maximum)
268 994 : pstate->prefetch_target++;
269 994 : SpinLockRelease(&pstate->mutex);
270 : }
271 : #endif /* USE_PREFETCH */
272 : }
273 :
274 : /*
275 : * Out of range? If so, nothing more to look at on this page
276 : */
277 483327 : if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
278 : {
279 28233 : node->tbmres = tbmres = NULL;
280 28233 : continue;
281 : }
282 :
283 : /*
284 : * We issue prefetch requests *after* fetching the current page to try
285 : * to avoid having prefetching interfere with the main I/O. Also, this
286 : * should happen only when we have determined there is still something
287 : * to do on the current page, else we may uselessly prefetch the same
288 : * page we are just about to request for real.
289 : */
290 455094 : BitmapPrefetch(node, scan);
291 :
292 : /*
293 : * Okay to fetch the tuple
294 : */
295 455094 : targoffset = scan->rs_vistuples[scan->rs_cindex];
296 455094 : dp = (Page) BufferGetPage(scan->rs_cbuf);
297 455094 : lp = PageGetItemId(dp, targoffset);
298 455094 : Assert(ItemIdIsNormal(lp));
299 :
300 455094 : scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
301 455094 : scan->rs_ctup.t_len = ItemIdGetLength(lp);
302 455094 : scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id;
303 455094 : ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
304 :
305 455094 : pgstat_count_heap_fetch(scan->rs_rd);
306 :
307 : /*
308 : * Set up the result slot to point to this tuple. Note that the slot
309 : * acquires a pin on the buffer.
310 : */
311 455094 : ExecStoreTuple(&scan->rs_ctup,
312 : slot,
313 : scan->rs_cbuf,
314 : false);
315 :
316 : /*
317 : * If we are using lossy info, we have to recheck the qual conditions
318 : * at every tuple.
319 : */
320 455094 : if (tbmres->recheck)
321 : {
322 142154 : econtext->ecxt_scantuple = slot;
323 142154 : ResetExprContext(econtext);
324 :
325 142154 : if (!ExecQual(node->bitmapqualorig, econtext))
326 : {
327 : /* Fails recheck, so drop it and loop back for another */
328 70635 : InstrCountFiltered2(node, 1);
329 70635 : ExecClearTuple(slot);
330 70635 : continue;
331 : }
332 : }
333 :
334 : /* OK to return this tuple */
335 384459 : return slot;
336 98868 : }
337 :
338 : /*
339 : * if we get here it means we are at the end of the scan..
340 : */
341 1115 : return ExecClearTuple(slot);
342 : }
343 :
344 : /*
345 : * bitgetpage - subroutine for BitmapHeapNext()
346 : *
347 : * This routine reads and pins the specified page of the relation, then
348 : * builds an array indicating which tuples on the page are both potentially
349 : * interesting according to the bitmap, and visible according to the snapshot.
350 : */
351 : static void
352 28263 : bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
353 : {
354 28263 : BlockNumber page = tbmres->blockno;
355 : Buffer buffer;
356 : Snapshot snapshot;
357 : int ntup;
358 :
359 : /*
360 : * Acquire pin on the target heap page, trading in any pin we held before.
361 : */
362 28263 : Assert(page < scan->rs_nblocks);
363 :
364 28263 : scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
365 : scan->rs_rd,
366 : page);
367 28263 : buffer = scan->rs_cbuf;
368 28263 : snapshot = scan->rs_snapshot;
369 :
370 28263 : ntup = 0;
371 :
372 : /*
373 : * Prune and repair fragmentation for the whole page, if possible.
374 : */
375 28263 : heap_page_prune_opt(scan->rs_rd, buffer);
376 :
377 : /*
378 : * We must hold share lock on the buffer content while examining tuple
379 : * visibility. Afterwards, however, the tuples we have found to be
380 : * visible are guaranteed good as long as we hold the buffer pin.
381 : */
382 28263 : LockBuffer(buffer, BUFFER_LOCK_SHARE);
383 :
384 : /*
385 : * We need two separate strategies for lossy and non-lossy cases.
386 : */
387 28263 : if (tbmres->ntuples >= 0)
388 : {
389 : /*
390 : * Bitmap is non-lossy, so we just look through the offsets listed in
391 : * tbmres; but we have to follow any HOT chain starting at each such
392 : * offset.
393 : */
394 : int curslot;
395 :
396 345671 : for (curslot = 0; curslot < tbmres->ntuples; curslot++)
397 : {
398 337630 : OffsetNumber offnum = tbmres->offsets[curslot];
399 : ItemPointerData tid;
400 : HeapTupleData heapTuple;
401 :
402 337630 : ItemPointerSet(&tid, page, offnum);
403 337630 : if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
404 : &heapTuple, NULL, true))
405 317225 : scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
406 : }
407 : }
408 : else
409 : {
410 : /*
411 : * Bitmap is lossy, so we must examine each item pointer on the page.
412 : * But we can ignore HOT chains, since we'll check each tuple anyway.
413 : */
414 20222 : Page dp = (Page) BufferGetPage(buffer);
415 20222 : OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
416 : OffsetNumber offnum;
417 :
418 158116 : for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
419 : {
420 : ItemId lp;
421 : HeapTupleData loctup;
422 : bool valid;
423 :
424 137894 : lp = PageGetItemId(dp, offnum);
425 137894 : if (!ItemIdIsNormal(lp))
426 0 : continue;
427 137894 : loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
428 137894 : loctup.t_len = ItemIdGetLength(lp);
429 137894 : loctup.t_tableOid = scan->rs_rd->rd_id;
430 137894 : ItemPointerSet(&loctup.t_self, page, offnum);
431 137894 : valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
432 137894 : if (valid)
433 : {
434 137894 : scan->rs_vistuples[ntup++] = offnum;
435 137894 : PredicateLockTuple(scan->rs_rd, &loctup, snapshot);
436 : }
437 137894 : CheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
438 : buffer, snapshot);
439 : }
440 : }
441 :
442 28263 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
443 :
444 28263 : Assert(ntup <= MaxHeapTuplesPerPage);
445 28263 : scan->rs_ntuples = ntup;
446 28263 : }
447 :
448 : /*
449 : * BitmapDoneInitializingSharedState - Shared state is initialized
450 : *
451 : * By this time the leader has already populated the TBM and initialized the
452 : * shared state so wake up other processes.
453 : */
454 : static inline void
455 11 : BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
456 : {
457 11 : SpinLockAcquire(&pstate->mutex);
458 11 : pstate->state = BM_FINISHED;
459 11 : SpinLockRelease(&pstate->mutex);
460 11 : ConditionVariableBroadcast(&pstate->cv);
461 11 : }
462 :
463 : /*
464 : * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
465 : */
466 : static inline void
467 28263 : BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
468 : TBMIterateResult *tbmres)
469 : {
470 : #ifdef USE_PREFETCH
471 28263 : ParallelBitmapHeapState *pstate = node->pstate;
472 :
473 28263 : if (pstate == NULL)
474 : {
475 23578 : TBMIterator *prefetch_iterator = node->prefetch_iterator;
476 :
477 23578 : if (node->prefetch_pages > 0)
478 : {
479 : /* The main iterator has closed the distance by one page */
480 22507 : node->prefetch_pages--;
481 : }
482 1071 : else if (prefetch_iterator)
483 : {
484 : /* Do not let the prefetch iterator get behind the main one */
485 1071 : TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
486 :
487 1071 : if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
488 0 : elog(ERROR, "prefetch and main iterators are out of sync");
489 : }
490 51841 : return;
491 : }
492 :
493 4685 : if (node->prefetch_maximum > 0)
494 : {
495 4685 : TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
496 :
497 4685 : SpinLockAcquire(&pstate->mutex);
498 4685 : if (pstate->prefetch_pages > 0)
499 : {
500 4674 : pstate->prefetch_pages--;
501 4674 : SpinLockRelease(&pstate->mutex);
502 : }
503 : else
504 : {
505 : /* Release the mutex before iterating */
506 11 : SpinLockRelease(&pstate->mutex);
507 :
508 : /*
509 : * In case of shared mode, we can not ensure that the current
510 : * blockno of the main iterator and that of the prefetch iterator
511 : * are same. It's possible that whatever blockno we are
512 : * prefetching will be processed by another process. Therefore,
513 : * we don't validate the blockno here as we do in non-parallel
514 : * case.
515 : */
516 11 : if (prefetch_iterator)
517 11 : tbm_shared_iterate(prefetch_iterator);
518 : }
519 : }
520 : #endif /* USE_PREFETCH */
521 : }
522 :
523 : /*
524 : * BitmapAdjustPrefetchTarget - Adjust the prefetch target
525 : *
526 : * Increase prefetch target if it's not yet at the max. Note that
527 : * we will increase it to zero after fetching the very first
528 : * page/tuple, then to one after the second tuple is fetched, then
529 : * it doubles as later pages are fetched.
530 : */
531 : static inline void
532 28263 : BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
533 : {
534 : #ifdef USE_PREFETCH
535 28263 : ParallelBitmapHeapState *pstate = node->pstate;
536 :
537 28263 : if (pstate == NULL)
538 : {
539 23578 : if (node->prefetch_target >= node->prefetch_maximum)
540 : /* don't increase any further */ ;
541 710 : else if (node->prefetch_target >= node->prefetch_maximum / 2)
542 9 : node->prefetch_target = node->prefetch_maximum;
543 701 : else if (node->prefetch_target > 0)
544 0 : node->prefetch_target *= 2;
545 : else
546 701 : node->prefetch_target++;
547 51841 : return;
548 : }
549 :
550 : /* Do an unlocked check first to save spinlock acquisitions. */
551 4685 : if (pstate->prefetch_target < node->prefetch_maximum)
552 : {
553 42 : SpinLockAcquire(&pstate->mutex);
554 42 : if (pstate->prefetch_target >= node->prefetch_maximum)
555 : /* don't increase any further */ ;
556 42 : else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
557 10 : pstate->prefetch_target = node->prefetch_maximum;
558 32 : else if (pstate->prefetch_target > 0)
559 21 : pstate->prefetch_target *= 2;
560 : else
561 11 : pstate->prefetch_target++;
562 42 : SpinLockRelease(&pstate->mutex);
563 : }
564 : #endif /* USE_PREFETCH */
565 : }
566 :
567 : /*
568 : * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
569 : */
570 : static inline void
571 455094 : BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
572 : {
573 : #ifdef USE_PREFETCH
574 455094 : ParallelBitmapHeapState *pstate = node->pstate;
575 :
576 455094 : if (pstate == NULL)
577 : {
578 257094 : TBMIterator *prefetch_iterator = node->prefetch_iterator;
579 :
580 257094 : if (prefetch_iterator)
581 : {
582 528051 : while (node->prefetch_pages < node->prefetch_target)
583 : {
584 23007 : TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
585 :
586 23007 : if (tbmpre == NULL)
587 : {
588 : /* No more pages to prefetch */
589 500 : tbm_end_iterate(prefetch_iterator);
590 500 : node->prefetch_iterator = NULL;
591 500 : break;
592 : }
593 22507 : node->prefetch_pages++;
594 22507 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
595 : }
596 : }
597 :
598 257094 : return;
599 : }
600 :
601 198000 : if (pstate->prefetch_pages < pstate->prefetch_target)
602 : {
603 84918 : TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
604 :
605 84918 : if (prefetch_iterator)
606 : {
607 : while (1)
608 : {
609 : TBMIterateResult *tbmpre;
610 7834 : bool do_prefetch = false;
611 :
612 : /*
613 : * Recheck under the mutex. If some other process has already
614 : * done enough prefetching then we need not to do anything.
615 : */
616 7834 : SpinLockAcquire(&pstate->mutex);
617 7834 : if (pstate->prefetch_pages < pstate->prefetch_target)
618 : {
619 4686 : pstate->prefetch_pages++;
620 4686 : do_prefetch = true;
621 : }
622 7834 : SpinLockRelease(&pstate->mutex);
623 :
624 7834 : if (!do_prefetch)
625 3148 : return;
626 :
627 4686 : tbmpre = tbm_shared_iterate(prefetch_iterator);
628 4686 : if (tbmpre == NULL)
629 : {
630 : /* No more pages to prefetch */
631 12 : tbm_end_shared_iterate(prefetch_iterator);
632 12 : node->shared_prefetch_iterator = NULL;
633 12 : break;
634 : }
635 :
636 4674 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
637 4674 : }
638 : }
639 : }
640 : #endif /* USE_PREFETCH */
641 : }
642 :
643 : /*
644 : * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
645 : */
646 : static bool
647 0 : BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
648 : {
649 : ExprContext *econtext;
650 :
651 : /*
652 : * extract necessary information from index scan node
653 : */
654 0 : econtext = node->ss.ps.ps_ExprContext;
655 :
656 : /* Does the tuple meet the original qual conditions? */
657 0 : econtext->ecxt_scantuple = slot;
658 :
659 0 : ResetExprContext(econtext);
660 :
661 0 : return ExecQual(node->bitmapqualorig, econtext);
662 : }
663 :
664 : /* ----------------------------------------------------------------
665 : * ExecBitmapHeapScan(node)
666 : * ----------------------------------------------------------------
667 : */
668 : static TupleTableSlot *
669 385563 : ExecBitmapHeapScan(PlanState *pstate)
670 : {
671 385563 : BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
672 :
673 385563 : return ExecScan(&node->ss,
674 : (ExecScanAccessMtd) BitmapHeapNext,
675 : (ExecScanRecheckMtd) BitmapHeapRecheck);
676 : }
677 :
678 : /* ----------------------------------------------------------------
679 : * ExecReScanBitmapHeapScan(node)
680 : * ----------------------------------------------------------------
681 : */
682 : void
683 133 : ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
684 : {
685 133 : PlanState *outerPlan = outerPlanState(node);
686 :
687 : /* rescan to release any page pin */
688 133 : heap_rescan(node->ss.ss_currentScanDesc, NULL);
689 :
690 133 : if (node->tbmiterator)
691 81 : tbm_end_iterate(node->tbmiterator);
692 133 : if (node->prefetch_iterator)
693 80 : tbm_end_iterate(node->prefetch_iterator);
694 133 : if (node->shared_tbmiterator)
695 9 : tbm_end_shared_iterate(node->shared_tbmiterator);
696 133 : if (node->shared_prefetch_iterator)
697 0 : tbm_end_shared_iterate(node->shared_prefetch_iterator);
698 133 : if (node->tbm)
699 90 : tbm_free(node->tbm);
700 133 : node->tbm = NULL;
701 133 : node->tbmiterator = NULL;
702 133 : node->tbmres = NULL;
703 133 : node->prefetch_iterator = NULL;
704 133 : node->initialized = false;
705 133 : node->shared_tbmiterator = NULL;
706 133 : node->shared_prefetch_iterator = NULL;
707 :
708 133 : ExecScanReScan(&node->ss);
709 :
710 : /*
711 : * if chgParam of subnode is not null then plan will be re-scanned by
712 : * first ExecProcNode.
713 : */
714 133 : if (outerPlan->chgParam == NULL)
715 28 : ExecReScan(outerPlan);
716 133 : }
717 :
718 : /* ----------------------------------------------------------------
719 : * ExecEndBitmapHeapScan
720 : * ----------------------------------------------------------------
721 : */
722 : void
723 1628 : ExecEndBitmapHeapScan(BitmapHeapScanState *node)
724 : {
725 : Relation relation;
726 : HeapScanDesc scanDesc;
727 :
728 : /*
729 : * extract information from the node
730 : */
731 1628 : relation = node->ss.ss_currentRelation;
732 1628 : scanDesc = node->ss.ss_currentScanDesc;
733 :
734 : /*
735 : * Free the exprcontext
736 : */
737 1628 : ExecFreeExprContext(&node->ss.ps);
738 :
739 : /*
740 : * clear out tuple table slots
741 : */
742 1628 : ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
743 1628 : ExecClearTuple(node->ss.ss_ScanTupleSlot);
744 :
745 : /*
746 : * close down subplans
747 : */
748 1628 : ExecEndNode(outerPlanState(node));
749 :
750 : /*
751 : * release bitmap if any
752 : */
753 1628 : if (node->tbmiterator)
754 1005 : tbm_end_iterate(node->tbmiterator);
755 1628 : if (node->prefetch_iterator)
756 508 : tbm_end_iterate(node->prefetch_iterator);
757 1628 : if (node->tbm)
758 1007 : tbm_free(node->tbm);
759 1628 : if (node->shared_tbmiterator)
760 46 : tbm_end_shared_iterate(node->shared_tbmiterator);
761 1628 : if (node->shared_prefetch_iterator)
762 43 : tbm_end_shared_iterate(node->shared_prefetch_iterator);
763 :
764 : /*
765 : * close heap scan
766 : */
767 1628 : heap_endscan(scanDesc);
768 :
769 : /*
770 : * close the heap relation.
771 : */
772 1628 : ExecCloseScanRelation(relation);
773 1628 : }
774 :
775 : /* ----------------------------------------------------------------
776 : * ExecInitBitmapHeapScan
777 : *
778 : * Initializes the scan's state information.
779 : * ----------------------------------------------------------------
780 : */
781 : BitmapHeapScanState *
782 1632 : ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
783 : {
784 : BitmapHeapScanState *scanstate;
785 : Relation currentRelation;
786 : int io_concurrency;
787 :
788 : /* check for unsupported flags */
789 1632 : Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
790 :
791 : /*
792 : * Assert caller didn't ask for an unsafe snapshot --- see comments at
793 : * head of file.
794 : */
795 1632 : Assert(IsMVCCSnapshot(estate->es_snapshot));
796 :
797 : /*
798 : * create state structure
799 : */
800 1632 : scanstate = makeNode(BitmapHeapScanState);
801 1632 : scanstate->ss.ps.plan = (Plan *) node;
802 1632 : scanstate->ss.ps.state = estate;
803 1632 : scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
804 :
805 1632 : scanstate->tbm = NULL;
806 1632 : scanstate->tbmiterator = NULL;
807 1632 : scanstate->tbmres = NULL;
808 1632 : scanstate->exact_pages = 0;
809 1632 : scanstate->lossy_pages = 0;
810 1632 : scanstate->prefetch_iterator = NULL;
811 1632 : scanstate->prefetch_pages = 0;
812 1632 : scanstate->prefetch_target = 0;
813 : /* may be updated below */
814 1632 : scanstate->prefetch_maximum = target_prefetch_pages;
815 1632 : scanstate->pscan_len = 0;
816 1632 : scanstate->initialized = false;
817 1632 : scanstate->shared_tbmiterator = NULL;
818 1632 : scanstate->pstate = NULL;
819 :
820 : /*
821 : * Miscellaneous initialization
822 : *
823 : * create expression context for node
824 : */
825 1632 : ExecAssignExprContext(estate, &scanstate->ss.ps);
826 :
827 : /*
828 : * initialize child expressions
829 : */
830 1632 : scanstate->ss.ps.qual =
831 1632 : ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
832 1632 : scanstate->bitmapqualorig =
833 1632 : ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
834 :
835 : /*
836 : * tuple table initialization
837 : */
838 1632 : ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
839 1632 : ExecInitScanTupleSlot(estate, &scanstate->ss);
840 :
841 : /*
842 : * open the base relation and acquire appropriate lock on it.
843 : */
844 1632 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
845 :
846 : /*
847 : * Determine the maximum for prefetch_target. If the tablespace has a
848 : * specific IO concurrency set, use that to compute the corresponding
849 : * maximum value; otherwise, we already initialized to the value computed
850 : * by the GUC machinery.
851 : */
852 1632 : io_concurrency =
853 1632 : get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
854 1632 : if (io_concurrency != effective_io_concurrency)
855 : {
856 : double maximum;
857 :
858 0 : if (ComputeIoConcurrency(io_concurrency, &maximum))
859 0 : scanstate->prefetch_maximum = rint(maximum);
860 : }
861 :
862 1632 : scanstate->ss.ss_currentRelation = currentRelation;
863 :
864 : /*
865 : * Even though we aren't going to do a conventional seqscan, it is useful
866 : * to create a HeapScanDesc --- most of the fields in it are usable.
867 : */
868 1632 : scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
869 : estate->es_snapshot,
870 : 0,
871 : NULL);
872 :
873 : /*
874 : * get the scan type from the relation descriptor.
875 : */
876 1632 : ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
877 :
878 : /*
879 : * Initialize result tuple type and projection info.
880 : */
881 1632 : ExecAssignResultTypeFromTL(&scanstate->ss.ps);
882 1632 : ExecAssignScanProjectionInfo(&scanstate->ss);
883 :
884 : /*
885 : * initialize child nodes
886 : *
887 : * We do this last because the child nodes will open indexscans on our
888 : * relation's indexes, and we want to be sure we have acquired a lock on
889 : * the relation first.
890 : */
891 1632 : outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
892 :
893 : /*
894 : * all done.
895 : */
896 1632 : return scanstate;
897 : }
898 :
899 : /*----------------
900 : * BitmapShouldInitializeSharedState
901 : *
902 : * The first process to come here and see the state to the BM_INITIAL
903 : * will become the leader for the parallel bitmap scan and will be
904 : * responsible for populating the TIDBitmap. The other processes will
905 : * be blocked by the condition variable until the leader wakes them up.
906 : * ---------------
907 : */
908 : static bool
909 55 : BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
910 : {
911 : SharedBitmapState state;
912 :
913 : while (1)
914 : {
915 55 : SpinLockAcquire(&pstate->mutex);
916 55 : state = pstate->state;
917 55 : if (pstate->state == BM_INITIAL)
918 11 : pstate->state = BM_INPROGRESS;
919 55 : SpinLockRelease(&pstate->mutex);
920 :
921 : /* Exit if bitmap is done, or if we're the leader. */
922 55 : if (state != BM_INPROGRESS)
923 55 : break;
924 :
925 : /* Wait for the leader to wake us up. */
926 0 : ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
927 0 : }
928 :
929 55 : ConditionVariableCancelSleep();
930 :
931 55 : return (state == BM_INITIAL);
932 : }
933 :
934 : /* ----------------------------------------------------------------
935 : * ExecBitmapHeapEstimate
936 : *
937 : * estimates the space required to serialize bitmap scan node.
938 : * ----------------------------------------------------------------
939 : */
940 : void
941 2 : ExecBitmapHeapEstimate(BitmapHeapScanState *node,
942 : ParallelContext *pcxt)
943 : {
944 2 : EState *estate = node->ss.ps.state;
945 :
946 2 : node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
947 : phs_snapshot_data),
948 : EstimateSnapshotSpace(estate->es_snapshot));
949 :
950 2 : shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
951 2 : shm_toc_estimate_keys(&pcxt->estimator, 1);
952 2 : }
953 :
954 : /* ----------------------------------------------------------------
955 : * ExecBitmapHeapInitializeDSM
956 : *
957 : * Set up a parallel bitmap heap scan descriptor.
958 : * ----------------------------------------------------------------
959 : */
960 : void
961 2 : ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
962 : ParallelContext *pcxt)
963 : {
964 : ParallelBitmapHeapState *pstate;
965 2 : EState *estate = node->ss.ps.state;
966 :
967 2 : pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);
968 :
969 2 : pstate->tbmiterator = 0;
970 2 : pstate->prefetch_iterator = 0;
971 :
972 : /* Initialize the mutex */
973 2 : SpinLockInit(&pstate->mutex);
974 2 : pstate->prefetch_pages = 0;
975 2 : pstate->prefetch_target = 0;
976 2 : pstate->state = BM_INITIAL;
977 :
978 2 : ConditionVariableInit(&pstate->cv);
979 2 : SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);
980 :
981 2 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
982 2 : node->pstate = pstate;
983 2 : }
984 :
985 : /* ----------------------------------------------------------------
986 : * ExecBitmapHeapReInitializeDSM
987 : *
988 : * Reset shared state before beginning a fresh scan.
989 : * ----------------------------------------------------------------
990 : */
991 : void
992 9 : ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
993 : ParallelContext *pcxt)
994 : {
995 9 : ParallelBitmapHeapState *pstate = node->pstate;
996 9 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
997 :
998 9 : pstate->state = BM_INITIAL;
999 :
1000 9 : if (DsaPointerIsValid(pstate->tbmiterator))
1001 9 : tbm_free_shared_area(dsa, pstate->tbmiterator);
1002 :
1003 9 : if (DsaPointerIsValid(pstate->prefetch_iterator))
1004 9 : tbm_free_shared_area(dsa, pstate->prefetch_iterator);
1005 :
1006 9 : pstate->tbmiterator = InvalidDsaPointer;
1007 9 : pstate->prefetch_iterator = InvalidDsaPointer;
1008 9 : }
1009 :
1010 : /* ----------------------------------------------------------------
1011 : * ExecBitmapHeapInitializeWorker
1012 : *
1013 : * Copy relevant information from TOC into planstate.
1014 : * ----------------------------------------------------------------
1015 : */
1016 : void
1017 44 : ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, shm_toc *toc)
1018 : {
1019 : ParallelBitmapHeapState *pstate;
1020 : Snapshot snapshot;
1021 :
1022 44 : pstate = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
1023 44 : node->pstate = pstate;
1024 :
1025 44 : snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
1026 44 : heap_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
1027 44 : }
|