Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeSamplescan.c
4 : * Support routines for sample scans of relations (table sampling).
5 : *
6 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/nodeSamplescan.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/hash.h"
18 : #include "access/relscan.h"
19 : #include "access/tsmapi.h"
20 : #include "executor/executor.h"
21 : #include "executor/nodeSamplescan.h"
22 : #include "miscadmin.h"
23 : #include "pgstat.h"
24 : #include "storage/predicate.h"
25 : #include "utils/builtins.h"
26 : #include "utils/rel.h"
27 : #include "utils/tqual.h"
28 :
29 : static void InitScanRelation(SampleScanState *node, EState *estate, int eflags);
30 : static TupleTableSlot *SampleNext(SampleScanState *node);
31 : static void tablesample_init(SampleScanState *scanstate);
32 : static HeapTuple tablesample_getnext(SampleScanState *scanstate);
33 : static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
34 : HeapScanDesc scan);
35 :
36 : /* ----------------------------------------------------------------
37 : * Scan Support
38 : * ----------------------------------------------------------------
39 : */
40 :
41 : /* ----------------------------------------------------------------
42 : * SampleNext
43 : *
44 : * This is a workhorse for ExecSampleScan
45 : * ----------------------------------------------------------------
46 : */
47 : static TupleTableSlot *
48 40164 : SampleNext(SampleScanState *node)
49 : {
50 : HeapTuple tuple;
51 : TupleTableSlot *slot;
52 :
53 : /*
54 : * if this is first call within a scan, initialize
55 : */
56 40164 : if (!node->begun)
57 32 : tablesample_init(node);
58 :
59 : /*
60 : * get the next tuple, and store it in our result slot
61 : */
62 40158 : tuple = tablesample_getnext(node);
63 :
64 40158 : slot = node->ss.ss_ScanTupleSlot;
65 :
66 40158 : if (tuple)
67 40134 : ExecStoreTuple(tuple, /* tuple to store */
68 : slot, /* slot to store in */
69 40134 : node->ss.ss_currentScanDesc->rs_cbuf, /* tuple's buffer */
70 : false); /* don't pfree this pointer */
71 : else
72 24 : ExecClearTuple(slot);
73 :
74 40158 : return slot;
75 : }
76 :
77 : /*
78 : * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
79 : */
80 : static bool
81 0 : SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
82 : {
83 : /*
84 : * No need to recheck for SampleScan, since like SeqScan we don't pass any
85 : * checkable keys to heap_beginscan.
86 : */
87 0 : return true;
88 : }
89 :
90 : /* ----------------------------------------------------------------
91 : * ExecSampleScan(node)
92 : *
93 : * Scans the relation using the sampling method and returns
94 : * the next qualifying tuple.
95 : * We call the ExecScan() routine and pass it the appropriate
96 : * access method functions.
97 : * ----------------------------------------------------------------
98 : */
99 : static TupleTableSlot *
100 40163 : ExecSampleScan(PlanState *pstate)
101 : {
102 40163 : SampleScanState *node = castNode(SampleScanState, pstate);
103 :
104 40163 : return ExecScan(&node->ss,
105 : (ExecScanAccessMtd) SampleNext,
106 : (ExecScanRecheckMtd) SampleRecheck);
107 : }
108 :
109 : /* ----------------------------------------------------------------
110 : * InitScanRelation
111 : *
112 : * Set up to access the scan relation.
113 : * ----------------------------------------------------------------
114 : */
115 : static void
116 36 : InitScanRelation(SampleScanState *node, EState *estate, int eflags)
117 : {
118 : Relation currentRelation;
119 :
120 : /*
121 : * get the relation object id from the relid'th entry in the range table,
122 : * open that relation and acquire appropriate lock on it.
123 : */
124 36 : currentRelation = ExecOpenScanRelation(estate,
125 36 : ((SampleScan *) node->ss.ps.plan)->scan.scanrelid,
126 : eflags);
127 :
128 36 : node->ss.ss_currentRelation = currentRelation;
129 :
130 : /* we won't set up the HeapScanDesc till later */
131 36 : node->ss.ss_currentScanDesc = NULL;
132 :
133 : /* and report the scan tuple slot's rowtype */
134 36 : ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation));
135 36 : }
136 :
137 :
138 : /* ----------------------------------------------------------------
139 : * ExecInitSampleScan
140 : * ----------------------------------------------------------------
141 : */
142 : SampleScanState *
143 36 : ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
144 : {
145 : SampleScanState *scanstate;
146 36 : TableSampleClause *tsc = node->tablesample;
147 : TsmRoutine *tsm;
148 :
149 36 : Assert(outerPlan(node) == NULL);
150 36 : Assert(innerPlan(node) == NULL);
151 :
152 : /*
153 : * create state structure
154 : */
155 36 : scanstate = makeNode(SampleScanState);
156 36 : scanstate->ss.ps.plan = (Plan *) node;
157 36 : scanstate->ss.ps.state = estate;
158 36 : scanstate->ss.ps.ExecProcNode = ExecSampleScan;
159 :
160 : /*
161 : * Miscellaneous initialization
162 : *
163 : * create expression context for node
164 : */
165 36 : ExecAssignExprContext(estate, &scanstate->ss.ps);
166 :
167 : /*
168 : * initialize child expressions
169 : */
170 36 : scanstate->ss.ps.qual =
171 36 : ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
172 :
173 36 : scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
174 36 : scanstate->repeatable =
175 36 : ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
176 :
177 : /*
178 : * tuple table initialization
179 : */
180 36 : ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
181 36 : ExecInitScanTupleSlot(estate, &scanstate->ss);
182 :
183 : /*
184 : * initialize scan relation
185 : */
186 36 : InitScanRelation(scanstate, estate, eflags);
187 :
188 : /*
189 : * Initialize result tuple type and projection info.
190 : */
191 36 : ExecAssignResultTypeFromTL(&scanstate->ss.ps);
192 36 : ExecAssignScanProjectionInfo(&scanstate->ss);
193 :
194 : /*
195 : * If we don't have a REPEATABLE clause, select a random seed. We want to
196 : * do this just once, since the seed shouldn't change over rescans.
197 : */
198 36 : if (tsc->repeatable == NULL)
199 22 : scanstate->seed = random();
200 :
201 : /*
202 : * Finally, initialize the TABLESAMPLE method handler.
203 : */
204 36 : tsm = GetTsmRoutine(tsc->tsmhandler);
205 36 : scanstate->tsmroutine = tsm;
206 36 : scanstate->tsm_state = NULL;
207 :
208 36 : if (tsm->InitSampleScan)
209 36 : tsm->InitSampleScan(scanstate, eflags);
210 :
211 : /* We'll do BeginSampleScan later; we can't evaluate params yet */
212 36 : scanstate->begun = false;
213 :
214 36 : return scanstate;
215 : }
216 :
217 : /* ----------------------------------------------------------------
218 : * ExecEndSampleScan
219 : *
220 : * frees any storage allocated through C routines.
221 : * ----------------------------------------------------------------
222 : */
223 : void
224 30 : ExecEndSampleScan(SampleScanState *node)
225 : {
226 : /*
227 : * Tell sampling function that we finished the scan.
228 : */
229 30 : if (node->tsmroutine->EndSampleScan)
230 0 : node->tsmroutine->EndSampleScan(node);
231 :
232 : /*
233 : * Free the exprcontext
234 : */
235 30 : ExecFreeExprContext(&node->ss.ps);
236 :
237 : /*
238 : * clean out the tuple table
239 : */
240 30 : ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
241 30 : ExecClearTuple(node->ss.ss_ScanTupleSlot);
242 :
243 : /*
244 : * close heap scan
245 : */
246 30 : if (node->ss.ss_currentScanDesc)
247 21 : heap_endscan(node->ss.ss_currentScanDesc);
248 :
249 : /*
250 : * close the heap relation.
251 : */
252 30 : ExecCloseScanRelation(node->ss.ss_currentRelation);
253 30 : }
254 :
255 : /* ----------------------------------------------------------------
256 : * ExecReScanSampleScan
257 : *
258 : * Rescans the relation.
259 : *
260 : * ----------------------------------------------------------------
261 : */
262 : void
263 10 : ExecReScanSampleScan(SampleScanState *node)
264 : {
265 : /* Remember we need to do BeginSampleScan again (if we did it at all) */
266 10 : node->begun = false;
267 :
268 10 : ExecScanReScan(&node->ss);
269 10 : }
270 :
271 :
272 : /*
273 : * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
274 : */
275 : static void
276 32 : tablesample_init(SampleScanState *scanstate)
277 : {
278 32 : TsmRoutine *tsm = scanstate->tsmroutine;
279 32 : ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
280 : Datum *params;
281 : Datum datum;
282 : bool isnull;
283 : uint32 seed;
284 : bool allow_sync;
285 : int i;
286 : ListCell *arg;
287 :
288 32 : params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
289 :
290 32 : i = 0;
291 63 : foreach(arg, scanstate->args)
292 : {
293 32 : ExprState *argstate = (ExprState *) lfirst(arg);
294 :
295 32 : params[i] = ExecEvalExprSwitchContext(argstate,
296 : econtext,
297 : &isnull);
298 32 : if (isnull)
299 1 : ereport(ERROR,
300 : (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
301 : errmsg("TABLESAMPLE parameter cannot be null")));
302 31 : i++;
303 : }
304 :
305 31 : if (scanstate->repeatable)
306 : {
307 13 : datum = ExecEvalExprSwitchContext(scanstate->repeatable,
308 : econtext,
309 : &isnull);
310 13 : if (isnull)
311 1 : ereport(ERROR,
312 : (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
313 : errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
314 :
315 : /*
316 : * The REPEATABLE parameter has been coerced to float8 by the parser.
317 : * The reason for using float8 at the SQL level is that it will
318 : * produce unsurprising results both for users used to databases that
319 : * accept only integers in the REPEATABLE clause and for those who
320 : * might expect that REPEATABLE works like setseed() (a float in the
321 : * range from -1 to 1).
322 : *
323 : * We use hashfloat8() to convert the supplied value into a suitable
324 : * seed. For regression-testing purposes, that has the convenient
325 : * property that REPEATABLE(0) gives a machine-independent result.
326 : */
327 12 : seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
328 : }
329 : else
330 : {
331 : /* Use the seed selected by ExecInitSampleScan */
332 18 : seed = scanstate->seed;
333 : }
334 :
335 : /* Set default values for params that BeginSampleScan can adjust */
336 30 : scanstate->use_bulkread = true;
337 30 : scanstate->use_pagemode = true;
338 :
339 : /* Let tablesample method do its thing */
340 60 : tsm->BeginSampleScan(scanstate,
341 : params,
342 30 : list_length(scanstate->args),
343 : seed);
344 :
345 : /* We'll use syncscan if there's no NextSampleBlock function */
346 26 : allow_sync = (tsm->NextSampleBlock == NULL);
347 :
348 : /* Now we can create or reset the HeapScanDesc */
349 26 : if (scanstate->ss.ss_currentScanDesc == NULL)
350 : {
351 21 : scanstate->ss.ss_currentScanDesc =
352 63 : heap_beginscan_sampling(scanstate->ss.ss_currentRelation,
353 21 : scanstate->ss.ps.state->es_snapshot,
354 : 0, NULL,
355 21 : scanstate->use_bulkread,
356 : allow_sync,
357 21 : scanstate->use_pagemode);
358 : }
359 : else
360 : {
361 10 : heap_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
362 5 : scanstate->use_bulkread,
363 : allow_sync,
364 5 : scanstate->use_pagemode);
365 : }
366 :
367 26 : pfree(params);
368 :
369 : /* And we're initialized. */
370 26 : scanstate->begun = true;
371 26 : }
372 :
373 : /*
374 : * Get next tuple from TABLESAMPLE method.
375 : *
376 : * Note: an awful lot of this is copied-and-pasted from heapam.c. It would
377 : * perhaps be better to refactor to share more code.
378 : */
379 : static HeapTuple
380 40158 : tablesample_getnext(SampleScanState *scanstate)
381 : {
382 40158 : TsmRoutine *tsm = scanstate->tsmroutine;
383 40158 : HeapScanDesc scan = scanstate->ss.ss_currentScanDesc;
384 40158 : HeapTuple tuple = &(scan->rs_ctup);
385 40158 : Snapshot snapshot = scan->rs_snapshot;
386 40158 : bool pagemode = scan->rs_pageatatime;
387 : BlockNumber blockno;
388 : Page page;
389 : bool all_visible;
390 : OffsetNumber maxoffset;
391 :
392 40158 : if (!scan->rs_inited)
393 : {
394 : /*
395 : * return null immediately if relation is empty
396 : */
397 26 : if (scan->rs_nblocks == 0)
398 : {
399 0 : Assert(!BufferIsValid(scan->rs_cbuf));
400 0 : tuple->t_data = NULL;
401 0 : return NULL;
402 : }
403 26 : if (tsm->NextSampleBlock)
404 : {
405 13 : blockno = tsm->NextSampleBlock(scanstate);
406 13 : if (!BlockNumberIsValid(blockno))
407 : {
408 3 : tuple->t_data = NULL;
409 3 : return NULL;
410 : }
411 : }
412 : else
413 13 : blockno = scan->rs_startblock;
414 23 : Assert(blockno < scan->rs_nblocks);
415 23 : heapgetpage(scan, blockno);
416 23 : scan->rs_inited = true;
417 : }
418 : else
419 : {
420 : /* continue from previously returned page/tuple */
421 40132 : blockno = scan->rs_cblock; /* current page */
422 : }
423 :
424 : /*
425 : * When not using pagemode, we must lock the buffer during tuple
426 : * visibility checks.
427 : */
428 40155 : if (!pagemode)
429 5 : LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
430 :
431 40155 : page = (Page) BufferGetPage(scan->rs_cbuf);
432 40155 : all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
433 40155 : maxoffset = PageGetMaxOffsetNumber(page);
434 :
435 : for (;;)
436 : {
437 : OffsetNumber tupoffset;
438 : bool finished;
439 :
440 42242 : CHECK_FOR_INTERRUPTS();
441 :
442 : /* Ask the tablesample method which tuples to check on this page. */
443 42242 : tupoffset = tsm->NextSampleTuple(scanstate,
444 : blockno,
445 : maxoffset);
446 :
447 42242 : if (OffsetNumberIsValid(tupoffset))
448 : {
449 : ItemId itemid;
450 : bool visible;
451 :
452 : /* Skip invalid tuple pointers. */
453 40134 : itemid = PageGetItemId(page, tupoffset);
454 40134 : if (!ItemIdIsNormal(itemid))
455 0 : continue;
456 :
457 40134 : tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
458 40134 : tuple->t_len = ItemIdGetLength(itemid);
459 40134 : ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
460 :
461 40134 : if (all_visible)
462 40058 : visible = true;
463 : else
464 76 : visible = SampleTupleVisible(tuple, tupoffset, scan);
465 :
466 : /* in pagemode, heapgetpage did this for us */
467 40134 : if (!pagemode)
468 1 : CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
469 : scan->rs_cbuf, snapshot);
470 :
471 40134 : if (visible)
472 : {
473 : /* Found visible tuple, return it. */
474 40134 : if (!pagemode)
475 1 : LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
476 40134 : break;
477 : }
478 : else
479 : {
480 : /* Try next tuple from same page. */
481 0 : continue;
482 : }
483 : }
484 :
485 : /*
486 : * if we get here, it means we've exhausted the items on this page and
487 : * it's time to move to the next.
488 : */
489 2108 : if (!pagemode)
490 698 : LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
491 :
492 2108 : if (tsm->NextSampleBlock)
493 : {
494 710 : blockno = tsm->NextSampleBlock(scanstate);
495 710 : Assert(!scan->rs_syncscan);
496 710 : finished = !BlockNumberIsValid(blockno);
497 : }
498 : else
499 : {
500 : /* Without NextSampleBlock, just do a plain forward seqscan. */
501 1398 : blockno++;
502 1398 : if (blockno >= scan->rs_nblocks)
503 13 : blockno = 0;
504 :
505 : /*
506 : * Report our new scan position for synchronization purposes.
507 : *
508 : * Note: we do this before checking for end of scan so that the
509 : * final state of the position hint is back at the start of the
510 : * rel. That's not strictly necessary, but otherwise when you run
511 : * the same query multiple times the starting position would shift
512 : * a little bit backwards on every invocation, which is confusing.
513 : * We don't guarantee any specific ordering in general, though.
514 : */
515 1398 : if (scan->rs_syncscan)
516 0 : ss_report_location(scan->rs_rd, blockno);
517 :
518 1398 : finished = (blockno == scan->rs_startblock);
519 : }
520 :
521 : /*
522 : * Reached end of scan?
523 : */
524 2108 : if (finished)
525 : {
526 21 : if (BufferIsValid(scan->rs_cbuf))
527 21 : ReleaseBuffer(scan->rs_cbuf);
528 21 : scan->rs_cbuf = InvalidBuffer;
529 21 : scan->rs_cblock = InvalidBlockNumber;
530 21 : tuple->t_data = NULL;
531 21 : scan->rs_inited = false;
532 21 : return NULL;
533 : }
534 :
535 2087 : Assert(blockno < scan->rs_nblocks);
536 2087 : heapgetpage(scan, blockno);
537 :
538 : /* Re-establish state for new page */
539 2087 : if (!pagemode)
540 694 : LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
541 :
542 2087 : page = (Page) BufferGetPage(scan->rs_cbuf);
543 2087 : all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
544 2087 : maxoffset = PageGetMaxOffsetNumber(page);
545 2087 : }
546 :
547 : /* Count successfully-fetched tuples as heap fetches */
548 40134 : pgstat_count_heap_getnext(scan->rs_rd);
549 :
550 40134 : return &(scan->rs_ctup);
551 : }
552 :
553 : /*
554 : * Check visibility of the tuple.
555 : */
556 : static bool
557 76 : SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
558 : {
559 76 : if (scan->rs_pageatatime)
560 : {
561 : /*
562 : * In pageatatime mode, heapgetpage() already did visibility checks,
563 : * so just look at the info it left in rs_vistuples[].
564 : *
565 : * We use a binary search over the known-sorted array. Note: we could
566 : * save some effort if we insisted that NextSampleTuple select tuples
567 : * in increasing order, but it's not clear that there would be enough
568 : * gain to justify the restriction.
569 : */
570 75 : int start = 0,
571 75 : end = scan->rs_ntuples - 1;
572 :
573 207 : while (start <= end)
574 : {
575 132 : int mid = (start + end) / 2;
576 132 : OffsetNumber curoffset = scan->rs_vistuples[mid];
577 :
578 132 : if (tupoffset == curoffset)
579 75 : return true;
580 57 : else if (tupoffset < curoffset)
581 24 : end = mid - 1;
582 : else
583 33 : start = mid + 1;
584 : }
585 :
586 0 : return false;
587 : }
588 : else
589 : {
590 : /* Otherwise, we have to check the tuple individually. */
591 1 : return HeapTupleSatisfiesVisibility(tuple,
592 : scan->rs_snapshot,
593 : scan->rs_cbuf);
594 : }
595 : }
|