LCOV - code coverage report
Current view: top level - src/backend/access/tablesample - system.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 66 66 100.0 %
Date: 2017-09-29 13:40:31 Functions: 6 6 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * system.c
       4             :  *    support routines for SYSTEM tablesample method
       5             :  *
       6             :  * To ensure repeatability of samples, it is necessary that selection of a
       7             :  * given tuple be history-independent; otherwise syncscanning would break
       8             :  * repeatability, to say nothing of logically-irrelevant maintenance such
       9             :  * as physical extension or shortening of the relation.
      10             :  *
      11             :  * To achieve that, we proceed by hashing each candidate block number together
      12             :  * with the active seed, and then selecting it if the hash is less than the
      13             :  * cutoff value computed from the selection probability by BeginSampleScan.
      14             :  *
      15             :  *
      16             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      17             :  * Portions Copyright (c) 1994, Regents of the University of California
      18             :  *
      19             :  * IDENTIFICATION
      20             :  *    src/backend/access/tablesample/system.c
      21             :  *
      22             :  *-------------------------------------------------------------------------
      23             :  */
      24             : 
      25             : #include "postgres.h"
      26             : 
      27             : #ifdef _MSC_VER
      28             : #include <float.h>                /* for _isnan */
      29             : #endif
      30             : #include <math.h>
      31             : 
      32             : #include "access/hash.h"
      33             : #include "access/relscan.h"
      34             : #include "access/tsmapi.h"
      35             : #include "catalog/pg_type.h"
      36             : #include "optimizer/clauses.h"
      37             : #include "optimizer/cost.h"
      38             : #include "utils/builtins.h"
      39             : 
      40             : 
      41             : /* Private state */
      42             : typedef struct
      43             : {
      44             :     uint64      cutoff;         /* select blocks with hash less than this */
      45             :     uint32      seed;           /* random seed */
      46             :     BlockNumber nextblock;      /* next block to consider sampling */
      47             :     OffsetNumber lt;            /* last tuple returned from current block */
      48             : } SystemSamplerData;
      49             : 
      50             : 
      51             : static void system_samplescangetsamplesize(PlannerInfo *root,
      52             :                                RelOptInfo *baserel,
      53             :                                List *paramexprs,
      54             :                                BlockNumber *pages,
      55             :                                double *tuples);
      56             : static void system_initsamplescan(SampleScanState *node,
      57             :                       int eflags);
      58             : static void system_beginsamplescan(SampleScanState *node,
      59             :                        Datum *params,
      60             :                        int nparams,
      61             :                        uint32 seed);
      62             : static BlockNumber system_nextsampleblock(SampleScanState *node);
      63             : static OffsetNumber system_nextsampletuple(SampleScanState *node,
      64             :                        BlockNumber blockno,
      65             :                        OffsetNumber maxoffset);
      66             : 
      67             : 
      68             : /*
      69             :  * Create a TsmRoutine descriptor for the SYSTEM method.
      70             :  */
      71             : Datum
      72          67 : tsm_system_handler(PG_FUNCTION_ARGS)
      73             : {
      74          67 :     TsmRoutine *tsm = makeNode(TsmRoutine);
      75             : 
      76          67 :     tsm->parameterTypes = list_make1_oid(FLOAT4OID);
      77          67 :     tsm->repeatable_across_queries = true;
      78          67 :     tsm->repeatable_across_scans = true;
      79          67 :     tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
      80          67 :     tsm->InitSampleScan = system_initsamplescan;
      81          67 :     tsm->BeginSampleScan = system_beginsamplescan;
      82          67 :     tsm->NextSampleBlock = system_nextsampleblock;
      83          67 :     tsm->NextSampleTuple = system_nextsampletuple;
      84          67 :     tsm->EndSampleScan = NULL;
      85             : 
      86          67 :     PG_RETURN_POINTER(tsm);
      87             : }
      88             : 
      89             : /*
      90             :  * Sample size estimation.
      91             :  */
      92             : static void
      93          16 : system_samplescangetsamplesize(PlannerInfo *root,
      94             :                                RelOptInfo *baserel,
      95             :                                List *paramexprs,
      96             :                                BlockNumber *pages,
      97             :                                double *tuples)
      98             : {
      99             :     Node       *pctnode;
     100             :     float4      samplefract;
     101             : 
     102             :     /* Try to extract an estimate for the sample percentage */
     103          16 :     pctnode = (Node *) linitial(paramexprs);
     104          16 :     pctnode = estimate_expression_value(root, pctnode);
     105             : 
     106          30 :     if (IsA(pctnode, Const) &&
     107          14 :         !((Const *) pctnode)->constisnull)
     108             :     {
     109          13 :         samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
     110          26 :         if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
     111          11 :             samplefract /= 100.0f;
     112             :         else
     113             :         {
     114             :             /* Default samplefract if the value is bogus */
     115           2 :             samplefract = 0.1f;
     116             :         }
     117             :     }
     118             :     else
     119             :     {
     120             :         /* Default samplefract if we didn't obtain a non-null Const */
     121           3 :         samplefract = 0.1f;
     122             :     }
     123             : 
     124             :     /* We'll visit a sample of the pages ... */
     125          16 :     *pages = clamp_row_est(baserel->pages * samplefract);
     126             : 
     127             :     /* ... and hopefully get a representative number of tuples from them */
     128          16 :     *tuples = clamp_row_est(baserel->tuples * samplefract);
     129          16 : }
     130             : 
     131             : /*
     132             :  * Initialize during executor setup.
     133             :  */
     134             : static void
     135          16 : system_initsamplescan(SampleScanState *node, int eflags)
     136             : {
     137          16 :     node->tsm_state = palloc0(sizeof(SystemSamplerData));
     138          16 : }
     139             : 
     140             : /*
     141             :  * Examine parameters and prepare for a sample scan.
     142             :  */
     143             : static void
     144          15 : system_beginsamplescan(SampleScanState *node,
     145             :                        Datum *params,
     146             :                        int nparams,
     147             :                        uint32 seed)
     148             : {
     149          15 :     SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     150          15 :     double      percent = DatumGetFloat4(params[0]);
     151             :     double      dcutoff;
     152             : 
     153          15 :     if (percent < 0 || percent > 100 || isnan(percent))
     154           2 :         ereport(ERROR,
     155             :                 (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
     156             :                  errmsg("sample percentage must be between 0 and 100")));
     157             : 
     158             :     /*
     159             :      * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
     160             :      * store that as a uint64, of course.  Note that this gives strictly
     161             :      * correct behavior at the limits of zero or one probability.
     162             :      */
     163          13 :     dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
     164          13 :     sampler->cutoff = (uint64) dcutoff;
     165          13 :     sampler->seed = seed;
     166          13 :     sampler->nextblock = 0;
     167          13 :     sampler->lt = InvalidOffsetNumber;
     168             : 
     169             :     /*
     170             :      * Bulkread buffer access strategy probably makes sense unless we're
     171             :      * scanning a very small fraction of the table.  The 1% cutoff here is a
     172             :      * guess.  We should use pagemode visibility checking, since we scan all
     173             :      * tuples on each selected page.
     174             :      */
     175          13 :     node->use_bulkread = (percent >= 1);
     176          13 :     node->use_pagemode = true;
     177          13 : }
     178             : 
     179             : /*
     180             :  * Select next block to sample.
     181             :  */
     182             : static BlockNumber
     183         723 : system_nextsampleblock(SampleScanState *node)
     184             : {
     185         723 :     SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     186         723 :     HeapScanDesc scan = node->ss.ss_currentScanDesc;
     187         723 :     BlockNumber nextblock = sampler->nextblock;
     188             :     uint32      hashinput[2];
     189             : 
     190             :     /*
     191             :      * We compute the hash by applying hash_any to an array of 2 uint32's
     192             :      * containing the block number and seed.  This is efficient to set up, and
     193             :      * with the current implementation of hash_any, it gives
     194             :      * machine-independent results, which is a nice property for regression
     195             :      * testing.
     196             :      *
     197             :      * These words in the hash input are the same throughout the block:
     198             :      */
     199         723 :     hashinput[1] = sampler->seed;
     200             : 
     201             :     /*
     202             :      * Loop over block numbers until finding suitable block or reaching end of
     203             :      * relation.
     204             :      */
     205        1425 :     for (; nextblock < scan->rs_nblocks; nextblock++)
     206             :     {
     207             :         uint32      hash;
     208             : 
     209        1414 :         hashinput[0] = nextblock;
     210             : 
     211        1414 :         hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
     212             :                                        (int) sizeof(hashinput)));
     213        1414 :         if (hash < sampler->cutoff)
     214         712 :             break;
     215             :     }
     216             : 
     217         723 :     if (nextblock < scan->rs_nblocks)
     218             :     {
     219             :         /* Found a suitable block; remember where we should start next time */
     220         712 :         sampler->nextblock = nextblock + 1;
     221         712 :         return nextblock;
     222             :     }
     223             : 
     224             :     /* Done, but let's reset nextblock to 0 for safety. */
     225          11 :     sampler->nextblock = 0;
     226          11 :     return InvalidBlockNumber;
     227             : }
     228             : 
     229             : /*
     230             :  * Select next sampled tuple in current block.
     231             :  *
     232             :  * In block sampling, we just want to sample all the tuples in each selected
     233             :  * block.
     234             :  *
     235             :  * It is OK here to return an offset without knowing if the tuple is visible
     236             :  * (or even exists); nodeSamplescan.c will deal with that.
     237             :  *
     238             :  * When we reach end of the block, return InvalidOffsetNumber which tells
     239             :  * SampleScan to go to next block.
     240             :  */
     241             : static OffsetNumber
     242       20770 : system_nextsampletuple(SampleScanState *node,
     243             :                        BlockNumber blockno,
     244             :                        OffsetNumber maxoffset)
     245             : {
     246       20770 :     SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     247       20770 :     OffsetNumber tupoffset = sampler->lt;
     248             : 
     249             :     /* Advance to next possible offset on page */
     250       20770 :     if (tupoffset == InvalidOffsetNumber)
     251         712 :         tupoffset = FirstOffsetNumber;
     252             :     else
     253       20058 :         tupoffset++;
     254             : 
     255             :     /* Done? */
     256       20770 :     if (tupoffset > maxoffset)
     257         710 :         tupoffset = InvalidOffsetNumber;
     258             : 
     259       20770 :     sampler->lt = tupoffset;
     260             : 
     261       20770 :     return tupoffset;
     262             : }

Generated by: LCOV version 1.11