Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * system.c
4 : * support routines for SYSTEM tablesample method
5 : *
6 : * To ensure repeatability of samples, it is necessary that selection of a
7 : * given tuple be history-independent; otherwise syncscanning would break
8 : * repeatability, to say nothing of logically-irrelevant maintenance such
9 : * as physical extension or shortening of the relation.
10 : *
11 : * To achieve that, we proceed by hashing each candidate block number together
12 : * with the active seed, and then selecting it if the hash is less than the
13 : * cutoff value computed from the selection probability by BeginSampleScan.
14 : *
15 : *
16 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
17 : * Portions Copyright (c) 1994, Regents of the University of California
18 : *
19 : * IDENTIFICATION
20 : * src/backend/access/tablesample/system.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 :
25 : #include "postgres.h"
26 :
27 : #ifdef _MSC_VER
28 : #include <float.h> /* for _isnan */
29 : #endif
30 : #include <math.h>
31 :
32 : #include "access/hash.h"
33 : #include "access/relscan.h"
34 : #include "access/tsmapi.h"
35 : #include "catalog/pg_type.h"
36 : #include "optimizer/clauses.h"
37 : #include "optimizer/cost.h"
38 : #include "utils/builtins.h"
39 :
40 :
41 : /* Private state */
42 : typedef struct
43 : {
44 : uint64 cutoff; /* select blocks with hash less than this */
45 : uint32 seed; /* random seed */
46 : BlockNumber nextblock; /* next block to consider sampling */
47 : OffsetNumber lt; /* last tuple returned from current block */
48 : } SystemSamplerData;
49 :
50 :
51 : static void system_samplescangetsamplesize(PlannerInfo *root,
52 : RelOptInfo *baserel,
53 : List *paramexprs,
54 : BlockNumber *pages,
55 : double *tuples);
56 : static void system_initsamplescan(SampleScanState *node,
57 : int eflags);
58 : static void system_beginsamplescan(SampleScanState *node,
59 : Datum *params,
60 : int nparams,
61 : uint32 seed);
62 : static BlockNumber system_nextsampleblock(SampleScanState *node);
63 : static OffsetNumber system_nextsampletuple(SampleScanState *node,
64 : BlockNumber blockno,
65 : OffsetNumber maxoffset);
66 :
67 :
68 : /*
69 : * Create a TsmRoutine descriptor for the SYSTEM method.
70 : */
71 : Datum
72 67 : tsm_system_handler(PG_FUNCTION_ARGS)
73 : {
74 67 : TsmRoutine *tsm = makeNode(TsmRoutine);
75 :
76 67 : tsm->parameterTypes = list_make1_oid(FLOAT4OID);
77 67 : tsm->repeatable_across_queries = true;
78 67 : tsm->repeatable_across_scans = true;
79 67 : tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
80 67 : tsm->InitSampleScan = system_initsamplescan;
81 67 : tsm->BeginSampleScan = system_beginsamplescan;
82 67 : tsm->NextSampleBlock = system_nextsampleblock;
83 67 : tsm->NextSampleTuple = system_nextsampletuple;
84 67 : tsm->EndSampleScan = NULL;
85 :
86 67 : PG_RETURN_POINTER(tsm);
87 : }
88 :
89 : /*
90 : * Sample size estimation.
91 : */
92 : static void
93 16 : system_samplescangetsamplesize(PlannerInfo *root,
94 : RelOptInfo *baserel,
95 : List *paramexprs,
96 : BlockNumber *pages,
97 : double *tuples)
98 : {
99 : Node *pctnode;
100 : float4 samplefract;
101 :
102 : /* Try to extract an estimate for the sample percentage */
103 16 : pctnode = (Node *) linitial(paramexprs);
104 16 : pctnode = estimate_expression_value(root, pctnode);
105 :
106 30 : if (IsA(pctnode, Const) &&
107 14 : !((Const *) pctnode)->constisnull)
108 : {
109 13 : samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
110 26 : if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
111 11 : samplefract /= 100.0f;
112 : else
113 : {
114 : /* Default samplefract if the value is bogus */
115 2 : samplefract = 0.1f;
116 : }
117 : }
118 : else
119 : {
120 : /* Default samplefract if we didn't obtain a non-null Const */
121 3 : samplefract = 0.1f;
122 : }
123 :
124 : /* We'll visit a sample of the pages ... */
125 16 : *pages = clamp_row_est(baserel->pages * samplefract);
126 :
127 : /* ... and hopefully get a representative number of tuples from them */
128 16 : *tuples = clamp_row_est(baserel->tuples * samplefract);
129 16 : }
130 :
131 : /*
132 : * Initialize during executor setup.
133 : */
134 : static void
135 16 : system_initsamplescan(SampleScanState *node, int eflags)
136 : {
137 16 : node->tsm_state = palloc0(sizeof(SystemSamplerData));
138 16 : }
139 :
140 : /*
141 : * Examine parameters and prepare for a sample scan.
142 : */
143 : static void
144 15 : system_beginsamplescan(SampleScanState *node,
145 : Datum *params,
146 : int nparams,
147 : uint32 seed)
148 : {
149 15 : SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
150 15 : double percent = DatumGetFloat4(params[0]);
151 : double dcutoff;
152 :
153 15 : if (percent < 0 || percent > 100 || isnan(percent))
154 2 : ereport(ERROR,
155 : (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
156 : errmsg("sample percentage must be between 0 and 100")));
157 :
158 : /*
159 : * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
160 : * store that as a uint64, of course. Note that this gives strictly
161 : * correct behavior at the limits of zero or one probability.
162 : */
163 13 : dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
164 13 : sampler->cutoff = (uint64) dcutoff;
165 13 : sampler->seed = seed;
166 13 : sampler->nextblock = 0;
167 13 : sampler->lt = InvalidOffsetNumber;
168 :
169 : /*
170 : * Bulkread buffer access strategy probably makes sense unless we're
171 : * scanning a very small fraction of the table. The 1% cutoff here is a
172 : * guess. We should use pagemode visibility checking, since we scan all
173 : * tuples on each selected page.
174 : */
175 13 : node->use_bulkread = (percent >= 1);
176 13 : node->use_pagemode = true;
177 13 : }
178 :
179 : /*
180 : * Select next block to sample.
181 : */
182 : static BlockNumber
183 723 : system_nextsampleblock(SampleScanState *node)
184 : {
185 723 : SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
186 723 : HeapScanDesc scan = node->ss.ss_currentScanDesc;
187 723 : BlockNumber nextblock = sampler->nextblock;
188 : uint32 hashinput[2];
189 :
190 : /*
191 : * We compute the hash by applying hash_any to an array of 2 uint32's
192 : * containing the block number and seed. This is efficient to set up, and
193 : * with the current implementation of hash_any, it gives
194 : * machine-independent results, which is a nice property for regression
195 : * testing.
196 : *
197 : * These words in the hash input are the same throughout the block:
198 : */
199 723 : hashinput[1] = sampler->seed;
200 :
201 : /*
202 : * Loop over block numbers until finding suitable block or reaching end of
203 : * relation.
204 : */
205 1425 : for (; nextblock < scan->rs_nblocks; nextblock++)
206 : {
207 : uint32 hash;
208 :
209 1414 : hashinput[0] = nextblock;
210 :
211 1414 : hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
212 : (int) sizeof(hashinput)));
213 1414 : if (hash < sampler->cutoff)
214 712 : break;
215 : }
216 :
217 723 : if (nextblock < scan->rs_nblocks)
218 : {
219 : /* Found a suitable block; remember where we should start next time */
220 712 : sampler->nextblock = nextblock + 1;
221 712 : return nextblock;
222 : }
223 :
224 : /* Done, but let's reset nextblock to 0 for safety. */
225 11 : sampler->nextblock = 0;
226 11 : return InvalidBlockNumber;
227 : }
228 :
229 : /*
230 : * Select next sampled tuple in current block.
231 : *
232 : * In block sampling, we just want to sample all the tuples in each selected
233 : * block.
234 : *
235 : * It is OK here to return an offset without knowing if the tuple is visible
236 : * (or even exists); nodeSamplescan.c will deal with that.
237 : *
238 : * When we reach end of the block, return InvalidOffsetNumber which tells
239 : * SampleScan to go to next block.
240 : */
241 : static OffsetNumber
242 20770 : system_nextsampletuple(SampleScanState *node,
243 : BlockNumber blockno,
244 : OffsetNumber maxoffset)
245 : {
246 20770 : SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
247 20770 : OffsetNumber tupoffset = sampler->lt;
248 :
249 : /* Advance to next possible offset on page */
250 20770 : if (tupoffset == InvalidOffsetNumber)
251 712 : tupoffset = FirstOffsetNumber;
252 : else
253 20058 : tupoffset++;
254 :
255 : /* Done? */
256 20770 : if (tupoffset > maxoffset)
257 710 : tupoffset = InvalidOffsetNumber;
258 :
259 20770 : sampler->lt = tupoffset;
260 :
261 20770 : return tupoffset;
262 : }
|