Line data Source code
1 : /*
2 : * brin_tuples.c
3 : * Method implementations for tuples in BRIN indexes.
4 : *
5 : * Intended usage is that code outside this file only deals with
6 : * BrinMemTuples, and convert to and from the on-disk representation through
7 : * functions in this file.
8 : *
9 : * NOTES
10 : *
11 : * A BRIN tuple is similar to a heap tuple, with a few key differences. The
12 : * first interesting difference is that the tuple header is much simpler, only
13 : * containing its total length and a small area for flags. Also, the stored
14 : * data does not match the relation tuple descriptor exactly: for each
15 : * attribute in the descriptor, the index tuple carries an arbitrary number
16 : * of values, depending on the opclass.
17 : *
18 : * Also, for each column of the index relation there are two null bits: one
19 : * (hasnulls) stores whether any tuple within the page range has that column
20 : * set to null; the other one (allnulls) stores whether the column values are
21 : * all null. If allnulls is true, then the tuple data area does not contain
22 : * values for that column at all; whereas it does if the hasnulls is set.
23 : * Note the size of the null bitmask may not be the same as that of the
24 : * datum array.
25 : *
26 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
27 : * Portions Copyright (c) 1994, Regents of the University of California
28 : *
29 : * IDENTIFICATION
30 : * src/backend/access/brin/brin_tuple.c
31 : */
32 : #include "postgres.h"
33 :
34 : #include "access/htup_details.h"
35 : #include "access/brin_tuple.h"
36 : #include "access/tupdesc.h"
37 : #include "access/tupmacs.h"
38 : #include "utils/datum.h"
39 : #include "utils/memutils.h"
40 :
41 :
42 : static inline void brin_deconstruct_tuple(BrinDesc *brdesc,
43 : char *tp, bits8 *nullbits, bool nulls,
44 : Datum *values, bool *allnulls, bool *hasnulls);
45 :
46 :
47 : /*
48 : * Return a tuple descriptor used for on-disk storage of BRIN tuples.
49 : */
50 : static TupleDesc
51 25788 : brtuple_disk_tupdesc(BrinDesc *brdesc)
52 : {
53 : /* We cache these in the BrinDesc */
54 25788 : if (brdesc->bd_disktdesc == NULL)
55 : {
56 : int i;
57 : int j;
58 304 : AttrNumber attno = 1;
59 : TupleDesc tupdesc;
60 : MemoryContext oldcxt;
61 :
62 : /* make sure it's in the bdesc's context */
63 304 : oldcxt = MemoryContextSwitchTo(brdesc->bd_context);
64 :
65 304 : tupdesc = CreateTemplateTupleDesc(brdesc->bd_totalstored, false);
66 :
67 7916 : for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
68 : {
69 23844 : for (j = 0; j < brdesc->bd_info[i]->oi_nstored; j++)
70 16232 : TupleDescInitEntry(tupdesc, attno++, NULL,
71 16232 : brdesc->bd_info[i]->oi_typcache[j]->type_id,
72 : -1, 0);
73 : }
74 :
75 304 : MemoryContextSwitchTo(oldcxt);
76 :
77 304 : brdesc->bd_disktdesc = tupdesc;
78 : }
79 :
80 25788 : return brdesc->bd_disktdesc;
81 : }
82 :
83 : /*
84 : * Generate a new on-disk tuple to be inserted in a BRIN index.
85 : *
86 : * See brin_form_placeholder_tuple if you touch this.
87 : */
88 : BrinTuple *
89 365 : brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
90 : Size *size)
91 : {
92 : Datum *values;
93 : bool *nulls;
94 365 : bool anynulls = false;
95 : BrinTuple *rettuple;
96 : int keyno;
97 : int idxattno;
98 365 : uint16 phony_infomask = 0;
99 : bits8 *phony_nullbitmap;
100 : Size len,
101 : hoff,
102 : data_len;
103 :
104 365 : Assert(brdesc->bd_totalstored > 0);
105 :
106 365 : values = (Datum *) palloc(sizeof(Datum) * brdesc->bd_totalstored);
107 365 : nulls = (bool *) palloc0(sizeof(bool) * brdesc->bd_totalstored);
108 365 : phony_nullbitmap = (bits8 *)
109 365 : palloc(sizeof(bits8) * BITMAPLEN(brdesc->bd_totalstored));
110 :
111 : /*
112 : * Set up the values/nulls arrays for heap_fill_tuple
113 : */
114 365 : idxattno = 0;
115 9894 : for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
116 : {
117 : int datumno;
118 :
119 : /*
120 : * "allnulls" is set when there's no nonnull value in any row in the
121 : * column; when this happens, there is no data to store. Thus set the
122 : * nullable bits for all data elements of this column and we're done.
123 : */
124 9529 : if (tuple->bt_columns[keyno].bv_allnulls)
125 : {
126 4 : for (datumno = 0;
127 3 : datumno < brdesc->bd_info[keyno]->oi_nstored;
128 2 : datumno++)
129 2 : nulls[idxattno++] = true;
130 1 : anynulls = true;
131 1 : continue;
132 : }
133 :
134 : /*
135 : * The "hasnulls" bit is set when there are some null values in the
136 : * data. We still need to store a real value, but the presence of
137 : * this means we need a null bitmap.
138 : */
139 9528 : if (tuple->bt_columns[keyno].bv_hasnulls)
140 475 : anynulls = true;
141 :
142 39376 : for (datumno = 0;
143 29848 : datumno < brdesc->bd_info[keyno]->oi_nstored;
144 20320 : datumno++)
145 20320 : values[idxattno++] = tuple->bt_columns[keyno].bv_values[datumno];
146 : }
147 :
148 : /* Assert we did not overrun temp arrays */
149 365 : Assert(idxattno <= brdesc->bd_totalstored);
150 :
151 : /* compute total space needed */
152 365 : len = SizeOfBrinTuple;
153 365 : if (anynulls)
154 : {
155 : /*
156 : * We need a double-length bitmap on an on-disk BRIN index tuple; the
157 : * first half stores the "allnulls" bits, the second stores
158 : * "hasnulls".
159 : */
160 20 : len += BITMAPLEN(brdesc->bd_tupdesc->natts * 2);
161 : }
162 :
163 365 : len = hoff = MAXALIGN(len);
164 :
165 365 : data_len = heap_compute_data_size(brtuple_disk_tupdesc(brdesc),
166 : values, nulls);
167 365 : len += data_len;
168 :
169 365 : len = MAXALIGN(len);
170 :
171 365 : rettuple = palloc0(len);
172 365 : rettuple->bt_blkno = blkno;
173 365 : rettuple->bt_info = hoff;
174 :
175 : /* Assert that hoff fits in the space available */
176 365 : Assert((rettuple->bt_info & BRIN_OFFSET_MASK) == hoff);
177 :
178 : /*
179 : * The infomask and null bitmap as computed by heap_fill_tuple are useless
180 : * to us. However, that function will not accept a null infomask; and we
181 : * need to pass a valid null bitmap so that it will correctly skip
182 : * outputting null attributes in the data area.
183 : */
184 365 : heap_fill_tuple(brtuple_disk_tupdesc(brdesc),
185 : values,
186 : nulls,
187 : (char *) rettuple + hoff,
188 : data_len,
189 : &phony_infomask,
190 : phony_nullbitmap);
191 :
192 : /* done with these */
193 365 : pfree(values);
194 365 : pfree(nulls);
195 365 : pfree(phony_nullbitmap);
196 :
197 : /*
198 : * Now fill in the real null bitmasks. allnulls first.
199 : */
200 365 : if (anynulls)
201 : {
202 : bits8 *bitP;
203 : int bitmask;
204 :
205 20 : rettuple->bt_info |= BRIN_NULLS_MASK;
206 :
207 : /*
208 : * Note that we reverse the sense of null bits in this module: we
209 : * store a 1 for a null attribute rather than a 0. So we must reverse
210 : * the sense of the att_isnull test in br_deconstruct_tuple as well.
211 : */
212 20 : bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1;
213 20 : bitmask = HIGHBIT;
214 591 : for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
215 : {
216 571 : if (bitmask != HIGHBIT)
217 494 : bitmask <<= 1;
218 : else
219 : {
220 77 : bitP += 1;
221 77 : *bitP = 0x0;
222 77 : bitmask = 1;
223 : }
224 :
225 571 : if (!tuple->bt_columns[keyno].bv_allnulls)
226 570 : continue;
227 :
228 1 : *bitP |= bitmask;
229 : }
230 : /* hasnulls bits follow */
231 591 : for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
232 : {
233 571 : if (bitmask != HIGHBIT)
234 495 : bitmask <<= 1;
235 : else
236 : {
237 76 : bitP += 1;
238 76 : *bitP = 0x0;
239 76 : bitmask = 1;
240 : }
241 :
242 571 : if (!tuple->bt_columns[keyno].bv_hasnulls)
243 96 : continue;
244 :
245 475 : *bitP |= bitmask;
246 : }
247 20 : bitP = ((bits8 *) (rettuple + SizeOfBrinTuple)) - 1;
248 : }
249 :
250 365 : if (tuple->bt_placeholder)
251 0 : rettuple->bt_info |= BRIN_PLACEHOLDER_MASK;
252 :
253 365 : *size = len;
254 365 : return rettuple;
255 : }
256 :
257 : /*
258 : * Generate a new on-disk tuple with no data values, marked as placeholder.
259 : *
260 : * This is a cut-down version of brin_form_tuple.
261 : */
262 : BrinTuple *
263 7 : brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size)
264 : {
265 : Size len;
266 : Size hoff;
267 : BrinTuple *rettuple;
268 : int keyno;
269 : bits8 *bitP;
270 : int bitmask;
271 :
272 : /* compute total space needed: always add nulls */
273 7 : len = SizeOfBrinTuple;
274 7 : len += BITMAPLEN(brdesc->bd_tupdesc->natts * 2);
275 7 : len = hoff = MAXALIGN(len);
276 :
277 7 : rettuple = palloc0(len);
278 7 : rettuple->bt_blkno = blkno;
279 7 : rettuple->bt_info = hoff;
280 7 : rettuple->bt_info |= BRIN_NULLS_MASK | BRIN_PLACEHOLDER_MASK;
281 :
282 7 : bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1;
283 7 : bitmask = HIGHBIT;
284 : /* set allnulls true for all attributes */
285 188 : for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
286 : {
287 181 : if (bitmask != HIGHBIT)
288 156 : bitmask <<= 1;
289 : else
290 : {
291 25 : bitP += 1;
292 25 : *bitP = 0x0;
293 25 : bitmask = 1;
294 : }
295 :
296 181 : *bitP |= bitmask;
297 : }
298 : /* no need to set hasnulls */
299 :
300 7 : *size = len;
301 7 : return rettuple;
302 : }
303 :
304 : /*
305 : * Free a tuple created by brin_form_tuple
306 : */
307 : void
308 14 : brin_free_tuple(BrinTuple *tuple)
309 : {
310 14 : pfree(tuple);
311 14 : }
312 :
313 : /*
314 : * Given a brin tuple of size len, create a copy of it. If 'dest' is not
315 : * NULL, its size is destsz, and can be used as output buffer; if the tuple
316 : * to be copied does not fit, it is enlarged by repalloc, and the size is
317 : * updated to match. This avoids palloc/free cycles when many brin tuples
318 : * are being processed in loops.
319 : */
320 : BrinTuple *
321 25011 : brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
322 : {
323 25011 : if (!destsz || *destsz == 0)
324 25011 : dest = palloc(len);
325 0 : else if (len > *destsz)
326 : {
327 0 : dest = repalloc(dest, len);
328 0 : *destsz = len;
329 : }
330 :
331 25011 : memcpy(dest, tuple, len);
332 :
333 25011 : return dest;
334 : }
335 :
336 : /*
337 : * Return whether two BrinTuples are bitwise identical.
338 : */
339 : bool
340 218 : brin_tuples_equal(const BrinTuple *a, Size alen, const BrinTuple *b, Size blen)
341 : {
342 218 : if (alen != blen)
343 0 : return false;
344 218 : if (memcmp(a, b, alen) != 0)
345 0 : return false;
346 218 : return true;
347 : }
348 :
349 : /*
350 : * Create a new BrinMemTuple from scratch, and initialize it to an empty
351 : * state.
352 : *
353 : * Note: we don't provide any means to free a deformed tuple, so make sure to
354 : * use a temporary memory context.
355 : */
356 : BrinMemTuple *
357 512 : brin_new_memtuple(BrinDesc *brdesc)
358 : {
359 : BrinMemTuple *dtup;
360 : long basesize;
361 :
362 512 : basesize = MAXALIGN(sizeof(BrinMemTuple) +
363 : sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
364 512 : dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored);
365 :
366 512 : dtup->bt_values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
367 512 : dtup->bt_allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
368 512 : dtup->bt_hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
369 :
370 512 : dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext,
371 : "brin dtuple",
372 : ALLOCSET_DEFAULT_SIZES);
373 :
374 512 : brin_memtuple_initialize(dtup, brdesc);
375 :
376 512 : return dtup;
377 : }
378 :
379 : /*
380 : * Reset a BrinMemTuple to initial state. We return the same tuple, for
381 : * notational convenience.
382 : */
383 : BrinMemTuple *
384 25468 : brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
385 : {
386 : int i;
387 : char *currdatum;
388 :
389 25468 : MemoryContextReset(dtuple->bt_context);
390 :
391 25468 : currdatum = (char *) dtuple +
392 25468 : MAXALIGN(sizeof(BrinMemTuple) +
393 : sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
394 786579 : for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
395 : {
396 761111 : dtuple->bt_columns[i].bv_allnulls = true;
397 761111 : dtuple->bt_columns[i].bv_hasnulls = false;
398 :
399 761111 : dtuple->bt_columns[i].bv_attno = i + 1;
400 761111 : dtuple->bt_columns[i].bv_allnulls = true;
401 761111 : dtuple->bt_columns[i].bv_hasnulls = false;
402 761111 : dtuple->bt_columns[i].bv_values = (Datum *) currdatum;
403 761111 : currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored;
404 : }
405 :
406 25468 : return dtuple;
407 : }
408 :
409 : /*
410 : * Convert a BrinTuple back to a BrinMemTuple. This is the reverse of
411 : * brin_form_tuple.
412 : *
413 : * As an optimization, the caller can pass a previously allocated 'dMemtuple'.
414 : * This avoids having to allocate it here, which can be useful when this
415 : * function is called many times in a loop. It is caller's responsibility
416 : * that the given BrinMemTuple matches what we need here.
417 : *
418 : * Note we don't need the "on disk tupdesc" here; we rely on our own routine to
419 : * deconstruct the tuple from the on-disk format.
420 : */
421 : BrinMemTuple *
422 25058 : brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
423 : {
424 : BrinMemTuple *dtup;
425 : Datum *values;
426 : bool *allnulls;
427 : bool *hasnulls;
428 : char *tp;
429 : bits8 *nullbits;
430 : int keyno;
431 : int valueno;
432 : MemoryContext oldcxt;
433 :
434 25058 : dtup = dMemtuple ? brin_memtuple_initialize(dMemtuple, brdesc) :
435 : brin_new_memtuple(brdesc);
436 :
437 25058 : if (BrinTupleIsPlaceholder(tuple))
438 0 : dtup->bt_placeholder = true;
439 25058 : dtup->bt_blkno = tuple->bt_blkno;
440 :
441 25058 : values = dtup->bt_values;
442 25058 : allnulls = dtup->bt_allnulls;
443 25058 : hasnulls = dtup->bt_hasnulls;
444 :
445 25058 : tp = (char *) tuple + BrinTupleDataOffset(tuple);
446 :
447 25058 : if (BrinTupleHasNulls(tuple))
448 1501 : nullbits = (bits8 *) ((char *) tuple + SizeOfBrinTuple);
449 : else
450 23557 : nullbits = NULL;
451 25058 : brin_deconstruct_tuple(brdesc,
452 25058 : tp, nullbits, BrinTupleHasNulls(tuple),
453 : values, allnulls, hasnulls);
454 :
455 : /*
456 : * Iterate to assign each of the values to the corresponding item in the
457 : * values array of each column. The copies occur in the tuple's context.
458 : */
459 25058 : oldcxt = MemoryContextSwitchTo(dtup->bt_context);
460 775406 : for (valueno = 0, keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
461 : {
462 : int i;
463 :
464 750348 : if (allnulls[keyno])
465 : {
466 1 : valueno += brdesc->bd_info[keyno]->oi_nstored;
467 1 : continue;
468 : }
469 :
470 : /*
471 : * We would like to skip datumCopy'ing the values datum in some cases,
472 : * caller permitting ...
473 : */
474 2351081 : for (i = 0; i < brdesc->bd_info[keyno]->oi_nstored; i++)
475 3201468 : dtup->bt_columns[keyno].bv_values[i] =
476 3201468 : datumCopy(values[valueno++],
477 1600734 : brdesc->bd_info[keyno]->oi_typcache[i]->typbyval,
478 1600734 : brdesc->bd_info[keyno]->oi_typcache[i]->typlen);
479 :
480 750347 : dtup->bt_columns[keyno].bv_hasnulls = hasnulls[keyno];
481 750347 : dtup->bt_columns[keyno].bv_allnulls = false;
482 : }
483 :
484 25058 : MemoryContextSwitchTo(oldcxt);
485 :
486 25058 : return dtup;
487 : }
488 :
489 : /*
490 : * brin_deconstruct_tuple
491 : * Guts of attribute extraction from an on-disk BRIN tuple.
492 : *
493 : * Its arguments are:
494 : * brdesc BRIN descriptor for the stored tuple
495 : * tp pointer to the tuple data area
496 : * nullbits pointer to the tuple nulls bitmask
497 : * nulls "has nulls" bit in tuple infomask
498 : * values output values, array of size brdesc->bd_totalstored
499 : * allnulls output "allnulls", size brdesc->bd_tupdesc->natts
500 : * hasnulls output "hasnulls", size brdesc->bd_tupdesc->natts
501 : *
502 : * Output arrays must have been allocated by caller.
503 : */
504 : static inline void
505 25058 : brin_deconstruct_tuple(BrinDesc *brdesc,
506 : char *tp, bits8 *nullbits, bool nulls,
507 : Datum *values, bool *allnulls, bool *hasnulls)
508 : {
509 : int attnum;
510 : int stored;
511 : TupleDesc diskdsc;
512 : long off;
513 :
514 : /*
515 : * First iterate to natts to obtain both null flags for each attribute.
516 : * Note that we reverse the sense of the att_isnull test, because we store
517 : * 1 for a null value (rather than a 1 for a not null value as is the
518 : * att_isnull convention used elsewhere.) See brin_form_tuple.
519 : */
520 775406 : for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++)
521 : {
522 : /*
523 : * the "all nulls" bit means that all values in the page range for
524 : * this column are nulls. Therefore there are no values in the tuple
525 : * data area.
526 : */
527 750348 : allnulls[attnum] = nulls && !att_isnull(attnum, nullbits);
528 :
529 : /*
530 : * the "has nulls" bit means that some tuples have nulls, but others
531 : * have not-null values. Therefore we know the tuple contains data
532 : * for this column.
533 : *
534 : * The hasnulls bits follow the allnulls bits in the same bitmask.
535 : */
536 1500696 : hasnulls[attnum] =
537 750348 : nulls && !att_isnull(brdesc->bd_tupdesc->natts + attnum, nullbits);
538 : }
539 :
540 : /*
541 : * Iterate to obtain each attribute's stored values. Note that since we
542 : * may reuse attribute entries for more than one column, we cannot cache
543 : * offsets here.
544 : */
545 25058 : diskdsc = brtuple_disk_tupdesc(brdesc);
546 25058 : stored = 0;
547 25058 : off = 0;
548 775406 : for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++)
549 : {
550 : int datumno;
551 :
552 750348 : if (allnulls[attnum])
553 : {
554 1 : stored += brdesc->bd_info[attnum]->oi_nstored;
555 1 : continue;
556 : }
557 :
558 3101428 : for (datumno = 0;
559 2351081 : datumno < brdesc->bd_info[attnum]->oi_nstored;
560 1600734 : datumno++)
561 : {
562 1600734 : Form_pg_attribute thisatt = TupleDescAttr(diskdsc, stored);
563 :
564 1600734 : if (thisatt->attlen == -1)
565 : {
566 475190 : off = att_align_pointer(off, thisatt->attalign, -1,
567 : tp + off);
568 : }
569 : else
570 : {
571 : /* not varlena, so safe to use att_align_nominal */
572 1125544 : off = att_align_nominal(off, thisatt->attalign);
573 : }
574 :
575 1600734 : values[stored++] = fetchatt(thisatt, tp + off);
576 :
577 1600734 : off = att_addlength_pointer(off, thisatt->attlen, tp + off);
578 : }
579 : }
580 25058 : }
|