LCOV - code coverage report
Current view: top level - src/backend/access/gin - ginpostinglist.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 108 142 76.1 %
Date: 2017-09-29 15:12:54 Functions: 8 9 88.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * ginpostinglist.c
       4             :  *    routines for dealing with posting lists.
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *          src/backend/access/gin/ginpostinglist.c
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : 
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/gin_private.h"
      18             : 
      19             : #ifdef USE_ASSERT_CHECKING
      20             : #define CHECK_ENCODING_ROUNDTRIP
      21             : #endif
      22             : 
      23             : /*
      24             :  * For encoding purposes, item pointers are represented as 64-bit unsigned
      25             :  * integers. The lowest 11 bits represent the offset number, and the next
      26             :  * lowest 32 bits are the block number. That leaves 17 bits unused, i.e.
      27             :  * only 43 low bits are used.
      28             :  *
      29             :  * These 43-bit integers are encoded using varbyte encoding. In each byte,
      30             :  * the 7 low bits contain data, while the highest bit is a continuation bit.
      31             :  * When the continuation bit is set, the next byte is part of the same
      32             :  * integer, otherwise this is the last byte of this integer.  43 bits fit
      33             :  * conveniently in at most 6 bytes when varbyte encoded (the 6th byte does
      34             :  * not need a continuation bit, because we know the max size to be 43 bits):
      35             :  *
      36             :  * 0XXXXXXX
      37             :  * 1XXXXXXX 0XXXXYYY
      38             :  * 1XXXXXXX 1XXXXYYY 0YYYYYYY
      39             :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 0YYYYYYY
      40             :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
      41             :  * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY YYYYYYYY
      42             :  *
      43             :  * X = bits used for offset number
      44             :  * Y = bits used for block number
      45             :  *
      46             :  * The bytes are in stored in little-endian order.
      47             :  *
      48             :  * An important property of this encoding is that removing an item from list
      49             :  * never increases the size of the resulting compressed posting list. Proof:
      50             :  *
      51             :  * Removing number is actually replacement of two numbers with their sum. We
      52             :  * have to prove that varbyte encoding of a sum can't be longer than varbyte
      53             :  * encoding of its summands. Sum of two numbers is at most one bit wider than
      54             :  * the larger of the summands. Widening a number by one bit enlarges its length
      55             :  * in varbyte encoding by at most one byte. Therefore, varbyte encoding of sum
      56             :  * is at most one byte longer than varbyte encoding of larger summand. Lesser
      57             :  * summand is at least one byte, so the sum cannot take more space than the
      58             :  * summands, Q.E.D.
      59             :  *
      60             :  * This property greatly simplifies VACUUM, which can assume that posting
      61             :  * lists always fit on the same page after vacuuming. Note that even though
      62             :  * that holds for removing items from a posting list, you must also be
      63             :  * careful to not cause expansion e.g. when merging uncompressed items on the
      64             :  * page into the compressed lists, when vacuuming.
      65             :  */
      66             : 
      67             : /*
      68             :  * How many bits do you need to encode offset number? OffsetNumber is a 16-bit
      69             :  * integer, but you can't fit that many items on a page. 11 ought to be more
      70             :  * than enough. It's tempting to derive this from MaxHeapTuplesPerPage, and
      71             :  * use the minimum number of bits, but that would require changing the on-disk
      72             :  * format if MaxHeapTuplesPerPage changes. Better to leave some slack.
      73             :  */
      74             : #define MaxHeapTuplesPerPageBits        11
      75             : 
      76             : static inline uint64
      77     2222604 : itemptr_to_uint64(const ItemPointer iptr)
      78             : {
      79             :     uint64      val;
      80             : 
      81     2222604 :     Assert(ItemPointerIsValid(iptr));
      82     2222604 :     Assert(GinItemPointerGetOffsetNumber(iptr) < (1 << MaxHeapTuplesPerPageBits));
      83             : 
      84     2222604 :     val = GinItemPointerGetBlockNumber(iptr);
      85     2222604 :     val <<= MaxHeapTuplesPerPageBits;
      86     2222604 :     val |= GinItemPointerGetOffsetNumber(iptr);
      87             : 
      88     2222604 :     return val;
      89             : }
      90             : 
      91             : static inline void
      92     4232993 : uint64_to_itemptr(uint64 val, ItemPointer iptr)
      93             : {
      94     4232993 :     GinItemPointerSetOffsetNumber(iptr, val & ((1 << MaxHeapTuplesPerPageBits) - 1));
      95     4232993 :     val = val >> MaxHeapTuplesPerPageBits;
      96     4232993 :     GinItemPointerSetBlockNumber(iptr, val);
      97             : 
      98     4232993 :     Assert(ItemPointerIsValid(iptr));
      99     4232993 : }
     100             : 
     101             : /*
     102             :  * Varbyte-encode 'val' into *ptr. *ptr is incremented to next integer.
     103             :  */
     104             : static void
     105     2105700 : encode_varbyte(uint64 val, unsigned char **ptr)
     106             : {
     107     2105700 :     unsigned char *p = *ptr;
     108             : 
     109     4265335 :     while (val > 0x7F)
     110             :     {
     111       53935 :         *(p++) = 0x80 | (val & 0x7F);
     112       53935 :         val >>= 7;
     113             :     }
     114     2105700 :     *(p++) = (unsigned char) val;
     115             : 
     116     2105700 :     *ptr = p;
     117     2105700 : }
     118             : 
     119             : /*
     120             :  * Decode varbyte-encoded integer at *ptr. *ptr is incremented to next integer.
     121             :  */
     122             : static uint64
     123     4232993 : decode_varbyte(unsigned char **ptr)
     124             : {
     125             :     uint64      val;
     126     4232993 :     unsigned char *p = *ptr;
     127             :     uint64      c;
     128             : 
     129     4232993 :     c = *(p++);
     130     4232993 :     val = c & 0x7F;
     131     4232993 :     if (c & 0x80)
     132             :     {
     133       96831 :         c = *(p++);
     134       96831 :         val |= (c & 0x7F) << 7;
     135       96831 :         if (c & 0x80)
     136             :         {
     137        9663 :             c = *(p++);
     138        9663 :             val |= (c & 0x7F) << 14;
     139        9663 :             if (c & 0x80)
     140             :             {
     141           0 :                 c = *(p++);
     142           0 :                 val |= (c & 0x7F) << 21;
     143           0 :                 if (c & 0x80)
     144             :                 {
     145           0 :                     c = *(p++);
     146           0 :                     val |= (c & 0x7F) << 28;
     147           0 :                     if (c & 0x80)
     148             :                     {
     149           0 :                         c = *(p++);
     150           0 :                         val |= (c & 0x7F) << 35;
     151           0 :                         if (c & 0x80)
     152             :                         {
     153             :                             /* last byte, no continuation bit */
     154           0 :                             c = *(p++);
     155           0 :                             val |= c << 42;
     156             :                         }
     157             :                     }
     158             :                 }
     159             :             }
     160             :         }
     161             :     }
     162             : 
     163     4232993 :     *ptr = p;
     164             : 
     165     4232993 :     return val;
     166             : }
     167             : 
     168             : /*
     169             :  * Encode a posting list.
     170             :  *
     171             :  * The encoded list is returned in a palloc'd struct, which will be at most
     172             :  * 'maxsize' bytes in size.  The number items in the returned segment is
     173             :  * returned in *nwritten. If it's not equal to nipd, not all the items fit
     174             :  * in 'maxsize', and only the first *nwritten were encoded.
     175             :  *
     176             :  * The allocated size of the returned struct is short-aligned, and the padding
     177             :  * byte at the end, if any, is zero.
     178             :  */
     179             : GinPostingList *
     180       42674 : ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
     181             :                        int *nwritten)
     182             : {
     183             :     uint64      prev;
     184       42674 :     int         totalpacked = 0;
     185             :     int         maxbytes;
     186             :     GinPostingList *result;
     187             :     unsigned char *ptr;
     188             :     unsigned char *endptr;
     189             : 
     190       42674 :     maxsize = SHORTALIGN_DOWN(maxsize);
     191             : 
     192       42674 :     result = palloc(maxsize);
     193             : 
     194       42674 :     maxbytes = maxsize - offsetof(GinPostingList, bytes);
     195       42674 :     Assert(maxbytes > 0);
     196             : 
     197             :     /* Store the first special item */
     198       42674 :     result->first = ipd[0];
     199             : 
     200       42674 :     prev = itemptr_to_uint64(&result->first);
     201             : 
     202       42674 :     ptr = result->bytes;
     203       42674 :     endptr = result->bytes + maxbytes;
     204     2148185 :     for (totalpacked = 1; totalpacked < nipd; totalpacked++)
     205             :     {
     206     2105700 :         uint64      val = itemptr_to_uint64(&ipd[totalpacked]);
     207     2105700 :         uint64      delta = val - prev;
     208             : 
     209     2105700 :         Assert(val > prev);
     210             : 
     211     2105700 :         if (endptr - ptr >= 6)
     212     2104561 :             encode_varbyte(delta, &ptr);
     213             :         else
     214             :         {
     215             :             /*
     216             :              * There are less than 6 bytes left. Have to check if the next
     217             :              * item fits in that space before writing it out.
     218             :              */
     219             :             unsigned char buf[6];
     220        1139 :             unsigned char *p = buf;
     221             : 
     222        1139 :             encode_varbyte(delta, &p);
     223        1139 :             if (p - buf > (endptr - ptr))
     224         189 :                 break;          /* output is full */
     225             : 
     226         950 :             memcpy(ptr, buf, p - buf);
     227         950 :             ptr += (p - buf);
     228             :         }
     229     2105511 :         prev = val;
     230             :     }
     231       42674 :     result->nbytes = ptr - result->bytes;
     232             : 
     233             :     /*
     234             :      * If we wrote an odd number of bytes, zero out the padding byte at the
     235             :      * end.
     236             :      */
     237       42674 :     if (result->nbytes != SHORTALIGN(result->nbytes))
     238        7218 :         result->bytes[result->nbytes] = 0;
     239             : 
     240       42674 :     if (nwritten)
     241        3527 :         *nwritten = totalpacked;
     242             : 
     243       42674 :     Assert(SizeOfGinPostingList(result) <= maxsize);
     244             : 
     245             :     /*
     246             :      * Check that the encoded segment decodes back to the original items.
     247             :      */
     248             : #if defined (CHECK_ENCODING_ROUNDTRIP)
     249             :     {
     250             :         int         ndecoded;
     251       42674 :         ItemPointer tmp = ginPostingListDecode(result, &ndecoded);
     252             :         int         i;
     253             : 
     254       42674 :         Assert(ndecoded == totalpacked);
     255     2190859 :         for (i = 0; i < ndecoded; i++)
     256     2148185 :             Assert(memcmp(&tmp[i], &ipd[i], sizeof(ItemPointerData)) == 0);
     257       42674 :         pfree(tmp);
     258             :     }
     259             : #endif
     260             : 
     261       42674 :     return result;
     262             : }
     263             : 
     264             : /*
     265             :  * Decode a compressed posting list into an array of item pointers.
     266             :  * The number of items is returned in *ndecoded.
     267             :  */
     268             : ItemPointer
     269       74084 : ginPostingListDecode(GinPostingList *plist, int *ndecoded)
     270             : {
     271       74084 :     return ginPostingListDecodeAllSegments(plist,
     272       74084 :                                            SizeOfGinPostingList(plist),
     273             :                                            ndecoded);
     274             : }
     275             : 
     276             : /*
     277             :  * Decode multiple posting list segments into an array of item pointers.
     278             :  * The number of items is returned in *ndecoded_out. The segments are stored
     279             :  * one after each other, with total size 'len' bytes.
     280             :  */
     281             : ItemPointer
     282       74090 : ginPostingListDecodeAllSegments(GinPostingList *segment, int len, int *ndecoded_out)
     283             : {
     284             :     ItemPointer result;
     285             :     int         nallocated;
     286             :     uint64      val;
     287       74090 :     char       *endseg = ((char *) segment) + len;
     288             :     int         ndecoded;
     289             :     unsigned char *ptr;
     290             :     unsigned char *endptr;
     291             : 
     292             :     /*
     293             :      * Guess an initial size of the array.
     294             :      */
     295       74090 :     nallocated = segment->nbytes * 2 + 1;
     296       74090 :     result = palloc(nallocated * sizeof(ItemPointerData));
     297             : 
     298       74090 :     ndecoded = 0;
     299      222410 :     while ((char *) segment < endseg)
     300             :     {
     301             :         /* enlarge output array if needed */
     302       74230 :         if (ndecoded >= nallocated)
     303             :         {
     304           0 :             nallocated *= 2;
     305           0 :             result = repalloc(result, nallocated * sizeof(ItemPointerData));
     306             :         }
     307             : 
     308             :         /* copy the first item */
     309       74230 :         Assert(OffsetNumberIsValid(ItemPointerGetOffsetNumber(&segment->first)));
     310       74230 :         Assert(ndecoded == 0 || ginCompareItemPointers(&segment->first, &result[ndecoded - 1]) > 0);
     311       74230 :         result[ndecoded] = segment->first;
     312       74230 :         ndecoded++;
     313             : 
     314       74230 :         val = itemptr_to_uint64(&segment->first);
     315       74230 :         ptr = segment->bytes;
     316       74230 :         endptr = segment->bytes + segment->nbytes;
     317     4381453 :         while (ptr < endptr)
     318             :         {
     319             :             /* enlarge output array if needed */
     320     4232993 :             if (ndecoded >= nallocated)
     321             :             {
     322          24 :                 nallocated *= 2;
     323          24 :                 result = repalloc(result, nallocated * sizeof(ItemPointerData));
     324             :             }
     325             : 
     326     4232993 :             val += decode_varbyte(&ptr);
     327             : 
     328     4232993 :             uint64_to_itemptr(val, &result[ndecoded]);
     329     4232993 :             ndecoded++;
     330             :         }
     331       74230 :         segment = GinNextPostingListSegment(segment);
     332             :     }
     333             : 
     334       74090 :     if (ndecoded_out)
     335       74090 :         *ndecoded_out = ndecoded;
     336       74090 :     return result;
     337             : }
     338             : 
     339             : /*
     340             :  * Add all item pointers from a bunch of posting lists to a TIDBitmap.
     341             :  */
     342             : int
     343           0 : ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int len,
     344             :                                      TIDBitmap *tbm)
     345             : {
     346             :     int         ndecoded;
     347             :     ItemPointer items;
     348             : 
     349           0 :     items = ginPostingListDecodeAllSegments(ptr, len, &ndecoded);
     350           0 :     tbm_add_tuples(tbm, items, ndecoded, false);
     351           0 :     pfree(items);
     352             : 
     353           0 :     return ndecoded;
     354             : }
     355             : 
     356             : /*
     357             :  * Merge two ordered arrays of itempointers, eliminating any duplicates.
     358             :  *
     359             :  * Returns a palloc'd array, and *nmerged is set to the number of items in
     360             :  * the result, after eliminating duplicates.
     361             :  */
     362             : ItemPointer
     363        6981 : ginMergeItemPointers(ItemPointerData *a, uint32 na,
     364             :                      ItemPointerData *b, uint32 nb,
     365             :                      int *nmerged)
     366             : {
     367             :     ItemPointerData *dst;
     368             : 
     369        6981 :     dst = (ItemPointer) palloc((na + nb) * sizeof(ItemPointerData));
     370             : 
     371             :     /*
     372             :      * If the argument arrays don't overlap, we can just append them to each
     373             :      * other.
     374             :      */
     375        6981 :     if (na == 0 || nb == 0 || ginCompareItemPointers(&a[na - 1], &b[0]) < 0)
     376             :     {
     377        3316 :         memcpy(dst, a, na * sizeof(ItemPointerData));
     378        3316 :         memcpy(&dst[na], b, nb * sizeof(ItemPointerData));
     379        3316 :         *nmerged = na + nb;
     380             :     }
     381        3665 :     else if (ginCompareItemPointers(&b[nb - 1], &a[0]) < 0)
     382             :     {
     383        3665 :         memcpy(dst, b, nb * sizeof(ItemPointerData));
     384        3665 :         memcpy(&dst[nb], a, na * sizeof(ItemPointerData));
     385        3665 :         *nmerged = na + nb;
     386             :     }
     387             :     else
     388             :     {
     389           0 :         ItemPointerData *dptr = dst;
     390           0 :         ItemPointerData *aptr = a;
     391           0 :         ItemPointerData *bptr = b;
     392             : 
     393           0 :         while (aptr - a < na && bptr - b < nb)
     394             :         {
     395           0 :             int         cmp = ginCompareItemPointers(aptr, bptr);
     396             : 
     397           0 :             if (cmp > 0)
     398           0 :                 *dptr++ = *bptr++;
     399           0 :             else if (cmp == 0)
     400             :             {
     401             :                 /* only keep one copy of the identical items */
     402           0 :                 *dptr++ = *bptr++;
     403           0 :                 aptr++;
     404             :             }
     405             :             else
     406           0 :                 *dptr++ = *aptr++;
     407             :         }
     408             : 
     409           0 :         while (aptr - a < na)
     410           0 :             *dptr++ = *aptr++;
     411             : 
     412           0 :         while (bptr - b < nb)
     413           0 :             *dptr++ = *bptr++;
     414             : 
     415           0 :         *nmerged = dptr - dst;
     416             :     }
     417             : 
     418        6981 :     return dst;
     419             : }

Generated by: LCOV version 1.11