LCOV - PostgreSQL - src/backend/utils/adt/rangetypes

LCOV - code coverage report

Current view:	top level - src/backend/utils/adt - rangetypes_gist.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL	Lines:	381	472	80.7 %
Date:	2017-09-29 13:40:31	Functions:	20	22	90.9 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * rangetypes_gist.c
       4             :  *    GiST support for range types.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/utils/adt/rangetypes_gist.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include "access/gist.h"
      18             : #include "access/stratnum.h"
      19             : #include "utils/builtins.h"
      20             : #include "utils/datum.h"
      21             : #include "utils/rangetypes.h"
      22             : 
      23             : 
      24             : /*
      25             :  * Range class properties used to segregate different classes of ranges in
      26             :  * GiST.  Each unique combination of properties is a class.  CLS_EMPTY cannot
      27             :  * be combined with anything else.
      28             :  */
      29             : #define CLS_NORMAL          0   /* Ordinary finite range (no bits set) */
      30             : #define CLS_LOWER_INF       1   /* Lower bound is infinity */
      31             : #define CLS_UPPER_INF       2   /* Upper bound is infinity */
      32             : #define CLS_CONTAIN_EMPTY   4   /* Contains underlying empty ranges */
      33             : #define CLS_EMPTY           8   /* Special class for empty ranges */
      34             : 
      35             : #define CLS_COUNT           9   /* # of classes; includes all combinations of
      36             :                                  * properties. CLS_EMPTY doesn't combine with
      37             :                                  * anything else, so it's only 2^3 + 1. */
      38             : 
      39             : /*
      40             :  * Minimum accepted ratio of split for items of the same class.  If the items
      41             :  * are of different classes, we will separate along those lines regardless of
      42             :  * the ratio.
      43             :  */
      44             : #define LIMIT_RATIO  0.3
      45             : 
      46             : /* Constants for fixed penalty values */
      47             : #define INFINITE_BOUND_PENALTY  2.0
      48             : #define CONTAIN_EMPTY_PENALTY  1.0
      49             : #define DEFAULT_SUBTYPE_DIFF_PENALTY  1.0
      50             : 
      51             : /*
      52             :  * Per-item data for range_gist_single_sorting_split.
      53             :  */
      54             : typedef struct
      55             : {
      56             :     int         index;
      57             :     RangeBound  bound;
      58             : } SingleBoundSortItem;
      59             : 
      60             : /* place on left or right side of split? */
      61             : typedef enum
      62             : {
      63             :     SPLIT_LEFT = 0,             /* makes initialization to SPLIT_LEFT easier */
      64             :     SPLIT_RIGHT
      65             : } SplitLR;
      66             : 
      67             : /*
      68             :  * Context for range_gist_consider_split.
      69             :  */
      70             : typedef struct
      71             : {
      72             :     TypeCacheEntry *typcache;   /* typcache for range type */
      73             :     bool        has_subtype_diff;   /* does it have subtype_diff? */
      74             :     int         entries_count;  /* total number of entries being split */
      75             : 
      76             :     /* Information about currently selected split follows */
      77             : 
      78             :     bool        first;          /* true if no split was selected yet */
      79             : 
      80             :     RangeBound *left_upper;     /* upper bound of left interval */
      81             :     RangeBound *right_lower;    /* lower bound of right interval */
      82             : 
      83             :     float4      ratio;          /* split ratio */
      84             :     float4      overlap;        /* overlap between left and right predicate */
      85             :     int         common_left;    /* # common entries destined for each side */
      86             :     int         common_right;
      87             : } ConsiderSplitContext;
      88             : 
      89             : /*
      90             :  * Bounds extracted from a non-empty range, for use in
      91             :  * range_gist_double_sorting_split.
      92             :  */
      93             : typedef struct
      94             : {
      95             :     RangeBound  lower;
      96             :     RangeBound  upper;
      97             : } NonEmptyRange;
      98             : 
      99             : /*
     100             :  * Represents information about an entry that can be placed in either group
     101             :  * without affecting overlap over selected axis ("common entry").
     102             :  */
     103             : typedef struct
     104             : {
     105             :     /* Index of entry in the initial array */
     106             :     int         index;
     107             :     /* Delta between closeness of range to each of the two groups */
     108             :     double      delta;
     109             : } CommonEntry;
     110             : 
     111             : /* Helper macros to place an entry in the left or right group during split */
     112             : /* Note direct access to variables v, typcache, left_range, right_range */
     113             : #define PLACE_LEFT(range, off)                  \
     114             :     do {                                        \
     115             :         if (v->spl_nleft > 0)                 \
     116             :             left_range = range_super_union(typcache, left_range, range); \
     117             :         else                                    \
     118             :             left_range = (range);               \
     119             :         v->spl_left[v->spl_nleft++] = (off);  \
     120             :     } while(0)
     121             : 
     122             : #define PLACE_RIGHT(range, off)                 \
     123             :     do {                                        \
     124             :         if (v->spl_nright > 0)                    \
     125             :             right_range = range_super_union(typcache, right_range, range); \
     126             :         else                                    \
     127             :             right_range = (range);              \
     128             :         v->spl_right[v->spl_nright++] = (off);    \
     129             :     } while(0)
     130             : 
     131             : /* Copy a RangeType datum (hardwires typbyval and typlen for ranges...) */
     132             : #define rangeCopy(r) \
     133             :     ((RangeType *) DatumGetPointer(datumCopy(PointerGetDatum(r), \
     134             :                                              false, -1)))
     135             : 
     136             : static RangeType *range_super_union(TypeCacheEntry *typcache, RangeType *r1,
     137             :                   RangeType *r2);
     138             : static bool range_gist_consistent_int(TypeCacheEntry *typcache,
     139             :                           StrategyNumber strategy, RangeType *key,
     140             :                           Datum query);
     141             : static bool range_gist_consistent_leaf(TypeCacheEntry *typcache,
     142             :                            StrategyNumber strategy, RangeType *key,
     143             :                            Datum query);
     144             : static void range_gist_fallback_split(TypeCacheEntry *typcache,
     145             :                           GistEntryVector *entryvec,
     146             :                           GIST_SPLITVEC *v);
     147             : static void range_gist_class_split(TypeCacheEntry *typcache,
     148             :                        GistEntryVector *entryvec,
     149             :                        GIST_SPLITVEC *v,
     150             :                        SplitLR *classes_groups);
     151             : static void range_gist_single_sorting_split(TypeCacheEntry *typcache,
     152             :                                 GistEntryVector *entryvec,
     153             :                                 GIST_SPLITVEC *v,
     154             :                                 bool use_upper_bound);
     155             : static void range_gist_double_sorting_split(TypeCacheEntry *typcache,
     156             :                                 GistEntryVector *entryvec,
     157             :                                 GIST_SPLITVEC *v);
     158             : static void range_gist_consider_split(ConsiderSplitContext *context,
     159             :                           RangeBound *right_lower, int min_left_count,
     160             :                           RangeBound *left_upper, int max_left_count);
     161             : static int  get_gist_range_class(RangeType *range);
     162             : static int  single_bound_cmp(const void *a, const void *b, void *arg);
     163             : static int  interval_cmp_lower(const void *a, const void *b, void *arg);
     164             : static int  interval_cmp_upper(const void *a, const void *b, void *arg);
     165             : static int  common_entry_cmp(const void *i1, const void *i2);
     166             : static float8 call_subtype_diff(TypeCacheEntry *typcache,
     167             :                   Datum val1, Datum val2);
     168             : 
     169             : 
     170             : /* GiST query consistency check */
     171             : Datum
     172       53951 : range_gist_consistent(PG_FUNCTION_ARGS)
     173             : {
     174       53951 :     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
     175       53951 :     Datum       query = PG_GETARG_DATUM(1);
     176       53951 :     StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
     177             : 
     178             :     /* Oid subtype = PG_GETARG_OID(3); */
     179       53951 :     bool       *recheck = (bool *) PG_GETARG_POINTER(4);
     180       53951 :     RangeType  *key = DatumGetRangeType(entry->key);
     181             :     TypeCacheEntry *typcache;
     182             : 
     183             :     /* All operators served by this function are exact */
     184       53951 :     *recheck = false;
     185             : 
     186       53951 :     typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));
     187             : 
     188       53951 :     if (GIST_LEAF(entry))
     189       53225 :         PG_RETURN_BOOL(range_gist_consistent_leaf(typcache, strategy,
     190             :                                                   key, query));
     191             :     else
     192         726 :         PG_RETURN_BOOL(range_gist_consistent_int(typcache, strategy,
     193             :                                                  key, query));
     194             : }
     195             : 
     196             : /* form union range */
     197             : Datum
     198       11874 : range_gist_union(PG_FUNCTION_ARGS)
     199             : {
     200       11874 :     GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
     201       11874 :     GISTENTRY  *ent = entryvec->vector;
     202             :     RangeType  *result_range;
     203             :     TypeCacheEntry *typcache;
     204             :     int         i;
     205             : 
     206       11874 :     result_range = DatumGetRangeType(ent[0].key);
     207             : 
     208       11874 :     typcache = range_get_typcache(fcinfo, RangeTypeGetOid(result_range));
     209             : 
     210       27600 :     for (i = 1; i < entryvec->n; i++)
     211             :     {
     212       15726 :         result_range = range_super_union(typcache, result_range,
     213       15726 :                                          DatumGetRangeType(ent[i].key));
     214             :     }
     215             : 
     216       11874 :     PG_RETURN_RANGE(result_range);
     217             : }
     218             : 
     219             : /* compress, decompress, fetch are no-ops */
     220             : Datum
     221       18141 : range_gist_compress(PG_FUNCTION_ARGS)
     222             : {
     223       18141 :     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
     224             : 
     225       18141 :     PG_RETURN_POINTER(entry);
     226             : }
     227             : 
     228             : Datum
     229      267814 : range_gist_decompress(PG_FUNCTION_ARGS)
     230             : {
     231      267814 :     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
     232             : 
     233      267814 :     PG_RETURN_POINTER(entry);
     234             : }
     235             : 
     236             : Datum
     237       34468 : range_gist_fetch(PG_FUNCTION_ARGS)
     238             : {
     239       34468 :     GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
     240             : 
     241       34468 :     PG_RETURN_POINTER(entry);
     242             : }
     243             : 
     244             : /*
     245             :  * GiST page split penalty function.
     246             :  *
     247             :  * The penalty function has the following goals (in order from most to least
     248             :  * important):
     249             :  * - Keep normal ranges separate
     250             :  * - Avoid broadening the class of the original predicate
     251             :  * - Avoid broadening (as determined by subtype_diff) the original predicate
     252             :  * - Favor adding ranges to narrower original predicates
     253             :  */
     254             : Datum
     255      159829 : range_gist_penalty(PG_FUNCTION_ARGS)
     256             : {
     257      159829 :     GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
     258      159829 :     GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
     259      159829 :     float      *penalty = (float *) PG_GETARG_POINTER(2);
     260      159829 :     RangeType  *orig = DatumGetRangeType(origentry->key);
     261      159829 :     RangeType  *new = DatumGetRangeType(newentry->key);
     262             :     TypeCacheEntry *typcache;
     263             :     bool        has_subtype_diff;
     264             :     RangeBound  orig_lower,
     265             :                 new_lower,
     266             :                 orig_upper,
     267             :                 new_upper;
     268             :     bool        orig_empty,
     269             :                 new_empty;
     270             : 
     271      159829 :     if (RangeTypeGetOid(orig) != RangeTypeGetOid(new))
     272           0 :         elog(ERROR, "range types do not match");
     273             : 
     274      159829 :     typcache = range_get_typcache(fcinfo, RangeTypeGetOid(orig));
     275             : 
     276      159829 :     has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
     277             : 
     278      159829 :     range_deserialize(typcache, orig, &orig_lower, &orig_upper, &orig_empty);
     279      159829 :     range_deserialize(typcache, new, &new_lower, &new_upper, &new_empty);
     280             : 
     281             :     /*
     282             :      * Distinct branches for handling distinct classes of ranges.  Note that
     283             :      * penalty values only need to be commensurate within the same class of
     284             :      * new range.
     285             :      */
     286      159829 :     if (new_empty)
     287             :     {
     288             :         /* Handle insertion of empty range */
     289       34758 :         if (orig_empty)
     290             :         {
     291             :             /*
     292             :              * The best case is to insert it to empty original range.
     293             :              * Insertion here means no broadening of original range. Also
     294             :              * original range is the most narrow.
     295             :              */
     296        2369 :             *penalty = 0.0;
     297             :         }
     298       32389 :         else if (RangeIsOrContainsEmpty(orig))
     299             :         {
     300             :             /*
     301             :              * The second case is to insert empty range into range which
     302             :              * contains at least one underlying empty range.  There is still
     303             :              * no broadening of original range, but original range is not as
     304             :              * narrow as possible.
     305             :              */
     306         386 :             *penalty = CONTAIN_EMPTY_PENALTY;
     307             :         }
     308       32003 :         else if (orig_lower.infinite && orig_upper.infinite)
     309             :         {
     310             :             /*
     311             :              * Original range requires broadening.  (-inf; +inf) is most far
     312             :              * from normal range in this case.
     313             :              */
     314           0 :             *penalty = 2 * CONTAIN_EMPTY_PENALTY;
     315             :         }
     316       32003 :         else if (orig_lower.infinite || orig_upper.infinite)
     317             :         {
     318             :             /*
     319             :              * (-inf, x) or (x, +inf) original ranges are closer to normal
     320             :              * ranges, so it's worse to mix it with empty ranges.
     321             :              */
     322           0 :             *penalty = 3 * CONTAIN_EMPTY_PENALTY;
     323             :         }
     324             :         else
     325             :         {
     326             :             /*
     327             :              * The least preferred case is broadening of normal range.
     328             :              */
     329       32003 :             *penalty = 4 * CONTAIN_EMPTY_PENALTY;
     330             :         }
     331             :     }
     332      125071 :     else if (new_lower.infinite && new_upper.infinite)
     333             :     {
     334             :         /* Handle insertion of (-inf, +inf) range */
     335           0 :         if (orig_lower.infinite && orig_upper.infinite)
     336             :         {
     337             :             /*
     338             :              * Best case is inserting to (-inf, +inf) original range.
     339             :              */
     340           0 :             *penalty = 0.0;
     341             :         }
     342           0 :         else if (orig_lower.infinite || orig_upper.infinite)
     343             :         {
     344             :             /*
     345             :              * When original range is (-inf, x) or (x, +inf) it requires
     346             :              * broadening of original range (extension of one bound to
     347             :              * infinity).
     348             :              */
     349           0 :             *penalty = INFINITE_BOUND_PENALTY;
     350             :         }
     351             :         else
     352             :         {
     353             :             /*
     354             :              * Insertion to normal original range is least preferred.
     355             :              */
     356           0 :             *penalty = 2 * INFINITE_BOUND_PENALTY;
     357             :         }
     358             : 
     359           0 :         if (RangeIsOrContainsEmpty(orig))
     360             :         {
     361             :             /*
     362             :              * Original range is narrower when it doesn't contain empty
     363             :              * ranges. Add additional penalty otherwise.
     364             :              */
     365           0 :             *penalty += CONTAIN_EMPTY_PENALTY;
     366             :         }
     367             :     }
     368      125071 :     else if (new_lower.infinite)
     369             :     {
     370             :         /* Handle insertion of (-inf, x) range */
     371        4668 :         if (!orig_empty && orig_lower.infinite)
     372             :         {
     373         396 :             if (orig_upper.infinite)
     374             :             {
     375             :                 /*
     376             :                  * (-inf, +inf) range won't be extended by insertion of (-inf,
     377             :                  * x) range. It's a less desirable case than insertion to
     378             :                  * (-inf, y) original range without extension, because in that
     379             :                  * case original range is narrower. But we can't express that
     380             :                  * in single float value.
     381             :                  */
     382           0 :                 *penalty = 0.0;
     383             :             }
     384             :             else
     385             :             {
     386         198 :                 if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
     387             :                 {
     388             :                     /*
     389             :                      * Get extension of original range using subtype_diff. Use
     390             :                      * constant if subtype_diff unavailable.
     391             :                      */
     392         143 :                     if (has_subtype_diff)
     393         143 :                         *penalty = call_subtype_diff(typcache,
     394             :                                                      new_upper.val,
     395             :                                                      orig_upper.val);
     396             :                     else
     397           0 :                         *penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
     398             :                 }
     399             :                 else
     400             :                 {
     401             :                     /* No extension of original range */
     402          55 :                     *penalty = 0.0;
     403             :                 }
     404             :             }
     405             :         }
     406             :         else
     407             :         {
     408             :             /*
     409             :              * If lower bound of original range is not -inf, then extension of
     410             :              * it is infinity.
     411             :              */
     412        4470 :             *penalty = get_float4_infinity();
     413             :         }
     414             :     }
     415      120403 :     else if (new_upper.infinite)
     416             :     {
     417             :         /* Handle insertion of (x, +inf) range */
     418        2976 :         if (!orig_empty && orig_upper.infinite)
     419             :         {
     420         396 :             if (orig_lower.infinite)
     421             :             {
     422             :                 /*
     423             :                  * (-inf, +inf) range won't be extended by insertion of (x,
     424             :                  * +inf) range. It's a less desirable case than insertion to
     425             :                  * (y, +inf) original range without extension, because in that
     426             :                  * case original range is narrower. But we can't express that
     427             :                  * in single float value.
     428             :                  */
     429          99 :                 *penalty = 0.0;
     430             :             }
     431             :             else
     432             :             {
     433          99 :                 if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
     434             :                 {
     435             :                     /*
     436             :                      * Get extension of original range using subtype_diff. Use
     437             :                      * constant if subtype_diff unavailable.
     438             :                      */
     439           0 :                     if (has_subtype_diff)
     440           0 :                         *penalty = call_subtype_diff(typcache,
     441             :                                                      orig_lower.val,
     442             :                                                      new_lower.val);
     443             :                     else
     444           0 :                         *penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
     445             :                 }
     446             :                 else
     447             :                 {
     448             :                     /* No extension of original range */
     449          99 :                     *penalty = 0.0;
     450             :                 }
     451             :             }
     452             :         }
     453             :         else
     454             :         {
     455             :             /*
     456             :              * If upper bound of original range is not +inf, then extension of
     457             :              * it is infinity.
     458             :              */
     459        2778 :             *penalty = get_float4_infinity();
     460             :         }
     461             :     }
     462             :     else
     463             :     {
     464             :         /* Handle insertion of normal (non-empty, non-infinite) range */
     465      117427 :         if (orig_empty || orig_lower.infinite || orig_upper.infinite)
     466             :         {
     467             :             /*
     468             :              * Avoid mixing normal ranges with infinite and empty ranges.
     469             :              */
     470        9315 :             *penalty = get_float4_infinity();
     471             :         }
     472             :         else
     473             :         {
     474             :             /*
     475             :              * Calculate extension of original range by calling subtype_diff.
     476             :              * Use constant if subtype_diff unavailable.
     477             :              */
     478      108112 :             float8      diff = 0.0;
     479             : 
     480      108112 :             if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
     481             :             {
     482       36471 :                 if (has_subtype_diff)
     483       36471 :                     diff += call_subtype_diff(typcache,
     484             :                                               orig_lower.val,
     485             :                                               new_lower.val);
     486             :                 else
     487           0 :                     diff += DEFAULT_SUBTYPE_DIFF_PENALTY;
     488             :             }
     489      108112 :             if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
     490             :             {
     491       83684 :                 if (has_subtype_diff)
     492       83684 :                     diff += call_subtype_diff(typcache,
     493             :                                               new_upper.val,
     494             :                                               orig_upper.val);
     495             :                 else
     496           0 :                     diff += DEFAULT_SUBTYPE_DIFF_PENALTY;
     497             :             }
     498      108112 :             *penalty = diff;
     499             :         }
     500             :     }
     501             : 
     502      159829 :     PG_RETURN_POINTER(penalty);
     503             : }
     504             : 
     505             : /*
     506             :  * The GiST PickSplit method for ranges
     507             :  *
     508             :  * Primarily, we try to segregate ranges of different classes.  If splitting
     509             :  * ranges of the same class, use the appropriate split method for that class.
     510             :  */
     511             : Datum
     512          64 : range_gist_picksplit(PG_FUNCTION_ARGS)
     513             : {
     514          64 :     GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
     515          64 :     GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
     516             :     TypeCacheEntry *typcache;
     517             :     OffsetNumber i;
     518             :     RangeType  *pred_left;
     519             :     int         nbytes;
     520             :     OffsetNumber maxoff;
     521             :     int         count_in_classes[CLS_COUNT];
     522             :     int         j;
     523          64 :     int         non_empty_classes_count = 0;
     524          64 :     int         biggest_class = -1;
     525          64 :     int         biggest_class_count = 0;
     526             :     int         total_count;
     527             : 
     528             :     /* use first item to look up range type's info */
     529          64 :     pred_left = DatumGetRangeType(entryvec->vector[FirstOffsetNumber].key);
     530          64 :     typcache = range_get_typcache(fcinfo, RangeTypeGetOid(pred_left));
     531             : 
     532          64 :     maxoff = entryvec->n - 1;
     533          64 :     nbytes = (maxoff + 1) * sizeof(OffsetNumber);
     534          64 :     v->spl_left = (OffsetNumber *) palloc(nbytes);
     535          64 :     v->spl_right = (OffsetNumber *) palloc(nbytes);
     536             : 
     537             :     /*
     538             :      * Get count distribution of range classes.
     539             :      */
     540          64 :     memset(count_in_classes, 0, sizeof(count_in_classes));
     541       18560 :     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
     542             :     {
     543       18496 :         RangeType  *range = DatumGetRangeType(entryvec->vector[i].key);
     544             : 
     545       18496 :         count_in_classes[get_gist_range_class(range)]++;
     546             :     }
     547             : 
     548             :     /*
     549             :      * Count non-empty classes and find biggest class.
     550             :      */
     551          64 :     total_count = maxoff;
     552         640 :     for (j = 0; j < CLS_COUNT; j++)
     553             :     {
     554         576 :         if (count_in_classes[j] > 0)
     555             :         {
     556          68 :             if (count_in_classes[j] > biggest_class_count)
     557             :             {
     558          66 :                 biggest_class_count = count_in_classes[j];
     559          66 :                 biggest_class = j;
     560             :             }
     561          68 :             non_empty_classes_count++;
     562             :         }
     563             :     }
     564             : 
     565          64 :     Assert(non_empty_classes_count > 0);
     566             : 
     567          64 :     if (non_empty_classes_count == 1)
     568             :     {
     569             :         /* One non-empty class, so split inside class */
     570          61 :         if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_NORMAL)
     571             :         {
     572             :             /* double sorting split for normal ranges */
     573          55 :             range_gist_double_sorting_split(typcache, entryvec, v);
     574             :         }
     575           6 :         else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_LOWER_INF)
     576             :         {
     577             :             /* upper bound sorting split for (-inf, x) ranges */
     578           0 :             range_gist_single_sorting_split(typcache, entryvec, v, true);
     579             :         }
     580           6 :         else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_UPPER_INF)
     581             :         {
     582             :             /* lower bound sorting split for (x, +inf) ranges */
     583           0 :             range_gist_single_sorting_split(typcache, entryvec, v, false);
     584             :         }
     585             :         else
     586             :         {
     587             :             /* trivial split for all (-inf, +inf) or all empty ranges */
     588           6 :             range_gist_fallback_split(typcache, entryvec, v);
     589             :         }
     590             :     }
     591             :     else
     592             :     {
     593             :         /*
     594             :          * Class based split.
     595             :          *
     596             :          * To which side of the split should each class go?  Initialize them
     597             :          * all to go to the left side.
     598             :          */
     599             :         SplitLR     classes_groups[CLS_COUNT];
     600             : 
     601           3 :         memset(classes_groups, 0, sizeof(classes_groups));
     602             : 
     603           3 :         if (count_in_classes[CLS_NORMAL] > 0)
     604             :         {
     605             :             /* separate normal ranges if any */
     606           3 :             classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
     607             :         }
     608             :         else
     609             :         {
     610             :             /*----------
     611             :              * Try to split classes in one of two ways:
     612             :              *  1) containing infinities - not containing infinities
     613             :              *  2) containing empty - not containing empty
     614             :              *
     615             :              * Select the way which balances the ranges between left and right
     616             :              * the best. If split in these ways is not possible, there are at
     617             :              * most 3 classes, so just separate biggest class.
     618             :              *----------
     619             :              */
     620             :             int         infCount,
     621             :                         nonInfCount;
     622             :             int         emptyCount,
     623             :                         nonEmptyCount;
     624             : 
     625           0 :             nonInfCount =
     626           0 :                 count_in_classes[CLS_NORMAL] +
     627           0 :                 count_in_classes[CLS_CONTAIN_EMPTY] +
     628           0 :                 count_in_classes[CLS_EMPTY];
     629           0 :             infCount = total_count - nonInfCount;
     630             : 
     631           0 :             nonEmptyCount =
     632           0 :                 count_in_classes[CLS_NORMAL] +
     633           0 :                 count_in_classes[CLS_LOWER_INF] +
     634           0 :                 count_in_classes[CLS_UPPER_INF] +
     635           0 :                 count_in_classes[CLS_LOWER_INF | CLS_UPPER_INF];
     636           0 :             emptyCount = total_count - nonEmptyCount;
     637             : 
     638           0 :             if (infCount > 0 && nonInfCount > 0 &&
     639           0 :                 (Abs(infCount - nonInfCount) <=
     640           0 :                  Abs(emptyCount - nonEmptyCount)))
     641             :             {
     642           0 :                 classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
     643           0 :                 classes_groups[CLS_CONTAIN_EMPTY] = SPLIT_RIGHT;
     644           0 :                 classes_groups[CLS_EMPTY] = SPLIT_RIGHT;
     645             :             }
     646           0 :             else if (emptyCount > 0 && nonEmptyCount > 0)
     647             :             {
     648           0 :                 classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
     649           0 :                 classes_groups[CLS_LOWER_INF] = SPLIT_RIGHT;
     650           0 :                 classes_groups[CLS_UPPER_INF] = SPLIT_RIGHT;
     651           0 :                 classes_groups[CLS_LOWER_INF | CLS_UPPER_INF] = SPLIT_RIGHT;
     652             :             }
     653             :             else
     654             :             {
     655             :                 /*
     656             :                  * Either total_count == emptyCount or total_count ==
     657             :                  * infCount.
     658             :                  */
     659           0 :                 classes_groups[biggest_class] = SPLIT_RIGHT;
     660             :             }
     661             :         }
     662             : 
     663           3 :         range_gist_class_split(typcache, entryvec, v, classes_groups);
     664             :     }
     665             : 
     666          64 :     PG_RETURN_POINTER(v);
     667             : }
     668             : 
     669             : /* equality comparator for GiST */
     670             : Datum
     671       11846 : range_gist_same(PG_FUNCTION_ARGS)
     672             : {
     673       11846 :     RangeType  *r1 = PG_GETARG_RANGE(0);
     674       11846 :     RangeType  *r2 = PG_GETARG_RANGE(1);
     675       11846 :     bool       *result = (bool *) PG_GETARG_POINTER(2);
     676             : 
     677             :     /*
     678             :      * range_eq will ignore the RANGE_CONTAIN_EMPTY flag, so we have to check
     679             :      * that for ourselves.  More generally, if the entries have been properly
     680             :      * normalized, then unequal flags bytes must mean unequal ranges ... so
     681             :      * let's just test all the flag bits at once.
     682             :      */
     683       11846 :     if (range_get_flags(r1) != range_get_flags(r2))
     684           6 :         *result = false;
     685             :     else
     686             :     {
     687             :         TypeCacheEntry *typcache;
     688             : 
     689       11840 :         typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
     690             : 
     691       11840 :         *result = range_eq_internal(typcache, r1, r2);
     692             :     }
     693             : 
     694       11846 :     PG_RETURN_POINTER(result);
     695             : }
     696             : 
     697             : /*
     698             :  *----------------------------------------------------------
     699             :  * STATIC FUNCTIONS
     700             :  *----------------------------------------------------------
     701             :  */
     702             : 
     703             : /*
     704             :  * Return the smallest range that contains r1 and r2
     705             :  *
     706             :  * This differs from regular range_union in two critical ways:
     707             :  * 1. It won't throw an error for non-adjacent r1 and r2, but just absorb
     708             :  * the intervening values into the result range.
     709             :  * 2. We track whether any empty range has been union'd into the result,
     710             :  * so that contained_by searches can be indexed.  Note that this means
     711             :  * that *all* unions formed within the GiST index must go through here.
     712             :  */
     713             : static RangeType *
     714       34108 : range_super_union(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2)
     715             : {
     716             :     RangeType  *result;
     717             :     RangeBound  lower1,
     718             :                 lower2;
     719             :     RangeBound  upper1,
     720             :                 upper2;
     721             :     bool        empty1,
     722             :                 empty2;
     723             :     char        flags1,
     724             :                 flags2;
     725             :     RangeBound *result_lower;
     726             :     RangeBound *result_upper;
     727             : 
     728       34108 :     range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
     729       34108 :     range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
     730       34108 :     flags1 = range_get_flags(r1);
     731       34108 :     flags2 = range_get_flags(r2);
     732             : 
     733       34108 :     if (empty1)
     734             :     {
     735             :         /* We can return r2 as-is if it already is or contains empty */
     736        4311 :         if (flags2 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
     737        4311 :             return r2;
     738             :         /* Else we'd better copy it (modify-in-place isn't safe) */
     739           0 :         r2 = rangeCopy(r2);
     740           0 :         range_set_contain_empty(r2);
     741           0 :         return r2;
     742             :     }
     743       29797 :     if (empty2)
     744             :     {
     745             :         /* We can return r1 as-is if it already is or contains empty */
     746         388 :         if (flags1 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
     747         386 :             return r1;
     748             :         /* Else we'd better copy it (modify-in-place isn't safe) */
     749           2 :         r1 = rangeCopy(r1);
     750           2 :         range_set_contain_empty(r1);
     751           2 :         return r1;
     752             :     }
     753             : 
     754       29409 :     if (range_cmp_bounds(typcache, &lower1, &lower2) <= 0)
     755       29397 :         result_lower = &lower1;
     756             :     else
     757          12 :         result_lower = &lower2;
     758             : 
     759       29409 :     if (range_cmp_bounds(typcache, &upper1, &upper2) >= 0)
     760        6841 :         result_upper = &upper1;
     761             :     else
     762       22568 :         result_upper = &upper2;
     763             : 
     764             :     /* optimization to avoid constructing a new range */
     765       29409 :     if (result_lower == &lower1 && result_upper == &upper1 &&
     766        6839 :         ((flags1 & RANGE_CONTAIN_EMPTY) || !(flags2 & RANGE_CONTAIN_EMPTY)))
     767        6839 :         return r1;
     768       22570 :     if (result_lower == &lower2 && result_upper == &upper2 &&
     769          10 :         ((flags2 & RANGE_CONTAIN_EMPTY) || !(flags1 & RANGE_CONTAIN_EMPTY)))
     770          10 :         return r2;
     771             : 
     772       22560 :     result = make_range(typcache, result_lower, result_upper, false);
     773             : 
     774       22560 :     if ((flags1 & RANGE_CONTAIN_EMPTY) || (flags2 & RANGE_CONTAIN_EMPTY))
     775           0 :         range_set_contain_empty(result);
     776             : 
     777       22560 :     return result;
     778             : }
     779             : 
     780             : /*
     781             :  * GiST consistent test on an index internal page
     782             :  */
     783             : static bool
     784         726 : range_gist_consistent_int(TypeCacheEntry *typcache, StrategyNumber strategy,
     785             :                           RangeType *key, Datum query)
     786             : {
     787         726 :     switch (strategy)
     788             :     {
     789             :         case RANGESTRAT_BEFORE:
     790          66 :             if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeType(query)))
     791           8 :                 return false;
     792          58 :             return (!range_overright_internal(typcache, key,
     793          58 :                                               DatumGetRangeType(query)));
     794             :         case RANGESTRAT_OVERLEFT:
     795          66 :             if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeType(query)))
     796           8 :                 return false;
     797          58 :             return (!range_after_internal(typcache, key,
     798          58 :                                           DatumGetRangeType(query)));
     799             :         case RANGESTRAT_OVERLAPS:
     800          66 :             return range_overlaps_internal(typcache, key,
     801          66 :                                            DatumGetRangeType(query));
     802             :         case RANGESTRAT_OVERRIGHT:
     803          66 :             if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeType(query)))
     804           8 :                 return false;
     805          58 :             return (!range_before_internal(typcache, key,
     806          58 :                                            DatumGetRangeType(query)));
     807             :         case RANGESTRAT_AFTER:
     808          66 :             if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeType(query)))
     809           8 :                 return false;
     810          58 :             return (!range_overleft_internal(typcache, key,
     811          58 :                                              DatumGetRangeType(query)));
     812             :         case RANGESTRAT_ADJACENT:
     813          66 :             if (RangeIsEmpty(key) || RangeIsEmpty(DatumGetRangeType(query)))
     814           8 :                 return false;
     815          58 :             if (range_adjacent_internal(typcache, key,
     816          58 :                                         DatumGetRangeType(query)))
     817           0 :                 return true;
     818          58 :             return range_overlaps_internal(typcache, key,
     819          58 :                                            DatumGetRangeType(query));
     820             :         case RANGESTRAT_CONTAINS:
     821         132 :             return range_contains_internal(typcache, key,
     822         132 :                                            DatumGetRangeType(query));
     823             :         case RANGESTRAT_CONTAINED_BY:
     824             : 
     825             :             /*
     826             :              * Empty ranges are contained by anything, so if key is or
     827             :              * contains any empty ranges, we must descend into it.  Otherwise,
     828             :              * descend only if key overlaps the query.
     829             :              */
     830          66 :             if (RangeIsOrContainsEmpty(key))
     831           8 :                 return true;
     832          58 :             return range_overlaps_internal(typcache, key,
     833          58 :                                            DatumGetRangeType(query));
     834             :         case RANGESTRAT_CONTAINS_ELEM:
     835          66 :             return range_contains_elem_internal(typcache, key, query);
     836             :         case RANGESTRAT_EQ:
     837             : 
     838             :             /*
     839             :              * If query is empty, descend only if the key is or contains any
     840             :              * empty ranges.  Otherwise, descend if key contains query.
     841             :              */
     842          66 :             if (RangeIsEmpty(DatumGetRangeType(query)))
     843           0 :                 return RangeIsOrContainsEmpty(key);
     844          66 :             return range_contains_internal(typcache, key,
     845          66 :                                            DatumGetRangeType(query));
     846             :         default:
     847           0 :             elog(ERROR, "unrecognized range strategy: %d", strategy);
     848             :             return false;       /* keep compiler quiet */
     849             :     }
     850             : }
     851             : 
     852             : /*
     853             :  * GiST consistent test on an index leaf page
     854             :  */
     855             : static bool
     856       53225 : range_gist_consistent_leaf(TypeCacheEntry *typcache, StrategyNumber strategy,
     857             :                            RangeType *key, Datum query)
     858             : {
     859       53225 :     switch (strategy)
     860             :     {
     861             :         case RANGESTRAT_BEFORE:
     862        2000 :             return range_before_internal(typcache, key,
     863        2000 :                                          DatumGetRangeType(query));
     864             :         case RANGESTRAT_OVERLEFT:
     865        4774 :             return range_overleft_internal(typcache, key,
     866        4774 :                                            DatumGetRangeType(query));
     867             :         case RANGESTRAT_OVERLAPS:
     868        1428 :             return range_overlaps_internal(typcache, key,
     869        1428 :                                            DatumGetRangeType(query));
     870             :         case RANGESTRAT_OVERRIGHT:
     871       10400 :             return range_overright_internal(typcache, key,
     872       10400 :                                             DatumGetRangeType(query));
     873             :         case RANGESTRAT_AFTER:
     874        9318 :             return range_after_internal(typcache, key,
     875        9318 :                                         DatumGetRangeType(query));
     876             :         case RANGESTRAT_ADJACENT:
     877        4774 :             return range_adjacent_internal(typcache, key,
     878        4774 :                                            DatumGetRangeType(query));
     879             :         case RANGESTRAT_CONTAINS:
     880       13815 :             return range_contains_internal(typcache, key,
     881       13815 :                                            DatumGetRangeType(query));
     882             :         case RANGESTRAT_CONTAINED_BY:
     883        3861 :             return range_contained_by_internal(typcache, key,
     884        3861 :                                                DatumGetRangeType(query));
     885             :         case RANGESTRAT_CONTAINS_ELEM:
     886        1415 :             return range_contains_elem_internal(typcache, key, query);
     887             :         case RANGESTRAT_EQ:
     888        1440 :             return range_eq_internal(typcache, key, DatumGetRangeType(query));
     889             :         default:
     890           0 :             elog(ERROR, "unrecognized range strategy: %d", strategy);
     891             :             return false;       /* keep compiler quiet */
     892             :     }
     893             : }
     894             : 
     895             : /*
     896             :  * Trivial split: half of entries will be placed on one page
     897             :  * and the other half on the other page.
     898             :  */
     899             : static void
     900           6 : range_gist_fallback_split(TypeCacheEntry *typcache,
     901             :                           GistEntryVector *entryvec,
     902             :                           GIST_SPLITVEC *v)
     903             : {
     904           6 :     RangeType  *left_range = NULL;
     905           6 :     RangeType  *right_range = NULL;
     906             :     OffsetNumber i,
     907             :                 maxoff,
     908             :                 split_idx;
     909             : 
     910           6 :     maxoff = entryvec->n - 1;
     911             :     /* Split entries before this to left page, after to right: */
     912           6 :     split_idx = (maxoff - FirstOffsetNumber) / 2 + FirstOffsetNumber;
     913             : 
     914           6 :     v->spl_nleft = 0;
     915           6 :     v->spl_nright = 0;
     916        2331 :     for (i = FirstOffsetNumber; i <= maxoff; i++)
     917             :     {
     918        2325 :         RangeType  *range = DatumGetRangeType(entryvec->vector[i].key);
     919             : 
     920        2325 :         if (i < split_idx)
     921        1158 :             PLACE_LEFT(range, i);
     922             :         else
     923        1167 :             PLACE_RIGHT(range, i);
     924             :     }
     925             : 
     926           6 :     v->spl_ldatum = RangeTypeGetDatum(left_range);
     927           6 :     v->spl_rdatum = RangeTypeGetDatum(right_range);
     928           6 : }
     929             : 
     930             : /*
     931             :  * Split based on classes of ranges.
     932             :  *
     933             :  * See get_gist_range_class for class definitions.
     934             :  * classes_groups is an array of length CLS_COUNT indicating the side of the
     935             :  * split to which each class should go.
     936             :  */
     937             : static void
     938           3 : range_gist_class_split(TypeCacheEntry *typcache,
     939             :                        GistEntryVector *entryvec,
     940             :                        GIST_SPLITVEC *v,
     941             :                        SplitLR *classes_groups)
     942             : {
     943           3 :     RangeType  *left_range = NULL;
     944           3 :     RangeType  *right_range = NULL;
     945             :     OffsetNumber i,
     946             :                 maxoff;
     947             : 
     948           3 :     maxoff = entryvec->n - 1;
     949             : 
     950           3 :     v->spl_nleft = 0;
     951           3 :     v->spl_nright = 0;
     952         984 :     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
     953             :     {
     954         981 :         RangeType  *range = DatumGetRangeType(entryvec->vector[i].key);
     955             :         int         class;
     956             : 
     957             :         /* Get class of range */
     958         981 :         class = get_gist_range_class(range);
     959             : 
     960             :         /* Place range to appropriate page */
     961         981 :         if (classes_groups[class] == SPLIT_LEFT)
     962         558 :             PLACE_LEFT(range, i);
     963             :         else
     964             :         {
     965         423 :             Assert(classes_groups[class] == SPLIT_RIGHT);
     966         423 :             PLACE_RIGHT(range, i);
     967             :         }
     968             :     }
     969             : 
     970           3 :     v->spl_ldatum = RangeTypeGetDatum(left_range);
     971           3 :     v->spl_rdatum = RangeTypeGetDatum(right_range);
     972           3 : }
     973             : 
     974             : /*
     975             :  * Sorting based split. First half of entries according to the sort will be
     976             :  * placed to one page, and second half of entries will be placed to other
     977             :  * page. use_upper_bound parameter indicates whether to use upper or lower
     978             :  * bound for sorting.
     979             :  */
     980             : static void
     981           0 : range_gist_single_sorting_split(TypeCacheEntry *typcache,
     982             :                                 GistEntryVector *entryvec,
     983             :                                 GIST_SPLITVEC *v,
     984             :                                 bool use_upper_bound)
     985             : {
     986             :     SingleBoundSortItem *sortItems;
     987           0 :     RangeType  *left_range = NULL;
     988           0 :     RangeType  *right_range = NULL;
     989             :     OffsetNumber i,
     990             :                 maxoff,
     991             :                 split_idx;
     992             : 
     993           0 :     maxoff = entryvec->n - 1;
     994             : 
     995           0 :     sortItems = (SingleBoundSortItem *)
     996           0 :         palloc(maxoff * sizeof(SingleBoundSortItem));
     997             : 
     998             :     /*
     999             :      * Prepare auxiliary array and sort the values.
    1000             :      */
    1001           0 :     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
    1002             :     {
    1003           0 :         RangeType  *range = DatumGetRangeType(entryvec->vector[i].key);
    1004             :         RangeBound  bound2;
    1005             :         bool        empty;
    1006             : 
    1007           0 :         sortItems[i - 1].index = i;
    1008             :         /* Put appropriate bound into array */
    1009           0 :         if (use_upper_bound)
    1010           0 :             range_deserialize(typcache, range, &bound2,
    1011           0 :                               &sortItems[i - 1].bound, &empty);
    1012             :         else
    1013           0 :             range_deserialize(typcache, range, &sortItems[i - 1].bound,
    1014             :                               &bound2, &empty);
    1015           0 :         Assert(!empty);
    1016             :     }
    1017             : 
    1018           0 :     qsort_arg(sortItems, maxoff, sizeof(SingleBoundSortItem),
    1019             :               single_bound_cmp, typcache);
    1020             : 
    1021           0 :     split_idx = maxoff / 2;
    1022             : 
    1023           0 :     v->spl_nleft = 0;
    1024           0 :     v->spl_nright = 0;
    1025             : 
    1026           0 :     for (i = 0; i < maxoff; i++)
    1027             :     {
    1028           0 :         int         idx = sortItems[i].index;
    1029           0 :         RangeType  *range = DatumGetRangeType(entryvec->vector[idx].key);
    1030             : 
    1031           0 :         if (i < split_idx)
    1032           0 :             PLACE_LEFT(range, idx);
    1033             :         else
    1034           0 :             PLACE_RIGHT(range, idx);
    1035             :     }
    1036             : 
    1037           0 :     v->spl_ldatum = RangeTypeGetDatum(left_range);
    1038           0 :     v->spl_rdatum = RangeTypeGetDatum(right_range);
    1039           0 : }
    1040             : 
    1041             : /*
    1042             :  * Double sorting split algorithm.
    1043             :  *
    1044             :  * The algorithm considers dividing ranges into two groups. The first (left)
    1045             :  * group contains general left bound. The second (right) group contains
    1046             :  * general right bound. The challenge is to find upper bound of left group
    1047             :  * and lower bound of right group so that overlap of groups is minimal and
    1048             :  * ratio of distribution is acceptable. Algorithm finds for each lower bound of
    1049             :  * right group minimal upper bound of left group, and for each upper bound of
    1050             :  * left group maximal lower bound of right group. For each found pair
    1051             :  * range_gist_consider_split considers replacement of currently selected
    1052             :  * split with the new one.
    1053             :  *
    1054             :  * After that, all the entries are divided into three groups:
    1055             :  * 1) Entries which should be placed to the left group
    1056             :  * 2) Entries which should be placed to the right group
    1057             :  * 3) "Common entries" which can be placed to either group without affecting
    1058             :  *    amount of overlap.
    1059             :  *
    1060             :  * The common ranges are distributed by difference of distance from lower
    1061             :  * bound of common range to lower bound of right group and distance from upper
    1062             :  * bound of common range to upper bound of left group.
    1063             :  *
    1064             :  * For details see:
    1065             :  * "A new double sorting-based node splitting algorithm for R-tree",
    1066             :  * A. Korotkov
    1067             :  * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36
    1068             :  */
    1069             : static void
    1070          55 : range_gist_double_sorting_split(TypeCacheEntry *typcache,
    1071             :                                 GistEntryVector *entryvec,
    1072             :                                 GIST_SPLITVEC *v)
    1073             : {
    1074             :     ConsiderSplitContext context;
    1075             :     OffsetNumber i,
    1076             :                 maxoff;
    1077             :     RangeType  *range,
    1078          55 :                *left_range = NULL,
    1079          55 :                *right_range = NULL;
    1080             :     int         common_entries_count;
    1081             :     NonEmptyRange *by_lower,
    1082             :                *by_upper;
    1083             :     CommonEntry *common_entries;
    1084             :     int         nentries,
    1085             :                 i1,
    1086             :                 i2;
    1087             :     RangeBound *right_lower,
    1088             :                *left_upper;
    1089             : 
    1090          55 :     memset(&context, 0, sizeof(ConsiderSplitContext));
    1091          55 :     context.typcache = typcache;
    1092          55 :     context.has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
    1093             : 
    1094          55 :     maxoff = entryvec->n - 1;
    1095          55 :     nentries = context.entries_count = maxoff - FirstOffsetNumber + 1;
    1096          55 :     context.first = true;
    1097             : 
    1098             :     /* Allocate arrays for sorted range bounds */
    1099          55 :     by_lower = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
    1100          55 :     by_upper = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
    1101             : 
    1102             :     /* Fill arrays of bounds */
    1103       15245 :     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
    1104             :     {
    1105       15190 :         RangeType  *range = DatumGetRangeType(entryvec->vector[i].key);
    1106             :         bool        empty;
    1107             : 
    1108       30380 :         range_deserialize(typcache, range,
    1109       15190 :                           &by_lower[i - FirstOffsetNumber].lower,
    1110       15190 :                           &by_lower[i - FirstOffsetNumber].upper,
    1111             :                           &empty);
    1112       15190 :         Assert(!empty);
    1113             :     }
    1114             : 
    1115             :     /*
    1116             :      * Make two arrays of range bounds: one sorted by lower bound and another
    1117             :      * sorted by upper bound.
    1118             :      */
    1119          55 :     memcpy(by_upper, by_lower, nentries * sizeof(NonEmptyRange));
    1120          55 :     qsort_arg(by_lower, nentries, sizeof(NonEmptyRange),
    1121             :               interval_cmp_lower, typcache);
    1122          55 :     qsort_arg(by_upper, nentries, sizeof(NonEmptyRange),
    1123             :               interval_cmp_upper, typcache);
    1124             : 
    1125             :     /*----------
    1126             :      * The goal is to form a left and right range, so that every entry
    1127             :      * range is contained by either left or right interval (or both).
    1128             :      *
    1129             :      * For example, with the ranges (0,1), (1,3), (2,3), (2,4):
    1130             :      *
    1131             :      * 0 1 2 3 4
    1132             :      * +-+
    1133             :      *   +---+
    1134             :      *     +-+
    1135             :      *     +---+
    1136             :      *
    1137             :      * The left and right ranges are of the form (0,a) and (b,4).
    1138             :      * We first consider splits where b is the lower bound of an entry.
    1139             :      * We iterate through all entries, and for each b, calculate the
    1140             :      * smallest possible a. Then we consider splits where a is the
    1141             :      * upper bound of an entry, and for each a, calculate the greatest
    1142             :      * possible b.
    1143             :      *
    1144             :      * In the above example, the first loop would consider splits:
    1145             :      * b=0: (0,1)-(0,4)
    1146             :      * b=1: (0,1)-(1,4)
    1147             :      * b=2: (0,3)-(2,4)
    1148             :      *
    1149             :      * And the second loop:
    1150             :      * a=1: (0,1)-(1,4)
    1151             :      * a=3: (0,3)-(2,4)
    1152             :      * a=4: (0,4)-(2,4)
    1153             :      *----------
    1154             :      */
    1155             : 
    1156             :     /*
    1157             :      * Iterate over lower bound of right group, finding smallest possible
    1158             :      * upper bound of left group.
    1159             :      */
    1160          55 :     i1 = 0;
    1161          55 :     i2 = 0;
    1162          55 :     right_lower = &by_lower[i1].lower;
    1163          55 :     left_upper = &by_upper[i2].lower;
    1164             :     while (true)
    1165             :     {
    1166             :         /*
    1167             :          * Find next lower bound of right group.
    1168             :          */
    1169       75130 :         while (i1 < nentries &&
    1170       30070 :                range_cmp_bounds(typcache, right_lower,
    1171       30070 :                                 &by_lower[i1].lower) == 0)
    1172             :         {
    1173       15190 :             if (range_cmp_bounds(typcache, &by_lower[i1].upper,
    1174             :                                  left_upper) > 0)
    1175       12485 :                 left_upper = &by_lower[i1].upper;
    1176       15190 :             i1++;
    1177             :         }
    1178       14935 :         if (i1 >= nentries)
    1179          55 :             break;
    1180       14880 :         right_lower = &by_lower[i1].lower;
    1181             : 
    1182             :         /*
    1183             :          * Find count of ranges which anyway should be placed to the left
    1184             :          * group.
    1185             :          */
    1186       72508 :         while (i2 < nentries &&
    1187       27589 :                range_cmp_bounds(typcache, &by_upper[i2].upper,
    1188             :                                 left_upper) <= 0)
    1189       15159 :             i2++;
    1190             : 
    1191             :         /*
    1192             :          * Consider found split to see if it's better than what we had.
    1193             :          */
    1194       14880 :         range_gist_consider_split(&context, right_lower, i1, left_upper, i2);
    1195       14880 :     }
    1196             : 
    1197             :     /*
    1198             :      * Iterate over upper bound of left group finding greatest possible lower
    1199             :      * bound of right group.
    1200             :      */
    1201          55 :     i1 = nentries - 1;
    1202          55 :     i2 = nentries - 1;
    1203          55 :     right_lower = &by_lower[i1].upper;
    1204          55 :     left_upper = &by_upper[i2].upper;
    1205             :     while (true)
    1206             :     {
    1207             :         /*
    1208             :          * Find next upper bound of left group.
    1209             :          */
    1210       75895 :         while (i2 >= 0 &&
    1211       30325 :                range_cmp_bounds(typcache, left_upper,
    1212       30325 :                                 &by_upper[i2].upper) == 0)
    1213             :         {
    1214       15190 :             if (range_cmp_bounds(typcache, &by_upper[i2].lower,
    1215             :                                  right_lower) < 0)
    1216       12485 :                 right_lower = &by_upper[i2].lower;
    1217       15190 :             i2--;
    1218             :         }
    1219       15190 :         if (i2 < 0)
    1220          55 :             break;
    1221       15135 :         left_upper = &by_upper[i2].upper;
    1222             : 
    1223             :         /*
    1224             :          * Find count of intervals which anyway should be placed to the right
    1225             :          * group.
    1226             :          */
    1227       73018 :         while (i1 >= 0 &&
    1228       27589 :                range_cmp_bounds(typcache, &by_lower[i1].lower,
    1229             :                                 right_lower) >= 0)
    1230       15159 :             i1--;
    1231             : 
    1232             :         /*
    1233             :          * Consider found split to see if it's better than what we had.
    1234             :          */
    1235       15135 :         range_gist_consider_split(&context, right_lower, i1 + 1,
    1236             :                                   left_upper, i2 + 1);
    1237       15135 :     }
    1238             : 
    1239             :     /*
    1240             :      * If we failed to find any acceptable splits, use trivial split.
    1241             :      */
    1242          55 :     if (context.first)
    1243             :     {
    1244           0 :         range_gist_fallback_split(typcache, entryvec, v);
    1245          55 :         return;
    1246             :     }
    1247             : 
    1248             :     /*
    1249             :      * Ok, we have now selected bounds of the groups. Now we have to
    1250             :      * distribute entries themselves. At first we distribute entries which can
    1251             :      * be placed unambiguously and collect "common entries" to array.
    1252             :      */
    1253             : 
    1254             :     /* Allocate vectors for results */
    1255          55 :     v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
    1256          55 :     v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
    1257          55 :     v->spl_nleft = 0;
    1258          55 :     v->spl_nright = 0;
    1259             : 
    1260             :     /*
    1261             :      * Allocate an array for "common entries" - entries which can be placed to
    1262             :      * either group without affecting overlap along selected axis.
    1263             :      */
    1264          55 :     common_entries_count = 0;
    1265          55 :     common_entries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
    1266             : 
    1267             :     /*
    1268             :      * Distribute entries which can be distributed unambiguously, and collect
    1269             :      * common entries.
    1270             :      */
    1271       15245 :     for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
    1272             :     {
    1273             :         RangeBound  lower,
    1274             :                     upper;
    1275             :         bool        empty;
    1276             : 
    1277             :         /*
    1278             :          * Get upper and lower bounds along selected axis.
    1279             :          */
    1280       15190 :         range = DatumGetRangeType(entryvec->vector[i].key);
    1281             : 
    1282       15190 :         range_deserialize(typcache, range, &lower, &upper, &empty);
    1283             : 
    1284       15190 :         if (range_cmp_bounds(typcache, &upper, context.left_upper) <= 0)
    1285             :         {
    1286             :             /* Fits in the left group */
    1287        6819 :             if (range_cmp_bounds(typcache, &lower, context.right_lower) >= 0)
    1288             :             {
    1289             :                 /* Fits also in the right group, so "common entry" */
    1290        1559 :                 common_entries[common_entries_count].index = i;
    1291        1559 :                 if (context.has_subtype_diff)
    1292             :                 {
    1293             :                     /*
    1294             :                      * delta = (lower - context.right_lower) -
    1295             :                      * (context.left_upper - upper)
    1296             :                      */
    1297        4677 :                     common_entries[common_entries_count].delta =
    1298        1559 :                         call_subtype_diff(typcache,
    1299             :                                           lower.val,
    1300        1559 :                                           context.right_lower->val) -
    1301        3118 :                         call_subtype_diff(typcache,
    1302        1559 :                                           context.left_upper->val,
    1303             :                                           upper.val);
    1304             :                 }
    1305             :                 else
    1306             :                 {
    1307             :                     /* Without subtype_diff, take all deltas as zero */
    1308           0 :                     common_entries[common_entries_count].delta = 0;
    1309             :                 }
    1310        1559 :                 common_entries_count++;
    1311             :             }
    1312             :             else
    1313             :             {
    1314             :                 /* Doesn't fit to the right group, so join to the left group */
    1315        5260 :                 PLACE_LEFT(range, i);
    1316             :             }
    1317             :         }
    1318             :         else
    1319             :         {
    1320             :             /*
    1321             :              * Each entry should fit on either left or right group. Since this
    1322             :              * entry didn't fit in the left group, it better fit in the right
    1323             :              * group.
    1324             :              */
    1325        8371 :             Assert(range_cmp_bounds(typcache, &lower,
    1326             :                                     context.right_lower) >= 0);
    1327        8371 :             PLACE_RIGHT(range, i);
    1328             :         }
    1329             :     }
    1330             : 
    1331             :     /*
    1332             :      * Distribute "common entries", if any.
    1333             :      */
    1334          55 :     if (common_entries_count > 0)
    1335             :     {
    1336             :         /*
    1337             :          * Sort "common entries" by calculated deltas in order to distribute
    1338             :          * the most ambiguous entries first.
    1339             :          */
    1340          24 :         qsort(common_entries, common_entries_count, sizeof(CommonEntry),
    1341             :               common_entry_cmp);
    1342             : 
    1343             :         /*
    1344             :          * Distribute "common entries" between groups according to sorting.
    1345             :          */
    1346        1583 :         for (i = 0; i < common_entries_count; i++)
    1347             :         {
    1348        1559 :             int         idx = common_entries[i].index;
    1349             : 
    1350        1559 :             range = DatumGetRangeType(entryvec->vector[idx].key);
    1351             : 
    1352             :             /*
    1353             :              * Check if we have to place this entry in either group to achieve
    1354             :              * LIMIT_RATIO.
    1355             :              */
    1356        1559 :             if (i < context.common_left)
    1357           0 :                 PLACE_LEFT(range, idx);
    1358             :             else
    1359        1559 :                 PLACE_RIGHT(range, idx);
    1360             :         }
    1361             :     }
    1362             : 
    1363          55 :     v->spl_ldatum = PointerGetDatum(left_range);
    1364          55 :     v->spl_rdatum = PointerGetDatum(right_range);
    1365             : }
    1366             : 
    1367             : /*
    1368             :  * Consider replacement of currently selected split with a better one
    1369             :  * during range_gist_double_sorting_split.
    1370             :  */
    1371             : static void
    1372       30015 : range_gist_consider_split(ConsiderSplitContext *context,
    1373             :                           RangeBound *right_lower, int min_left_count,
    1374             :                           RangeBound *left_upper, int max_left_count)
    1375             : {
    1376             :     int         left_count,
    1377             :                 right_count;
    1378             :     float4      ratio,
    1379             :                 overlap;
    1380             : 
    1381             :     /*
    1382             :      * Calculate entries distribution ratio assuming most uniform distribution
    1383             :      * of common entries.
    1384             :      */
    1385       30015 :     if (min_left_count >= (context->entries_count + 1) / 2)
    1386       12944 :         left_count = min_left_count;
    1387       17071 :     else if (max_left_count <= context->entries_count / 2)
    1388       12837 :         left_count = max_left_count;
    1389             :     else
    1390        4234 :         left_count = context->entries_count / 2;
    1391       30015 :     right_count = context->entries_count - left_count;
    1392             : 
    1393             :     /*
    1394             :      * Ratio of split: quotient between size of smaller group and total
    1395             :      * entries count.  This is necessarily 0.5 or less; if it's less than
    1396             :      * LIMIT_RATIO then we will never accept the new split.
    1397             :      */
    1398       60030 :     ratio = ((float4) Min(left_count, right_count)) /
    1399       30015 :         ((float4) context->entries_count);
    1400             : 
    1401       30015 :     if (ratio > LIMIT_RATIO)
    1402             :     {
    1403       15008 :         bool        selectthis = false;
    1404             : 
    1405             :         /*
    1406             :          * The ratio is acceptable, so compare current split with previously
    1407             :          * selected one. We search for minimal overlap (allowing negative
    1408             :          * values) and minimal ratio secondarily.  If subtype_diff is
    1409             :          * available, it's used for overlap measure.  Without subtype_diff we
    1410             :          * use number of "common entries" as an overlap measure.
    1411             :          */
    1412       15008 :         if (context->has_subtype_diff)
    1413       15008 :             overlap = call_subtype_diff(context->typcache,
    1414             :                                         left_upper->val,
    1415             :                                         right_lower->val);
    1416             :         else
    1417           0 :             overlap = max_left_count - min_left_count;
    1418             : 
    1419             :         /* If there is no previous selection, select this split */
    1420       15008 :         if (context->first)
    1421          55 :             selectthis = true;
    1422             :         else
    1423             :         {
    1424             :             /*
    1425             :              * Choose the new split if it has a smaller overlap, or same
    1426             :              * overlap but better ratio.
    1427             :              */
    1428       27833 :             if (overlap < context->overlap ||
    1429       24285 :                 (overlap == context->overlap && ratio > context->ratio))
    1430        4356 :                 selectthis = true;
    1431             :         }
    1432             : 
    1433       15008 :         if (selectthis)
    1434             :         {
    1435             :             /* save information about selected split */
    1436        4411 :             context->first = false;
    1437        4411 :             context->ratio = ratio;
    1438        4411 :             context->overlap = overlap;
    1439        4411 :             context->right_lower = right_lower;
    1440        4411 :             context->left_upper = left_upper;
    1441        4411 :             context->common_left = max_left_count - left_count;
    1442        4411 :             context->common_right = left_count - min_left_count;
    1443             :         }
    1444             :     }
    1445       30015 : }
    1446             : 
    1447             : /*
    1448             :  * Find class number for range.
    1449             :  *
    1450             :  * The class number is a valid combination of the properties of the
    1451             :  * range.  Note: the highest possible number is 8, because CLS_EMPTY
    1452             :  * can't be combined with anything else.
    1453             :  */
    1454             : static int
    1455       19477 : get_gist_range_class(RangeType *range)
    1456             : {
    1457             :     int         classNumber;
    1458             :     char        flags;
    1459             : 
    1460       19477 :     flags = range_get_flags(range);
    1461       19477 :     if (flags & RANGE_EMPTY)
    1462             :     {
    1463        3101 :         classNumber = CLS_EMPTY;
    1464             :     }
    1465             :     else
    1466             :     {
    1467       16376 :         classNumber = 0;
    1468       16376 :         if (flags & RANGE_LB_INF)
    1469         200 :             classNumber |= CLS_LOWER_INF;
    1470       16376 :         if (flags & RANGE_UB_INF)
    1471         140 :             classNumber |= CLS_UPPER_INF;
    1472       16376 :         if (flags & RANGE_CONTAIN_EMPTY)
    1473           0 :             classNumber |= CLS_CONTAIN_EMPTY;
    1474             :     }
    1475       19477 :     return classNumber;
    1476             : }
    1477             : 
    1478             : /*
    1479             :  * Comparison function for range_gist_single_sorting_split.
    1480             :  */
    1481             : static int
    1482           0 : single_bound_cmp(const void *a, const void *b, void *arg)
    1483             : {
    1484           0 :     SingleBoundSortItem *i1 = (SingleBoundSortItem *) a;
    1485           0 :     SingleBoundSortItem *i2 = (SingleBoundSortItem *) b;
    1486           0 :     TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
    1487             : 
    1488           0 :     return range_cmp_bounds(typcache, &i1->bound, &i2->bound);
    1489             : }
    1490             : 
    1491             : /*
    1492             :  * Compare NonEmptyRanges by lower bound.
    1493             :  */
    1494             : static int
    1495       57667 : interval_cmp_lower(const void *a, const void *b, void *arg)
    1496             : {
    1497       57667 :     NonEmptyRange *i1 = (NonEmptyRange *) a;
    1498       57667 :     NonEmptyRange *i2 = (NonEmptyRange *) b;
    1499       57667 :     TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
    1500             : 
    1501       57667 :     return range_cmp_bounds(typcache, &i1->lower, &i2->lower);
    1502             : }
    1503             : 
    1504             : /*
    1505             :  * Compare NonEmptyRanges by upper bound.
    1506             :  */
    1507             : static int
    1508       58088 : interval_cmp_upper(const void *a, const void *b, void *arg)
    1509             : {
    1510       58088 :     NonEmptyRange *i1 = (NonEmptyRange *) a;
    1511       58088 :     NonEmptyRange *i2 = (NonEmptyRange *) b;
    1512       58088 :     TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
    1513             : 
    1514       58088 :     return range_cmp_bounds(typcache, &i1->upper, &i2->upper);
    1515             : }
    1516             : 
    1517             : /*
    1518             :  * Compare CommonEntrys by their deltas.
    1519             :  */
    1520             : static int
    1521        1535 : common_entry_cmp(const void *i1, const void *i2)
    1522             : {
    1523        1535 :     double      delta1 = ((CommonEntry *) i1)->delta;
    1524        1535 :     double      delta2 = ((CommonEntry *) i2)->delta;
    1525             : 
    1526        1535 :     if (delta1 < delta2)
    1527        1535 :         return -1;
    1528           0 :     else if (delta1 > delta2)
    1529           0 :         return 1;
    1530             :     else
    1531           0 :         return 0;
    1532             : }
    1533             : 
    1534             : /*
    1535             :  * Convenience function to invoke type-specific subtype_diff function.
    1536             :  * Caller must have already checked that there is one for the range type.
    1537             :  */
    1538             : static float8
    1539      138424 : call_subtype_diff(TypeCacheEntry *typcache, Datum val1, Datum val2)
    1540             : {
    1541             :     float8      value;
    1542             : 
    1543      138424 :     value = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
    1544             :                                              typcache->rng_collation,
    1545             :                                              val1, val2));
    1546             :     /* Cope with buggy subtype_diff function by returning zero */
    1547      138424 :     if (value >= 0.0)
    1548      138424 :         return value;
    1549           0 :     return 0.0;
    1550             : }

Generated by: LCOV version 1.11