LCOV - code coverage report
Current view: top level - src/backend/access/nbtree - nbtutils.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 430 597 72.0 %
Date: 2017-09-29 13:40:31 Functions: 23 27 85.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * nbtutils.c
       4             :  *    Utility code for Postgres btree implementation.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/access/nbtree/nbtutils.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : 
      16             : #include "postgres.h"
      17             : 
      18             : #include <time.h>
      19             : 
      20             : #include "access/nbtree.h"
      21             : #include "access/reloptions.h"
      22             : #include "access/relscan.h"
      23             : #include "miscadmin.h"
      24             : #include "utils/array.h"
      25             : #include "utils/lsyscache.h"
      26             : #include "utils/memutils.h"
      27             : #include "utils/rel.h"
      28             : 
      29             : 
      30             : typedef struct BTSortArrayContext
      31             : {
      32             :     FmgrInfo    flinfo;
      33             :     Oid         collation;
      34             :     bool        reverse;
      35             : } BTSortArrayContext;
      36             : 
      37             : static Datum _bt_find_extreme_element(IndexScanDesc scan, ScanKey skey,
      38             :                          StrategyNumber strat,
      39             :                          Datum *elems, int nelems);
      40             : static int _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
      41             :                         bool reverse,
      42             :                         Datum *elems, int nelems);
      43             : static int  _bt_compare_array_elements(const void *a, const void *b, void *arg);
      44             : static bool _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
      45             :                          ScanKey leftarg, ScanKey rightarg,
      46             :                          bool *result);
      47             : static bool _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption);
      48             : static void _bt_mark_scankey_required(ScanKey skey);
      49             : static bool _bt_check_rowcompare(ScanKey skey,
      50             :                      IndexTuple tuple, TupleDesc tupdesc,
      51             :                      ScanDirection dir, bool *continuescan);
      52             : 
      53             : 
      54             : /*
      55             :  * _bt_mkscankey
      56             :  *      Build an insertion scan key that contains comparison data from itup
      57             :  *      as well as comparator routines appropriate to the key datatypes.
      58             :  *
      59             :  *      The result is intended for use with _bt_compare().
      60             :  */
      61             : ScanKey
      62      205557 : _bt_mkscankey(Relation rel, IndexTuple itup)
      63             : {
      64             :     ScanKey     skey;
      65             :     TupleDesc   itupdesc;
      66             :     int         natts;
      67             :     int16      *indoption;
      68             :     int         i;
      69             : 
      70      205557 :     itupdesc = RelationGetDescr(rel);
      71      205557 :     natts = RelationGetNumberOfAttributes(rel);
      72      205557 :     indoption = rel->rd_indoption;
      73             : 
      74      205557 :     skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
      75             : 
      76      636947 :     for (i = 0; i < natts; i++)
      77             :     {
      78             :         FmgrInfo   *procinfo;
      79             :         Datum       arg;
      80             :         bool        null;
      81             :         int         flags;
      82             : 
      83             :         /*
      84             :          * We can use the cached (default) support procs since no cross-type
      85             :          * comparison can be needed.
      86             :          */
      87      431390 :         procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
      88      431390 :         arg = index_getattr(itup, i + 1, itupdesc, &null);
      89      431390 :         flags = (null ? SK_ISNULL : 0) | (indoption[i] << SK_BT_INDOPTION_SHIFT);
      90      862780 :         ScanKeyEntryInitializeWithInfo(&skey[i],
      91             :                                        flags,
      92      431390 :                                        (AttrNumber) (i + 1),
      93             :                                        InvalidStrategy,
      94             :                                        InvalidOid,
      95      431390 :                                        rel->rd_indcollation[i],
      96             :                                        procinfo,
      97             :                                        arg);
      98             :     }
      99             : 
     100      205557 :     return skey;
     101             : }
     102             : 
     103             : /*
     104             :  * _bt_mkscankey_nodata
     105             :  *      Build an insertion scan key that contains 3-way comparator routines
     106             :  *      appropriate to the key datatypes, but no comparison data.  The
     107             :  *      comparison data ultimately used must match the key datatypes.
     108             :  *
     109             :  *      The result cannot be used with _bt_compare(), unless comparison
     110             :  *      data is first stored into the key entries.  Currently this
     111             :  *      routine is only called by nbtsort.c and tuplesort.c, which have
     112             :  *      their own comparison routines.
     113             :  */
     114             : ScanKey
     115        2632 : _bt_mkscankey_nodata(Relation rel)
     116             : {
     117             :     ScanKey     skey;
     118             :     int         natts;
     119             :     int16      *indoption;
     120             :     int         i;
     121             : 
     122        2632 :     natts = RelationGetNumberOfAttributes(rel);
     123        2632 :     indoption = rel->rd_indoption;
     124             : 
     125        2632 :     skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
     126             : 
     127        7050 :     for (i = 0; i < natts; i++)
     128             :     {
     129             :         FmgrInfo   *procinfo;
     130             :         int         flags;
     131             : 
     132             :         /*
     133             :          * We can use the cached (default) support procs since no cross-type
     134             :          * comparison can be needed.
     135             :          */
     136        4418 :         procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
     137        4418 :         flags = SK_ISNULL | (indoption[i] << SK_BT_INDOPTION_SHIFT);
     138        8836 :         ScanKeyEntryInitializeWithInfo(&skey[i],
     139             :                                        flags,
     140        4418 :                                        (AttrNumber) (i + 1),
     141             :                                        InvalidStrategy,
     142             :                                        InvalidOid,
     143        4418 :                                        rel->rd_indcollation[i],
     144             :                                        procinfo,
     145             :                                        (Datum) 0);
     146             :     }
     147             : 
     148        2632 :     return skey;
     149             : }
     150             : 
     151             : /*
     152             :  * free a scan key made by either _bt_mkscankey or _bt_mkscankey_nodata.
     153             :  */
     154             : void
     155      208114 : _bt_freeskey(ScanKey skey)
     156             : {
     157      208114 :     pfree(skey);
     158      208114 : }
     159             : 
     160             : /*
     161             :  * free a retracement stack made by _bt_search.
     162             :  */
     163             : void
     164      592670 : _bt_freestack(BTStack stack)
     165             : {
     166             :     BTStack     ostack;
     167             : 
     168     1665841 :     while (stack != NULL)
     169             :     {
     170      480501 :         ostack = stack;
     171      480501 :         stack = stack->bts_parent;
     172      480501 :         pfree(ostack);
     173             :     }
     174      592670 : }
     175             : 
     176             : 
     177             : /*
     178             :  *  _bt_preprocess_array_keys() -- Preprocess SK_SEARCHARRAY scan keys
     179             :  *
     180             :  * If there are any SK_SEARCHARRAY scan keys, deconstruct the array(s) and
     181             :  * set up BTArrayKeyInfo info for each one that is an equality-type key.
     182             :  * Prepare modified scan keys in so->arrayKeyData, which will hold the current
     183             :  * array elements during each primitive indexscan operation.  For inequality
     184             :  * array keys, it's sufficient to find the extreme element value and replace
     185             :  * the whole array with that scalar value.
     186             :  *
     187             :  * Note: the reason we need so->arrayKeyData, rather than just scribbling
     188             :  * on scan->keyData, is that callers are permitted to call btrescan without
     189             :  * supplying a new set of scankey data.
     190             :  */
     191             : void
     192      388316 : _bt_preprocess_array_keys(IndexScanDesc scan)
     193             : {
     194      388316 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
     195      388316 :     int         numberOfKeys = scan->numberOfKeys;
     196      388316 :     int16      *indoption = scan->indexRelation->rd_indoption;
     197             :     int         numArrayKeys;
     198             :     ScanKey     cur;
     199             :     int         i;
     200             :     MemoryContext oldContext;
     201             : 
     202             :     /* Quick check to see if there are any array keys */
     203      388316 :     numArrayKeys = 0;
     204     1053715 :     for (i = 0; i < numberOfKeys; i++)
     205             :     {
     206      665399 :         cur = &scan->keyData[i];
     207      665399 :         if (cur->sk_flags & SK_SEARCHARRAY)
     208             :         {
     209          31 :             numArrayKeys++;
     210          31 :             Assert(!(cur->sk_flags & (SK_ROW_HEADER | SK_SEARCHNULL | SK_SEARCHNOTNULL)));
     211             :             /* If any arrays are null as a whole, we can quit right now. */
     212          31 :             if (cur->sk_flags & SK_ISNULL)
     213             :             {
     214           0 :                 so->numArrayKeys = -1;
     215           0 :                 so->arrayKeyData = NULL;
     216           0 :                 return;
     217             :             }
     218             :         }
     219             :     }
     220             : 
     221             :     /* Quit if nothing to do. */
     222      388316 :     if (numArrayKeys == 0)
     223             :     {
     224      388285 :         so->numArrayKeys = 0;
     225      388285 :         so->arrayKeyData = NULL;
     226      388285 :         return;
     227             :     }
     228             : 
     229             :     /*
     230             :      * Make a scan-lifespan context to hold array-associated data, or reset it
     231             :      * if we already have one from a previous rescan cycle.
     232             :      */
     233          31 :     if (so->arrayContext == NULL)
     234          31 :         so->arrayContext = AllocSetContextCreate(CurrentMemoryContext,
     235             :                                                  "BTree array context",
     236             :                                                  ALLOCSET_SMALL_SIZES);
     237             :     else
     238           0 :         MemoryContextReset(so->arrayContext);
     239             : 
     240          31 :     oldContext = MemoryContextSwitchTo(so->arrayContext);
     241             : 
     242             :     /* Create modifiable copy of scan->keyData in the workspace context */
     243          31 :     so->arrayKeyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
     244          62 :     memcpy(so->arrayKeyData,
     245          31 :            scan->keyData,
     246          31 :            scan->numberOfKeys * sizeof(ScanKeyData));
     247             : 
     248             :     /* Allocate space for per-array data in the workspace context */
     249          31 :     so->arrayKeys = (BTArrayKeyInfo *) palloc0(numArrayKeys * sizeof(BTArrayKeyInfo));
     250             : 
     251             :     /* Now process each array key */
     252          31 :     numArrayKeys = 0;
     253          66 :     for (i = 0; i < numberOfKeys; i++)
     254             :     {
     255             :         ArrayType  *arrayval;
     256             :         int16       elmlen;
     257             :         bool        elmbyval;
     258             :         char        elmalign;
     259             :         int         num_elems;
     260             :         Datum      *elem_values;
     261             :         bool       *elem_nulls;
     262             :         int         num_nonnulls;
     263             :         int         j;
     264             : 
     265          35 :         cur = &so->arrayKeyData[i];
     266          35 :         if (!(cur->sk_flags & SK_SEARCHARRAY))
     267           8 :             continue;
     268             : 
     269             :         /*
     270             :          * First, deconstruct the array into elements.  Anything allocated
     271             :          * here (including a possibly detoasted array value) is in the
     272             :          * workspace context.
     273             :          */
     274          31 :         arrayval = DatumGetArrayTypeP(cur->sk_argument);
     275             :         /* We could cache this data, but not clear it's worth it */
     276          31 :         get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
     277             :                              &elmlen, &elmbyval, &elmalign);
     278          31 :         deconstruct_array(arrayval,
     279             :                           ARR_ELEMTYPE(arrayval),
     280             :                           elmlen, elmbyval, elmalign,
     281             :                           &elem_values, &elem_nulls, &num_elems);
     282             : 
     283             :         /*
     284             :          * Compress out any null elements.  We can ignore them since we assume
     285             :          * all btree operators are strict.
     286             :          */
     287          31 :         num_nonnulls = 0;
     288         138 :         for (j = 0; j < num_elems; j++)
     289             :         {
     290         107 :             if (!elem_nulls[j])
     291         107 :                 elem_values[num_nonnulls++] = elem_values[j];
     292             :         }
     293             : 
     294             :         /* We could pfree(elem_nulls) now, but not worth the cycles */
     295             : 
     296             :         /* If there's no non-nulls, the scan qual is unsatisfiable */
     297          31 :         if (num_nonnulls == 0)
     298             :         {
     299           0 :             numArrayKeys = -1;
     300           0 :             break;
     301             :         }
     302             : 
     303             :         /*
     304             :          * If the comparison operator is not equality, then the array qual
     305             :          * degenerates to a simple comparison against the smallest or largest
     306             :          * non-null array element, as appropriate.
     307             :          */
     308          31 :         switch (cur->sk_strategy)
     309             :         {
     310             :             case BTLessStrategyNumber:
     311             :             case BTLessEqualStrategyNumber:
     312           0 :                 cur->sk_argument =
     313           0 :                     _bt_find_extreme_element(scan, cur,
     314             :                                              BTGreaterStrategyNumber,
     315             :                                              elem_values, num_nonnulls);
     316           0 :                 continue;
     317             :             case BTEqualStrategyNumber:
     318             :                 /* proceed with rest of loop */
     319          31 :                 break;
     320             :             case BTGreaterEqualStrategyNumber:
     321             :             case BTGreaterStrategyNumber:
     322           0 :                 cur->sk_argument =
     323           0 :                     _bt_find_extreme_element(scan, cur,
     324             :                                              BTLessStrategyNumber,
     325             :                                              elem_values, num_nonnulls);
     326           0 :                 continue;
     327             :             default:
     328           0 :                 elog(ERROR, "unrecognized StrategyNumber: %d",
     329             :                      (int) cur->sk_strategy);
     330             :                 break;
     331             :         }
     332             : 
     333             :         /*
     334             :          * Sort the non-null elements and eliminate any duplicates.  We must
     335             :          * sort in the same ordering used by the index column, so that the
     336             :          * successive primitive indexscans produce data in index order.
     337             :          */
     338          62 :         num_elems = _bt_sort_array_elements(scan, cur,
     339          31 :                                             (indoption[cur->sk_attno - 1] & INDOPTION_DESC) != 0,
     340             :                                             elem_values, num_nonnulls);
     341             : 
     342             :         /*
     343             :          * And set up the BTArrayKeyInfo data.
     344             :          */
     345          31 :         so->arrayKeys[numArrayKeys].scan_key = i;
     346          31 :         so->arrayKeys[numArrayKeys].num_elems = num_elems;
     347          31 :         so->arrayKeys[numArrayKeys].elem_values = elem_values;
     348          31 :         numArrayKeys++;
     349             :     }
     350             : 
     351          31 :     so->numArrayKeys = numArrayKeys;
     352             : 
     353          31 :     MemoryContextSwitchTo(oldContext);
     354             : }
     355             : 
     356             : /*
     357             :  * _bt_find_extreme_element() -- get least or greatest array element
     358             :  *
     359             :  * scan and skey identify the index column, whose opfamily determines the
     360             :  * comparison semantics.  strat should be BTLessStrategyNumber to get the
     361             :  * least element, or BTGreaterStrategyNumber to get the greatest.
     362             :  */
     363             : static Datum
     364           0 : _bt_find_extreme_element(IndexScanDesc scan, ScanKey skey,
     365             :                          StrategyNumber strat,
     366             :                          Datum *elems, int nelems)
     367             : {
     368           0 :     Relation    rel = scan->indexRelation;
     369             :     Oid         elemtype,
     370             :                 cmp_op;
     371             :     RegProcedure cmp_proc;
     372             :     FmgrInfo    flinfo;
     373             :     Datum       result;
     374             :     int         i;
     375             : 
     376             :     /*
     377             :      * Determine the nominal datatype of the array elements.  We have to
     378             :      * support the convention that sk_subtype == InvalidOid means the opclass
     379             :      * input type; this is a hack to simplify life for ScanKeyInit().
     380             :      */
     381           0 :     elemtype = skey->sk_subtype;
     382           0 :     if (elemtype == InvalidOid)
     383           0 :         elemtype = rel->rd_opcintype[skey->sk_attno - 1];
     384             : 
     385             :     /*
     386             :      * Look up the appropriate comparison operator in the opfamily.
     387             :      *
     388             :      * Note: it's possible that this would fail, if the opfamily is
     389             :      * incomplete, but it seems quite unlikely that an opfamily would omit
     390             :      * non-cross-type comparison operators for any datatype that it supports
     391             :      * at all.
     392             :      */
     393           0 :     cmp_op = get_opfamily_member(rel->rd_opfamily[skey->sk_attno - 1],
     394             :                                  elemtype,
     395             :                                  elemtype,
     396             :                                  strat);
     397           0 :     if (!OidIsValid(cmp_op))
     398           0 :         elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
     399             :              strat, elemtype, elemtype,
     400             :              rel->rd_opfamily[skey->sk_attno - 1]);
     401           0 :     cmp_proc = get_opcode(cmp_op);
     402           0 :     if (!RegProcedureIsValid(cmp_proc))
     403           0 :         elog(ERROR, "missing oprcode for operator %u", cmp_op);
     404             : 
     405           0 :     fmgr_info(cmp_proc, &flinfo);
     406             : 
     407           0 :     Assert(nelems > 0);
     408           0 :     result = elems[0];
     409           0 :     for (i = 1; i < nelems; i++)
     410             :     {
     411           0 :         if (DatumGetBool(FunctionCall2Coll(&flinfo,
     412             :                                            skey->sk_collation,
     413             :                                            elems[i],
     414             :                                            result)))
     415           0 :             result = elems[i];
     416             :     }
     417             : 
     418           0 :     return result;
     419             : }
     420             : 
     421             : /*
     422             :  * _bt_sort_array_elements() -- sort and de-dup array elements
     423             :  *
     424             :  * The array elements are sorted in-place, and the new number of elements
     425             :  * after duplicate removal is returned.
     426             :  *
     427             :  * scan and skey identify the index column, whose opfamily determines the
     428             :  * comparison semantics.  If reverse is true, we sort in descending order.
     429             :  */
     430             : static int
     431          31 : _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
     432             :                         bool reverse,
     433             :                         Datum *elems, int nelems)
     434             : {
     435          31 :     Relation    rel = scan->indexRelation;
     436             :     Oid         elemtype;
     437             :     RegProcedure cmp_proc;
     438             :     BTSortArrayContext cxt;
     439             :     int         last_non_dup;
     440             :     int         i;
     441             : 
     442          31 :     if (nelems <= 1)
     443           0 :         return nelems;          /* no work to do */
     444             : 
     445             :     /*
     446             :      * Determine the nominal datatype of the array elements.  We have to
     447             :      * support the convention that sk_subtype == InvalidOid means the opclass
     448             :      * input type; this is a hack to simplify life for ScanKeyInit().
     449             :      */
     450          31 :     elemtype = skey->sk_subtype;
     451          31 :     if (elemtype == InvalidOid)
     452           0 :         elemtype = rel->rd_opcintype[skey->sk_attno - 1];
     453             : 
     454             :     /*
     455             :      * Look up the appropriate comparison function in the opfamily.
     456             :      *
     457             :      * Note: it's possible that this would fail, if the opfamily is
     458             :      * incomplete, but it seems quite unlikely that an opfamily would omit
     459             :      * non-cross-type support functions for any datatype that it supports at
     460             :      * all.
     461             :      */
     462          31 :     cmp_proc = get_opfamily_proc(rel->rd_opfamily[skey->sk_attno - 1],
     463             :                                  elemtype,
     464             :                                  elemtype,
     465             :                                  BTORDER_PROC);
     466          31 :     if (!RegProcedureIsValid(cmp_proc))
     467           0 :         elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
     468             :              BTORDER_PROC, elemtype, elemtype,
     469             :              rel->rd_opfamily[skey->sk_attno - 1]);
     470             : 
     471             :     /* Sort the array elements */
     472          31 :     fmgr_info(cmp_proc, &cxt.flinfo);
     473          31 :     cxt.collation = skey->sk_collation;
     474          31 :     cxt.reverse = reverse;
     475          31 :     qsort_arg((void *) elems, nelems, sizeof(Datum),
     476             :               _bt_compare_array_elements, (void *) &cxt);
     477             : 
     478             :     /* Now scan the sorted elements and remove duplicates */
     479          31 :     last_non_dup = 0;
     480         107 :     for (i = 1; i < nelems; i++)
     481             :     {
     482             :         int32       compare;
     483             : 
     484          76 :         compare = DatumGetInt32(FunctionCall2Coll(&cxt.flinfo,
     485             :                                                   cxt.collation,
     486             :                                                   elems[last_non_dup],
     487             :                                                   elems[i]));
     488          76 :         if (compare != 0)
     489          76 :             elems[++last_non_dup] = elems[i];
     490             :     }
     491             : 
     492          31 :     return last_non_dup + 1;
     493             : }
     494             : 
     495             : /*
     496             :  * qsort_arg comparator for sorting array elements
     497             :  */
     498             : static int
     499         127 : _bt_compare_array_elements(const void *a, const void *b, void *arg)
     500             : {
     501         127 :     Datum       da = *((const Datum *) a);
     502         127 :     Datum       db = *((const Datum *) b);
     503         127 :     BTSortArrayContext *cxt = (BTSortArrayContext *) arg;
     504             :     int32       compare;
     505             : 
     506         127 :     compare = DatumGetInt32(FunctionCall2Coll(&cxt->flinfo,
     507             :                                               cxt->collation,
     508             :                                               da, db));
     509         127 :     if (cxt->reverse)
     510           0 :         compare = -compare;
     511         127 :     return compare;
     512             : }
     513             : 
     514             : /*
     515             :  * _bt_start_array_keys() -- Initialize array keys at start of a scan
     516             :  *
     517             :  * Set up the cur_elem counters and fill in the first sk_argument value for
     518             :  * each array scankey.  We can't do this until we know the scan direction.
     519             :  */
     520             : void
     521          31 : _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir)
     522             : {
     523          31 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
     524             :     int         i;
     525             : 
     526          62 :     for (i = 0; i < so->numArrayKeys; i++)
     527             :     {
     528          31 :         BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
     529          31 :         ScanKey     skey = &so->arrayKeyData[curArrayKey->scan_key];
     530             : 
     531          31 :         Assert(curArrayKey->num_elems > 0);
     532          31 :         if (ScanDirectionIsBackward(dir))
     533           0 :             curArrayKey->cur_elem = curArrayKey->num_elems - 1;
     534             :         else
     535          31 :             curArrayKey->cur_elem = 0;
     536          31 :         skey->sk_argument = curArrayKey->elem_values[curArrayKey->cur_elem];
     537             :     }
     538          31 : }
     539             : 
     540             : /*
     541             :  * _bt_advance_array_keys() -- Advance to next set of array elements
     542             :  *
     543             :  * Returns TRUE if there is another set of values to consider, FALSE if not.
     544             :  * On TRUE result, the scankeys are initialized with the next set of values.
     545             :  */
     546             : bool
     547         107 : _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir)
     548             : {
     549         107 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
     550         107 :     bool        found = false;
     551             :     int         i;
     552             : 
     553             :     /*
     554             :      * We must advance the last array key most quickly, since it will
     555             :      * correspond to the lowest-order index column among the available
     556             :      * qualifications. This is necessary to ensure correct ordering of output
     557             :      * when there are multiple array keys.
     558             :      */
     559         138 :     for (i = so->numArrayKeys - 1; i >= 0; i--)
     560             :     {
     561         107 :         BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
     562         107 :         ScanKey     skey = &so->arrayKeyData[curArrayKey->scan_key];
     563         107 :         int         cur_elem = curArrayKey->cur_elem;
     564         107 :         int         num_elems = curArrayKey->num_elems;
     565             : 
     566         107 :         if (ScanDirectionIsBackward(dir))
     567             :         {
     568           0 :             if (--cur_elem < 0)
     569             :             {
     570           0 :                 cur_elem = num_elems - 1;
     571           0 :                 found = false;  /* need to advance next array key */
     572             :             }
     573             :             else
     574           0 :                 found = true;
     575             :         }
     576             :         else
     577             :         {
     578         107 :             if (++cur_elem >= num_elems)
     579             :             {
     580          31 :                 cur_elem = 0;
     581          31 :                 found = false;  /* need to advance next array key */
     582             :             }
     583             :             else
     584          76 :                 found = true;
     585             :         }
     586             : 
     587         107 :         curArrayKey->cur_elem = cur_elem;
     588         107 :         skey->sk_argument = curArrayKey->elem_values[cur_elem];
     589         107 :         if (found)
     590          76 :             break;
     591             :     }
     592             : 
     593             :     /* advance parallel scan */
     594         107 :     if (scan->parallel_scan != NULL)
     595           0 :         _bt_parallel_advance_array_keys(scan);
     596             : 
     597         107 :     return found;
     598             : }
     599             : 
     600             : /*
     601             :  * _bt_mark_array_keys() -- Handle array keys during btmarkpos
     602             :  *
     603             :  * Save the current state of the array keys as the "mark" position.
     604             :  */
     605             : void
     606           0 : _bt_mark_array_keys(IndexScanDesc scan)
     607             : {
     608           0 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
     609             :     int         i;
     610             : 
     611           0 :     for (i = 0; i < so->numArrayKeys; i++)
     612             :     {
     613           0 :         BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
     614             : 
     615           0 :         curArrayKey->mark_elem = curArrayKey->cur_elem;
     616             :     }
     617           0 : }
     618             : 
     619             : /*
     620             :  * _bt_restore_array_keys() -- Handle array keys during btrestrpos
     621             :  *
     622             :  * Restore the array keys to where they were when the mark was set.
     623             :  */
     624             : void
     625           0 : _bt_restore_array_keys(IndexScanDesc scan)
     626             : {
     627           0 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
     628           0 :     bool        changed = false;
     629             :     int         i;
     630             : 
     631             :     /* Restore each array key to its position when the mark was set */
     632           0 :     for (i = 0; i < so->numArrayKeys; i++)
     633             :     {
     634           0 :         BTArrayKeyInfo *curArrayKey = &so->arrayKeys[i];
     635           0 :         ScanKey     skey = &so->arrayKeyData[curArrayKey->scan_key];
     636           0 :         int         mark_elem = curArrayKey->mark_elem;
     637             : 
     638           0 :         if (curArrayKey->cur_elem != mark_elem)
     639             :         {
     640           0 :             curArrayKey->cur_elem = mark_elem;
     641           0 :             skey->sk_argument = curArrayKey->elem_values[mark_elem];
     642           0 :             changed = true;
     643             :         }
     644             :     }
     645             : 
     646             :     /*
     647             :      * If we changed any keys, we must redo _bt_preprocess_keys.  That might
     648             :      * sound like overkill, but in cases with multiple keys per index column
     649             :      * it seems necessary to do the full set of pushups.
     650             :      */
     651           0 :     if (changed)
     652             :     {
     653           0 :         _bt_preprocess_keys(scan);
     654             :         /* The mark should have been set on a consistent set of keys... */
     655           0 :         Assert(so->qual_ok);
     656             :     }
     657           0 : }
     658             : 
     659             : 
     660             : /*
     661             :  *  _bt_preprocess_keys() -- Preprocess scan keys
     662             :  *
     663             :  * The given search-type keys (in scan->keyData[] or so->arrayKeyData[])
     664             :  * are copied to so->keyData[] with possible transformation.
     665             :  * scan->numberOfKeys is the number of input keys, so->numberOfKeys gets
     666             :  * the number of output keys (possibly less, never greater).
     667             :  *
     668             :  * The output keys are marked with additional sk_flag bits beyond the
     669             :  * system-standard bits supplied by the caller.  The DESC and NULLS_FIRST
     670             :  * indoption bits for the relevant index attribute are copied into the flags.
     671             :  * Also, for a DESC column, we commute (flip) all the sk_strategy numbers
     672             :  * so that the index sorts in the desired direction.
     673             :  *
     674             :  * One key purpose of this routine is to discover which scan keys must be
     675             :  * satisfied to continue the scan.  It also attempts to eliminate redundant
     676             :  * keys and detect contradictory keys.  (If the index opfamily provides
     677             :  * incomplete sets of cross-type operators, we may fail to detect redundant
     678             :  * or contradictory keys, but we can survive that.)
     679             :  *
     680             :  * The output keys must be sorted by index attribute.  Presently we expect
     681             :  * (but verify) that the input keys are already so sorted --- this is done
     682             :  * by match_clauses_to_index() in indxpath.c.  Some reordering of the keys
     683             :  * within each attribute may be done as a byproduct of the processing here,
     684             :  * but no other code depends on that.
     685             :  *
     686             :  * The output keys are marked with flags SK_BT_REQFWD and/or SK_BT_REQBKWD
     687             :  * if they must be satisfied in order to continue the scan forward or backward
     688             :  * respectively.  _bt_checkkeys uses these flags.  For example, if the quals
     689             :  * are "x = 1 AND y < 4 AND z < 5", then _bt_checkkeys will reject a tuple
     690             :  * (1,2,7), but we must continue the scan in case there are tuples (1,3,z).
     691             :  * But once we reach tuples like (1,4,z) we can stop scanning because no
     692             :  * later tuples could match.  This is reflected by marking the x and y keys,
     693             :  * but not the z key, with SK_BT_REQFWD.  In general, the keys for leading
     694             :  * attributes with "=" keys are marked both SK_BT_REQFWD and SK_BT_REQBKWD.
     695             :  * For the first attribute without an "=" key, any "<" and "<=" keys are
     696             :  * marked SK_BT_REQFWD while any ">" and ">=" keys are marked SK_BT_REQBKWD.
     697             :  * This can be seen to be correct by considering the above example.  Note
     698             :  * in particular that if there are no keys for a given attribute, the keys for
     699             :  * subsequent attributes can never be required; for instance "WHERE y = 4"
     700             :  * requires a full-index scan.
     701             :  *
     702             :  * If possible, redundant keys are eliminated: we keep only the tightest
     703             :  * >/>= bound and the tightest </<= bound, and if there's an = key then
     704             :  * that's the only one returned.  (So, we return either a single = key,
     705             :  * or one or two boundary-condition keys for each attr.)  However, if we
     706             :  * cannot compare two keys for lack of a suitable cross-type operator,
     707             :  * we cannot eliminate either.  If there are two such keys of the same
     708             :  * operator strategy, the second one is just pushed into the output array
     709             :  * without further processing here.  We may also emit both >/>= or both
     710             :  * </<= keys if we can't compare them.  The logic about required keys still
     711             :  * works if we don't eliminate redundant keys.
     712             :  *
     713             :  * Note that one reason we need direction-sensitive required-key flags is
     714             :  * precisely that we may not be able to eliminate redundant keys.  Suppose
     715             :  * we have "x > 4::int AND x > 10::bigint", and we are unable to determine
     716             :  * which key is more restrictive for lack of a suitable cross-type operator.
     717             :  * _bt_first will arbitrarily pick one of the keys to do the initial
     718             :  * positioning with.  If it picks x > 4, then the x > 10 condition will fail
     719             :  * until we reach index entries > 10; but we can't stop the scan just because
     720             :  * x > 10 is failing.  On the other hand, if we are scanning backwards, then
     721             :  * failure of either key is indeed enough to stop the scan.  (In general, when
     722             :  * inequality keys are present, the initial-positioning code only promises to
     723             :  * position before the first possible match, not exactly at the first match,
     724             :  * for a forward scan; or after the last match for a backward scan.)
     725             :  *
     726             :  * As a byproduct of this work, we can detect contradictory quals such
     727             :  * as "x = 1 AND x > 2".  If we see that, we return so->qual_ok = FALSE,
     728             :  * indicating the scan need not be run at all since no tuples can match.
     729             :  * (In this case we do not bother completing the output key array!)
     730             :  * Again, missing cross-type operators might cause us to fail to prove the
     731             :  * quals contradictory when they really are, but the scan will work correctly.
     732             :  *
     733             :  * Row comparison keys are currently also treated without any smarts:
     734             :  * we just transfer them into the preprocessed array without any
     735             :  * editorialization.  We can treat them the same as an ordinary inequality
     736             :  * comparison on the row's first index column, for the purposes of the logic
     737             :  * about required keys.
     738             :  *
     739             :  * Note: the reason we have to copy the preprocessed scan keys into private
     740             :  * storage is that we are modifying the array based on comparisons of the
     741             :  * key argument values, which could change on a rescan or after moving to
     742             :  * new elements of array keys.  Therefore we can't overwrite the source data.
     743             :  */
     744             : void
     745      388066 : _bt_preprocess_keys(IndexScanDesc scan)
     746             : {
     747      388066 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
     748      388066 :     int         numberOfKeys = scan->numberOfKeys;
     749      388066 :     int16      *indoption = scan->indexRelation->rd_indoption;
     750             :     int         new_numberOfKeys;
     751             :     int         numberOfEqualCols;
     752             :     ScanKey     inkeys;
     753             :     ScanKey     outkeys;
     754             :     ScanKey     cur;
     755             :     ScanKey     xform[BTMaxStrategyNumber];
     756             :     bool        test_result;
     757             :     int         i,
     758             :                 j;
     759             :     AttrNumber  attno;
     760             : 
     761             :     /* initialize result variables */
     762      388066 :     so->qual_ok = true;
     763      388066 :     so->numberOfKeys = 0;
     764             : 
     765      388066 :     if (numberOfKeys < 1)
     766      154604 :         return;                 /* done if qual-less scan */
     767             : 
     768             :     /*
     769             :      * Read so->arrayKeyData if array keys are present, else scan->keyData
     770             :      */
     771      387708 :     if (so->arrayKeyData != NULL)
     772         107 :         inkeys = so->arrayKeyData;
     773             :     else
     774      387601 :         inkeys = scan->keyData;
     775             : 
     776      387708 :     outkeys = so->keyData;
     777      387708 :     cur = &inkeys[0];
     778             :     /* we check that input keys are correctly ordered */
     779      387708 :     if (cur->sk_attno < 1)
     780           0 :         elog(ERROR, "btree index keys must be ordered by attribute");
     781             : 
     782             :     /* We can short-circuit most of the work if there's just one key */
     783      387708 :     if (numberOfKeys == 1)
     784             :     {
     785             :         /* Apply indoption to scankey (might change sk_strategy!) */
     786      153884 :         if (!_bt_fix_scankey_strategy(cur, indoption))
     787           7 :             so->qual_ok = false;
     788      153884 :         memcpy(outkeys, cur, sizeof(ScanKeyData));
     789      153884 :         so->numberOfKeys = 1;
     790             :         /* We can mark the qual as required if it's for first index col */
     791      153884 :         if (cur->sk_attno == 1)
     792      153863 :             _bt_mark_scankey_required(outkeys);
     793      153884 :         return;
     794             :     }
     795             : 
     796             :     /*
     797             :      * Otherwise, do the full set of pushups.
     798             :      */
     799      233824 :     new_numberOfKeys = 0;
     800      233824 :     numberOfEqualCols = 0;
     801             : 
     802             :     /*
     803             :      * Initialize for processing of keys for attr 1.
     804             :      *
     805             :      * xform[i] points to the currently best scan key of strategy type i+1; it
     806             :      * is NULL if we haven't yet found such a key for this attr.
     807             :      */
     808      233824 :     attno = 1;
     809      233824 :     memset(xform, 0, sizeof(xform));
     810             : 
     811             :     /*
     812             :      * Loop iterates from 0 to numberOfKeys inclusive; we use the last pass to
     813             :      * handle after-last-key processing.  Actual exit from the loop is at the
     814             :      * "break" statement below.
     815             :      */
     816      745116 :     for (i = 0;; cur++, i++)
     817             :     {
     818      745116 :         if (i < numberOfKeys)
     819             :         {
     820             :             /* Apply indoption to scankey (might change sk_strategy!) */
     821      511292 :             if (!_bt_fix_scankey_strategy(cur, indoption))
     822             :             {
     823             :                 /* NULL can't be matched, so give up */
     824           0 :                 so->qual_ok = false;
     825           0 :                 return;
     826             :             }
     827             :         }
     828             : 
     829             :         /*
     830             :          * If we are at the end of the keys for a particular attr, finish up
     831             :          * processing and emit the cleaned-up keys.
     832             :          */
     833      745116 :         if (i == numberOfKeys || cur->sk_attno != attno)
     834             :         {
     835      511205 :             int         priorNumberOfEqualCols = numberOfEqualCols;
     836             : 
     837             :             /* check input keys are correctly ordered */
     838      511205 :             if (i < numberOfKeys && cur->sk_attno < attno)
     839           0 :                 elog(ERROR, "btree index keys must be ordered by attribute");
     840             : 
     841             :             /*
     842             :              * If = has been specified, all other keys can be eliminated as
     843             :              * redundant.  If we have a case like key = 1 AND key > 2, we can
     844             :              * set qual_ok to false and abandon further processing.
     845             :              *
     846             :              * We also have to deal with the case of "key IS NULL", which is
     847             :              * unsatisfiable in combination with any other index condition. By
     848             :              * the time we get here, that's been classified as an equality
     849             :              * check, and we've rejected any combination of it with a regular
     850             :              * equality condition; but not with other types of conditions.
     851             :              */
     852      511205 :             if (xform[BTEqualStrategyNumber - 1])
     853             :             {
     854      484469 :                 ScanKey     eq = xform[BTEqualStrategyNumber - 1];
     855             : 
     856     3391263 :                 for (j = BTMaxStrategyNumber; --j >= 0;)
     857             :                 {
     858     2422329 :                     ScanKey     chk = xform[j];
     859             : 
     860     2422329 :                     if (!chk || j == (BTEqualStrategyNumber - 1))
     861     2422313 :                         continue;
     862             : 
     863          16 :                     if (eq->sk_flags & SK_SEARCHNULL)
     864             :                     {
     865             :                         /* IS NULL is contradictory to anything else */
     866           4 :                         so->qual_ok = false;
     867           4 :                         return;
     868             :                     }
     869             : 
     870          12 :                     if (_bt_compare_scankey_args(scan, chk, eq, chk,
     871             :                                                  &test_result))
     872             :                     {
     873          12 :                         if (!test_result)
     874             :                         {
     875             :                             /* keys proven mutually contradictory */
     876           0 :                             so->qual_ok = false;
     877           0 :                             return;
     878             :                         }
     879             :                         /* else discard the redundant non-equality key */
     880          12 :                         xform[j] = NULL;
     881             :                     }
     882             :                     /* else, cannot determine redundancy, keep both keys */
     883             :                 }
     884             :                 /* track number of attrs for which we have "=" keys */
     885      484465 :                 numberOfEqualCols++;
     886             :             }
     887             : 
     888             :             /* try to keep only one of <, <= */
     889      511201 :             if (xform[BTLessStrategyNumber - 1]
     890          83 :                 && xform[BTLessEqualStrategyNumber - 1])
     891             :             {
     892           0 :                 ScanKey     lt = xform[BTLessStrategyNumber - 1];
     893           0 :                 ScanKey     le = xform[BTLessEqualStrategyNumber - 1];
     894             : 
     895           0 :                 if (_bt_compare_scankey_args(scan, le, lt, le,
     896             :                                              &test_result))
     897             :                 {
     898           0 :                     if (test_result)
     899           0 :                         xform[BTLessEqualStrategyNumber - 1] = NULL;
     900             :                     else
     901           0 :                         xform[BTLessStrategyNumber - 1] = NULL;
     902             :                 }
     903             :             }
     904             : 
     905             :             /* try to keep only one of >, >= */
     906      511201 :             if (xform[BTGreaterStrategyNumber - 1]
     907       26196 :                 && xform[BTGreaterEqualStrategyNumber - 1])
     908             :             {
     909           0 :                 ScanKey     gt = xform[BTGreaterStrategyNumber - 1];
     910           0 :                 ScanKey     ge = xform[BTGreaterEqualStrategyNumber - 1];
     911             : 
     912           0 :                 if (_bt_compare_scankey_args(scan, ge, gt, ge,
     913             :                                              &test_result))
     914             :                 {
     915           0 :                     if (test_result)
     916           0 :                         xform[BTGreaterEqualStrategyNumber - 1] = NULL;
     917             :                     else
     918           0 :                         xform[BTGreaterStrategyNumber - 1] = NULL;
     919             :                 }
     920             :             }
     921             : 
     922             :             /*
     923             :              * Emit the cleaned-up keys into the outkeys[] array, and then
     924             :              * mark them if they are required.  They are required (possibly
     925             :              * only in one direction) if all attrs before this one had "=".
     926             :              */
     927     3578407 :             for (j = BTMaxStrategyNumber; --j >= 0;)
     928             :             {
     929     2556005 :                 if (xform[j])
     930             :                 {
     931      511270 :                     ScanKey     outkey = &outkeys[new_numberOfKeys++];
     932             : 
     933      511270 :                     memcpy(outkey, xform[j], sizeof(ScanKeyData));
     934      511270 :                     if (priorNumberOfEqualCols == attno - 1)
     935      511216 :                         _bt_mark_scankey_required(outkey);
     936             :                 }
     937             :             }
     938             : 
     939             :             /*
     940             :              * Exit loop here if done.
     941             :              */
     942      511201 :             if (i == numberOfKeys)
     943      233820 :                 break;
     944             : 
     945             :             /* Re-initialize for new attno */
     946      277381 :             attno = cur->sk_attno;
     947      277381 :             memset(xform, 0, sizeof(xform));
     948             :         }
     949             : 
     950             :         /* check strategy this key's operator corresponds to */
     951      511292 :         j = cur->sk_strategy - 1;
     952             : 
     953             :         /* if row comparison, push it directly to the output array */
     954      511292 :         if (cur->sk_flags & SK_ROW_HEADER)
     955             :         {
     956           0 :             ScanKey     outkey = &outkeys[new_numberOfKeys++];
     957             : 
     958           0 :             memcpy(outkey, cur, sizeof(ScanKeyData));
     959           0 :             if (numberOfEqualCols == attno - 1)
     960           0 :                 _bt_mark_scankey_required(outkey);
     961             : 
     962             :             /*
     963             :              * We don't support RowCompare using equality; such a qual would
     964             :              * mess up the numberOfEqualCols tracking.
     965             :              */
     966           0 :             Assert(j != (BTEqualStrategyNumber - 1));
     967           0 :             continue;
     968             :         }
     969             : 
     970             :         /* have we seen one of these before? */
     971      511292 :         if (xform[j] == NULL)
     972             :         {
     973             :             /* nope, so remember this scankey */
     974      511290 :             xform[j] = cur;
     975             :         }
     976             :         else
     977             :         {
     978             :             /* yup, keep only the more restrictive key */
     979           2 :             if (_bt_compare_scankey_args(scan, cur, cur, xform[j],
     980             :                                          &test_result))
     981             :             {
     982           2 :                 if (test_result)
     983           2 :                     xform[j] = cur;
     984           0 :                 else if (j == (BTEqualStrategyNumber - 1))
     985             :                 {
     986             :                     /* key == a && key == b, but a != b */
     987           0 :                     so->qual_ok = false;
     988           0 :                     return;
     989             :                 }
     990             :                 /* else old key is more restrictive, keep it */
     991             :             }
     992             :             else
     993             :             {
     994             :                 /*
     995             :                  * We can't determine which key is more restrictive.  Keep the
     996             :                  * previous one in xform[j] and push this one directly to the
     997             :                  * output array.
     998             :                  */
     999           0 :                 ScanKey     outkey = &outkeys[new_numberOfKeys++];
    1000             : 
    1001           0 :                 memcpy(outkey, cur, sizeof(ScanKeyData));
    1002           0 :                 if (numberOfEqualCols == attno - 1)
    1003           0 :                     _bt_mark_scankey_required(outkey);
    1004             :             }
    1005             :         }
    1006      511292 :     }
    1007             : 
    1008      233820 :     so->numberOfKeys = new_numberOfKeys;
    1009             : }
    1010             : 
    1011             : /*
    1012             :  * Compare two scankey values using a specified operator.
    1013             :  *
    1014             :  * The test we want to perform is logically "leftarg op rightarg", where
    1015             :  * leftarg and rightarg are the sk_argument values in those ScanKeys, and
    1016             :  * the comparison operator is the one in the op ScanKey.  However, in
    1017             :  * cross-data-type situations we may need to look up the correct operator in
    1018             :  * the index's opfamily: it is the one having amopstrategy = op->sk_strategy
    1019             :  * and amoplefttype/amoprighttype equal to the two argument datatypes.
    1020             :  *
    1021             :  * If the opfamily doesn't supply a complete set of cross-type operators we
    1022             :  * may not be able to make the comparison.  If we can make the comparison
    1023             :  * we store the operator result in *result and return TRUE.  We return FALSE
    1024             :  * if the comparison could not be made.
    1025             :  *
    1026             :  * Note: op always points at the same ScanKey as either leftarg or rightarg.
    1027             :  * Since we don't scribble on the scankeys, this aliasing should cause no
    1028             :  * trouble.
    1029             :  *
    1030             :  * Note: this routine needs to be insensitive to any DESC option applied
    1031             :  * to the index column.  For example, "x < 4" is a tighter constraint than
    1032             :  * "x < 5" regardless of which way the index is sorted.
    1033             :  */
    1034             : static bool
    1035          14 : _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
    1036             :                          ScanKey leftarg, ScanKey rightarg,
    1037             :                          bool *result)
    1038             : {
    1039          14 :     Relation    rel = scan->indexRelation;
    1040             :     Oid         lefttype,
    1041             :                 righttype,
    1042             :                 optype,
    1043             :                 opcintype,
    1044             :                 cmp_op;
    1045             :     StrategyNumber strat;
    1046             : 
    1047             :     /*
    1048             :      * First, deal with cases where one or both args are NULL.  This should
    1049             :      * only happen when the scankeys represent IS NULL/NOT NULL conditions.
    1050             :      */
    1051          14 :     if ((leftarg->sk_flags | rightarg->sk_flags) & SK_ISNULL)
    1052             :     {
    1053             :         bool        leftnull,
    1054             :                     rightnull;
    1055             : 
    1056           4 :         if (leftarg->sk_flags & SK_ISNULL)
    1057             :         {
    1058           0 :             Assert(leftarg->sk_flags & (SK_SEARCHNULL | SK_SEARCHNOTNULL));
    1059           0 :             leftnull = true;
    1060             :         }
    1061             :         else
    1062           4 :             leftnull = false;
    1063           4 :         if (rightarg->sk_flags & SK_ISNULL)
    1064             :         {
    1065           4 :             Assert(rightarg->sk_flags & (SK_SEARCHNULL | SK_SEARCHNOTNULL));
    1066           4 :             rightnull = true;
    1067             :         }
    1068             :         else
    1069           0 :             rightnull = false;
    1070             : 
    1071             :         /*
    1072             :          * We treat NULL as either greater than or less than all other values.
    1073             :          * Since true > false, the tests below work correctly for NULLS LAST
    1074             :          * logic.  If the index is NULLS FIRST, we need to flip the strategy.
    1075             :          */
    1076           4 :         strat = op->sk_strategy;
    1077           4 :         if (op->sk_flags & SK_BT_NULLS_FIRST)
    1078           0 :             strat = BTCommuteStrategyNumber(strat);
    1079             : 
    1080           4 :         switch (strat)
    1081             :         {
    1082             :             case BTLessStrategyNumber:
    1083           4 :                 *result = (leftnull < rightnull);
    1084           4 :                 break;
    1085             :             case BTLessEqualStrategyNumber:
    1086           0 :                 *result = (leftnull <= rightnull);
    1087           0 :                 break;
    1088             :             case BTEqualStrategyNumber:
    1089           0 :                 *result = (leftnull == rightnull);
    1090           0 :                 break;
    1091             :             case BTGreaterEqualStrategyNumber:
    1092           0 :                 *result = (leftnull >= rightnull);
    1093           0 :                 break;
    1094             :             case BTGreaterStrategyNumber:
    1095           0 :                 *result = (leftnull > rightnull);
    1096           0 :                 break;
    1097             :             default:
    1098           0 :                 elog(ERROR, "unrecognized StrategyNumber: %d", (int) strat);
    1099             :                 *result = false;    /* keep compiler quiet */
    1100             :                 break;
    1101             :         }
    1102           4 :         return true;
    1103             :     }
    1104             : 
    1105             :     /*
    1106             :      * The opfamily we need to worry about is identified by the index column.
    1107             :      */
    1108          10 :     Assert(leftarg->sk_attno == rightarg->sk_attno);
    1109             : 
    1110          10 :     opcintype = rel->rd_opcintype[leftarg->sk_attno - 1];
    1111             : 
    1112             :     /*
    1113             :      * Determine the actual datatypes of the ScanKey arguments.  We have to
    1114             :      * support the convention that sk_subtype == InvalidOid means the opclass
    1115             :      * input type; this is a hack to simplify life for ScanKeyInit().
    1116             :      */
    1117          10 :     lefttype = leftarg->sk_subtype;
    1118          10 :     if (lefttype == InvalidOid)
    1119           0 :         lefttype = opcintype;
    1120          10 :     righttype = rightarg->sk_subtype;
    1121          10 :     if (righttype == InvalidOid)
    1122           0 :         righttype = opcintype;
    1123          10 :     optype = op->sk_subtype;
    1124          10 :     if (optype == InvalidOid)
    1125           0 :         optype = opcintype;
    1126             : 
    1127             :     /*
    1128             :      * If leftarg and rightarg match the types expected for the "op" scankey,
    1129             :      * we can use its already-looked-up comparison function.
    1130             :      */
    1131          10 :     if (lefttype == opcintype && righttype == optype)
    1132             :     {
    1133          10 :         *result = DatumGetBool(FunctionCall2Coll(&op->sk_func,
    1134             :                                                  op->sk_collation,
    1135             :                                                  leftarg->sk_argument,
    1136             :                                                  rightarg->sk_argument));
    1137          10 :         return true;
    1138             :     }
    1139             : 
    1140             :     /*
    1141             :      * Otherwise, we need to go to the syscache to find the appropriate
    1142             :      * operator.  (This cannot result in infinite recursion, since no
    1143             :      * indexscan initiated by syscache lookup will use cross-data-type
    1144             :      * operators.)
    1145             :      *
    1146             :      * If the sk_strategy was flipped by _bt_fix_scankey_strategy, we have to
    1147             :      * un-flip it to get the correct opfamily member.
    1148             :      */
    1149           0 :     strat = op->sk_strategy;
    1150           0 :     if (op->sk_flags & SK_BT_DESC)
    1151           0 :         strat = BTCommuteStrategyNumber(strat);
    1152             : 
    1153           0 :     cmp_op = get_opfamily_member(rel->rd_opfamily[leftarg->sk_attno - 1],
    1154             :                                  lefttype,
    1155             :                                  righttype,
    1156             :                                  strat);
    1157           0 :     if (OidIsValid(cmp_op))
    1158             :     {
    1159           0 :         RegProcedure cmp_proc = get_opcode(cmp_op);
    1160             : 
    1161           0 :         if (RegProcedureIsValid(cmp_proc))
    1162             :         {
    1163           0 :             *result = DatumGetBool(OidFunctionCall2Coll(cmp_proc,
    1164             :                                                         op->sk_collation,
    1165             :                                                         leftarg->sk_argument,
    1166             :                                                         rightarg->sk_argument));
    1167           0 :             return true;
    1168             :         }
    1169             :     }
    1170             : 
    1171             :     /* Can't make the comparison */
    1172           0 :     *result = false;            /* suppress compiler warnings */
    1173           0 :     return false;
    1174             : }
    1175             : 
    1176             : /*
    1177             :  * Adjust a scankey's strategy and flags setting as needed for indoptions.
    1178             :  *
    1179             :  * We copy the appropriate indoption value into the scankey sk_flags
    1180             :  * (shifting to avoid clobbering system-defined flag bits).  Also, if
    1181             :  * the DESC option is set, commute (flip) the operator strategy number.
    1182             :  *
    1183             :  * A secondary purpose is to check for IS NULL/NOT NULL scankeys and set up
    1184             :  * the strategy field correctly for them.
    1185             :  *
    1186             :  * Lastly, for ordinary scankeys (not IS NULL/NOT NULL), we check for a
    1187             :  * NULL comparison value.  Since all btree operators are assumed strict,
    1188             :  * a NULL means that the qual cannot be satisfied.  We return TRUE if the
    1189             :  * comparison value isn't NULL, or FALSE if the scan should be abandoned.
    1190             :  *
    1191             :  * This function is applied to the *input* scankey structure; therefore
    1192             :  * on a rescan we will be looking at already-processed scankeys.  Hence
    1193             :  * we have to be careful not to re-commute the strategy if we already did it.
    1194             :  * It's a bit ugly to modify the caller's copy of the scankey but in practice
    1195             :  * there shouldn't be any problem, since the index's indoptions are certainly
    1196             :  * not going to change while the scankey survives.
    1197             :  */
    1198             : static bool
    1199      665176 : _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
    1200             : {
    1201             :     int         addflags;
    1202             : 
    1203      665176 :     addflags = indoption[skey->sk_attno - 1] << SK_BT_INDOPTION_SHIFT;
    1204             : 
    1205             :     /*
    1206             :      * We treat all btree operators as strict (even if they're not so marked
    1207             :      * in pg_proc). This means that it is impossible for an operator condition
    1208             :      * with a NULL comparison constant to succeed, and we can reject it right
    1209             :      * away.
    1210             :      *
    1211             :      * However, we now also support "x IS NULL" clauses as search conditions,
    1212             :      * so in that case keep going. The planner has not filled in any
    1213             :      * particular strategy in this case, so set it to BTEqualStrategyNumber
    1214             :      * --- we can treat IS NULL as an equality operator for purposes of search
    1215             :      * strategy.
    1216             :      *
    1217             :      * Likewise, "x IS NOT NULL" is supported.  We treat that as either "less
    1218             :      * than NULL" in a NULLS LAST index, or "greater than NULL" in a NULLS
    1219             :      * FIRST index.
    1220             :      *
    1221             :      * Note: someday we might have to fill in sk_collation from the index
    1222             :      * column's collation.  At the moment this is a non-issue because we'll
    1223             :      * never actually call the comparison operator on a NULL.
    1224             :      */
    1225      665176 :     if (skey->sk_flags & SK_ISNULL)
    1226             :     {
    1227             :         /* SK_ISNULL shouldn't be set in a row header scankey */
    1228         702 :         Assert(!(skey->sk_flags & SK_ROW_HEADER));
    1229             : 
    1230             :         /* Set indoption flags in scankey (might be done already) */
    1231         702 :         skey->sk_flags |= addflags;
    1232             : 
    1233             :         /* Set correct strategy for IS NULL or NOT NULL search */
    1234         702 :         if (skey->sk_flags & SK_SEARCHNULL)
    1235             :         {
    1236          20 :             skey->sk_strategy = BTEqualStrategyNumber;
    1237          20 :             skey->sk_subtype = InvalidOid;
    1238          20 :             skey->sk_collation = InvalidOid;
    1239             :         }
    1240         682 :         else if (skey->sk_flags & SK_SEARCHNOTNULL)
    1241             :         {
    1242         675 :             if (skey->sk_flags & SK_BT_NULLS_FIRST)
    1243           6 :                 skey->sk_strategy = BTGreaterStrategyNumber;
    1244             :             else
    1245         669 :                 skey->sk_strategy = BTLessStrategyNumber;
    1246         675 :             skey->sk_subtype = InvalidOid;
    1247         675 :             skey->sk_collation = InvalidOid;
    1248             :         }
    1249             :         else
    1250             :         {
    1251             :             /* regular qual, so it cannot be satisfied */
    1252           7 :             return false;
    1253             :         }
    1254             : 
    1255             :         /* Needn't do the rest */
    1256         695 :         return true;
    1257             :     }
    1258             : 
    1259             :     /* Adjust strategy for DESC, if we didn't already */
    1260      664474 :     if ((addflags & SK_BT_DESC) && !(skey->sk_flags & SK_BT_DESC))
    1261           0 :         skey->sk_strategy = BTCommuteStrategyNumber(skey->sk_strategy);
    1262      664474 :     skey->sk_flags |= addflags;
    1263             : 
    1264             :     /* If it's a row header, fix row member flags and strategies similarly */
    1265      664474 :     if (skey->sk_flags & SK_ROW_HEADER)
    1266             :     {
    1267           2 :         ScanKey     subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
    1268             : 
    1269             :         for (;;)
    1270             :         {
    1271           4 :             Assert(subkey->sk_flags & SK_ROW_MEMBER);
    1272           4 :             addflags = indoption[subkey->sk_attno - 1] << SK_BT_INDOPTION_SHIFT;
    1273           4 :             if ((addflags & SK_BT_DESC) && !(subkey->sk_flags & SK_BT_DESC))
    1274           0 :                 subkey->sk_strategy = BTCommuteStrategyNumber(subkey->sk_strategy);
    1275           4 :             subkey->sk_flags |= addflags;
    1276           4 :             if (subkey->sk_flags & SK_ROW_END)
    1277           2 :                 break;
    1278           2 :             subkey++;
    1279           2 :         }
    1280             :     }
    1281             : 
    1282      664474 :     return true;
    1283             : }
    1284             : 
    1285             : /*
    1286             :  * Mark a scankey as "required to continue the scan".
    1287             :  *
    1288             :  * Depending on the operator type, the key may be required for both scan
    1289             :  * directions or just one.  Also, if the key is a row comparison header,
    1290             :  * we have to mark its first subsidiary ScanKey as required.  (Subsequent
    1291             :  * subsidiary ScanKeys are normally for lower-order columns, and thus
    1292             :  * cannot be required, since they're after the first non-equality scankey.)
    1293             :  *
    1294             :  * Note: when we set required-key flag bits in a subsidiary scankey, we are
    1295             :  * scribbling on a data structure belonging to the index AM's caller, not on
    1296             :  * our private copy.  This should be OK because the marking will not change
    1297             :  * from scan to scan within a query, and so we'd just re-mark the same way
    1298             :  * anyway on a rescan.  Something to keep an eye on though.
    1299             :  */
    1300             : static void
    1301      665079 : _bt_mark_scankey_required(ScanKey skey)
    1302             : {
    1303             :     int         addflags;
    1304             : 
    1305      665079 :     switch (skey->sk_strategy)
    1306             :     {
    1307             :         case BTLessStrategyNumber:
    1308             :         case BTLessEqualStrategyNumber:
    1309         803 :             addflags = SK_BT_REQFWD;
    1310         803 :             break;
    1311             :         case BTEqualStrategyNumber:
    1312      637482 :             addflags = SK_BT_REQFWD | SK_BT_REQBKWD;
    1313      637482 :             break;
    1314             :         case BTGreaterEqualStrategyNumber:
    1315             :         case BTGreaterStrategyNumber:
    1316       26794 :             addflags = SK_BT_REQBKWD;
    1317       26794 :             break;
    1318             :         default:
    1319           0 :             elog(ERROR, "unrecognized StrategyNumber: %d",
    1320             :                  (int) skey->sk_strategy);
    1321             :             addflags = 0;       /* keep compiler quiet */
    1322             :             break;
    1323             :     }
    1324             : 
    1325      665079 :     skey->sk_flags |= addflags;
    1326             : 
    1327      665079 :     if (skey->sk_flags & SK_ROW_HEADER)
    1328             :     {
    1329           2 :         ScanKey     subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
    1330             : 
    1331             :         /* First subkey should be same column/operator as the header */
    1332           2 :         Assert(subkey->sk_flags & SK_ROW_MEMBER);
    1333           2 :         Assert(subkey->sk_attno == skey->sk_attno);
    1334           2 :         Assert(subkey->sk_strategy == skey->sk_strategy);
    1335           2 :         subkey->sk_flags |= addflags;
    1336             :     }
    1337      665079 : }
    1338             : 
    1339             : /*
    1340             :  * Test whether an indextuple satisfies all the scankey conditions.
    1341             :  *
    1342             :  * If so, return the address of the index tuple on the index page.
    1343             :  * If not, return NULL.
    1344             :  *
    1345             :  * If the tuple fails to pass the qual, we also determine whether there's
    1346             :  * any need to continue the scan beyond this tuple, and set *continuescan
    1347             :  * accordingly.  See comments for _bt_preprocess_keys(), above, about how
    1348             :  * this is done.
    1349             :  *
    1350             :  * scan: index scan descriptor (containing a search-type scankey)
    1351             :  * page: buffer page containing index tuple
    1352             :  * offnum: offset number of index tuple (must be a valid item!)
    1353             :  * dir: direction we are scanning in
    1354             :  * continuescan: output parameter (will be set correctly in all cases)
    1355             :  *
    1356             :  * Caller must hold pin and lock on the index page.
    1357             :  */
    1358             : IndexTuple
    1359     2178020 : _bt_checkkeys(IndexScanDesc scan,
    1360             :               Page page, OffsetNumber offnum,
    1361             :               ScanDirection dir, bool *continuescan)
    1362             : {
    1363     2178020 :     ItemId      iid = PageGetItemId(page, offnum);
    1364             :     bool        tuple_alive;
    1365             :     IndexTuple  tuple;
    1366             :     TupleDesc   tupdesc;
    1367             :     BTScanOpaque so;
    1368             :     int         keysz;
    1369             :     int         ikey;
    1370             :     ScanKey     key;
    1371             : 
    1372     2178020 :     *continuescan = true;       /* default assumption */
    1373             : 
    1374             :     /*
    1375             :      * If the scan specifies not to return killed tuples, then we treat a
    1376             :      * killed tuple as not passing the qual.  Most of the time, it's a win to
    1377             :      * not bother examining the tuple's index keys, but just return
    1378             :      * immediately with continuescan = true to proceed to the next tuple.
    1379             :      * However, if this is the last tuple on the page, we should check the
    1380             :      * index keys to prevent uselessly advancing to the next page.
    1381             :      */
    1382     2178020 :     if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
    1383             :     {
    1384             :         /* return immediately if there are more tuples on the page */
    1385       65719 :         if (ScanDirectionIsForward(dir))
    1386             :         {
    1387       64088 :             if (offnum < PageGetMaxOffsetNumber(page))
    1388       63389 :                 return NULL;
    1389             :         }
    1390             :         else
    1391             :         {
    1392        1631 :             BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
    1393             : 
    1394        1631 :             if (offnum > P_FIRSTDATAKEY(opaque))
    1395        1605 :                 return NULL;
    1396             :         }
    1397             : 
    1398             :         /*
    1399             :          * OK, we want to check the keys so we can set continuescan correctly,
    1400             :          * but we'll return NULL even if the tuple passes the key tests.
    1401             :          */
    1402         725 :         tuple_alive = false;
    1403             :     }
    1404             :     else
    1405     2112301 :         tuple_alive = true;
    1406             : 
    1407     2113026 :     tuple = (IndexTuple) PageGetItem(page, iid);
    1408             : 
    1409     2113026 :     tupdesc = RelationGetDescr(scan->indexRelation);
    1410     2113026 :     so = (BTScanOpaque) scan->opaque;
    1411     2113026 :     keysz = so->numberOfKeys;
    1412             : 
    1413     4263912 :     for (key = so->keyData, ikey = 0; ikey < keysz; key++, ikey++)
    1414             :     {
    1415             :         Datum       datum;
    1416             :         bool        isNull;
    1417             :         Datum       test;
    1418             : 
    1419             :         /* row-comparison keys need special processing */
    1420     2480990 :         if (key->sk_flags & SK_ROW_HEADER)
    1421             :         {
    1422        1027 :             if (_bt_check_rowcompare(key, tuple, tupdesc, dir, continuescan))
    1423      164588 :                 continue;
    1424      331104 :             return NULL;
    1425             :         }
    1426             : 
    1427     2479963 :         datum = index_getattr(tuple,
    1428             :                               key->sk_attno,
    1429             :                               tupdesc,
    1430             :                               &isNull);
    1431             : 
    1432     2479963 :         if (key->sk_flags & SK_ISNULL)
    1433             :         {
    1434             :             /* Handle IS NULL/NOT NULL tests */
    1435      172546 :             if (key->sk_flags & SK_SEARCHNULL)
    1436             :             {
    1437        8022 :                 if (isNull)
    1438          20 :                     continue;   /* tuple satisfies this qual */
    1439             :             }
    1440             :             else
    1441             :             {
    1442      164524 :                 Assert(key->sk_flags & SK_SEARCHNOTNULL);
    1443      164524 :                 if (!isNull)
    1444      164514 :                     continue;   /* tuple satisfies this qual */
    1445             :             }
    1446             : 
    1447             :             /*
    1448             :              * Tuple fails this qual.  If it's a required qual for the current
    1449             :              * scan direction, then we can conclude no further tuples will
    1450             :              * pass, either.
    1451             :              */
    1452        8012 :             if ((key->sk_flags & SK_BT_REQFWD) &&
    1453             :                 ScanDirectionIsForward(dir))
    1454           4 :                 *continuescan = false;
    1455        8008 :             else if ((key->sk_flags & SK_BT_REQBKWD) &&
    1456             :                      ScanDirectionIsBackward(dir))
    1457           0 :                 *continuescan = false;
    1458             : 
    1459             :             /*
    1460             :              * In any case, this indextuple doesn't match the qual.
    1461             :              */
    1462        8012 :             return NULL;
    1463             :         }
    1464             : 
    1465     2307417 :         if (isNull)
    1466             :         {
    1467          20 :             if (key->sk_flags & SK_BT_NULLS_FIRST)
    1468             :             {
    1469             :                 /*
    1470             :                  * Since NULLs are sorted before non-NULLs, we know we have
    1471             :                  * reached the lower limit of the range of values for this
    1472             :                  * index attr.  On a backward scan, we can stop if this qual
    1473             :                  * is one of the "must match" subset.  We can stop regardless
    1474             :                  * of whether the qual is > or <, so long as it's required,
    1475             :                  * because it's not possible for any future tuples to pass. On
    1476             :                  * a forward scan, however, we must keep going, because we may
    1477             :                  * have initially positioned to the start of the index.
    1478             :                  */
    1479           0 :                 if ((key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
    1480             :                     ScanDirectionIsBackward(dir))
    1481           0 :                     *continuescan = false;
    1482             :             }
    1483             :             else
    1484             :             {
    1485             :                 /*
    1486             :                  * Since NULLs are sorted after non-NULLs, we know we have
    1487             :                  * reached the upper limit of the range of values for this
    1488             :                  * index attr.  On a forward scan, we can stop if this qual is
    1489             :                  * one of the "must match" subset.  We can stop regardless of
    1490             :                  * whether the qual is > or <, so long as it's required,
    1491             :                  * because it's not possible for any future tuples to pass. On
    1492             :                  * a backward scan, however, we must keep going, because we
    1493             :                  * may have initially positioned to the end of the index.
    1494             :                  */
    1495          20 :                 if ((key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
    1496             :                     ScanDirectionIsForward(dir))
    1497          12 :                     *continuescan = false;
    1498             :             }
    1499             : 
    1500             :             /*
    1501             :              * In any case, this indextuple doesn't match the qual.
    1502             :              */
    1503          20 :             return NULL;
    1504             :         }
    1505             : 
    1506     2307397 :         test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
    1507             :                                  datum, key->sk_argument);
    1508             : 
    1509     2307397 :         if (!DatumGetBool(test))
    1510             :         {
    1511             :             /*
    1512             :              * Tuple fails this qual.  If it's a required qual for the current
    1513             :              * scan direction, then we can conclude no further tuples will
    1514             :              * pass, either.
    1515             :              *
    1516             :              * Note: because we stop the scan as soon as any required equality
    1517             :              * qual fails, it is critical that equality quals be used for the
    1518             :              * initial positioning in _bt_first() when they are available. See
    1519             :              * comments in _bt_first().
    1520             :              */
    1521      321072 :             if ((key->sk_flags & SK_BT_REQFWD) &&
    1522             :                 ScanDirectionIsForward(dir))
    1523      302096 :                 *continuescan = false;
    1524       18976 :             else if ((key->sk_flags & SK_BT_REQBKWD) &&
    1525             :                      ScanDirectionIsBackward(dir))
    1526          13 :                 *continuescan = false;
    1527             : 
    1528             :             /*
    1529             :              * In any case, this indextuple doesn't match the qual.
    1530             :              */
    1531      321072 :             return NULL;
    1532             :         }
    1533             :     }
    1534             : 
    1535             :     /* Check for failure due to it being a killed tuple. */
    1536     1782922 :     if (!tuple_alive)
    1537         361 :         return NULL;
    1538             : 
    1539             :     /* If we get here, the tuple passes all index quals. */
    1540     1782561 :     return tuple;
    1541             : }
    1542             : 
    1543             : /*
    1544             :  * Test whether an indextuple satisfies a row-comparison scan condition.
    1545             :  *
    1546             :  * Return true if so, false if not.  If not, also clear *continuescan if
    1547             :  * it's not possible for any future tuples in the current scan direction
    1548             :  * to pass the qual.
    1549             :  *
    1550             :  * This is a subroutine for _bt_checkkeys, which see for more info.
    1551             :  */
    1552             : static bool
    1553        1027 : _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
    1554             :                      ScanDirection dir, bool *continuescan)
    1555             : {
    1556        1027 :     ScanKey     subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
    1557        1027 :     int32       cmpresult = 0;
    1558             :     bool        result;
    1559             : 
    1560             :     /* First subkey should be same as the header says */
    1561        1027 :     Assert(subkey->sk_attno == skey->sk_attno);
    1562             : 
    1563             :     /* Loop over columns of the row condition */
    1564             :     for (;;)
    1565             :     {
    1566             :         Datum       datum;
    1567             :         bool        isNull;
    1568             : 
    1569        2033 :         Assert(subkey->sk_flags & SK_ROW_MEMBER);
    1570             : 
    1571        2033 :         datum = index_getattr(tuple,
    1572             :                               subkey->sk_attno,
    1573             :                               tupdesc,
    1574             :                               &isNull);
    1575             : 
    1576        2033 :         if (isNull)
    1577             :         {
    1578        1000 :             if (subkey->sk_flags & SK_BT_NULLS_FIRST)
    1579             :             {
    1580             :                 /*
    1581             :                  * Since NULLs are sorted before non-NULLs, we know we have
    1582             :                  * reached the lower limit of the range of values for this
    1583             :                  * index attr.  On a backward scan, we can stop if this qual
    1584             :                  * is one of the "must match" subset.  We can stop regardless
    1585             :                  * of whether the qual is > or <, so long as it's required,
    1586             :                  * because it's not possible for any future tuples to pass. On
    1587             :                  * a forward scan, however, we must keep going, because we may
    1588             :                  * have initially positioned to the start of the index.
    1589             :                  */
    1590           0 :                 if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
    1591             :                     ScanDirectionIsBackward(dir))
    1592           0 :                     *continuescan = false;
    1593             :             }
    1594             :             else
    1595             :             {
    1596             :                 /*
    1597             :                  * Since NULLs are sorted after non-NULLs, we know we have
    1598             :                  * reached the upper limit of the range of values for this
    1599             :                  * index attr.  On a forward scan, we can stop if this qual is
    1600             :                  * one of the "must match" subset.  We can stop regardless of
    1601             :                  * whether the qual is > or <, so long as it's required,
    1602             :                  * because it's not possible for any future tuples to pass. On
    1603             :                  * a backward scan, however, we must keep going, because we
    1604             :                  * may have initially positioned to the end of the index.
    1605             :                  */
    1606        1000 :                 if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
    1607             :                     ScanDirectionIsForward(dir))
    1608           0 :                     *continuescan = false;
    1609             :             }
    1610             : 
    1611             :             /*
    1612             :              * In any case, this indextuple doesn't match the qual.
    1613             :              */
    1614        2000 :             return false;
    1615             :         }
    1616             : 
    1617        1033 :         if (subkey->sk_flags & SK_ISNULL)
    1618             :         {
    1619             :             /*
    1620             :              * Unlike the simple-scankey case, this isn't a disallowed case.
    1621             :              * But it can never match.  If all the earlier row comparison
    1622             :              * columns are required for the scan direction, we can stop the
    1623             :              * scan, because there can't be another tuple that will succeed.
    1624             :              */
    1625           0 :             if (subkey != (ScanKey) DatumGetPointer(skey->sk_argument))
    1626           0 :                 subkey--;
    1627           0 :             if ((subkey->sk_flags & SK_BT_REQFWD) &&
    1628             :                 ScanDirectionIsForward(dir))
    1629           0 :                 *continuescan = false;
    1630           0 :             else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
    1631             :                      ScanDirectionIsBackward(dir))
    1632           0 :                 *continuescan = false;
    1633           0 :             return false;
    1634             :         }
    1635             : 
    1636             :         /* Perform the test --- three-way comparison not bool operator */
    1637        1033 :         cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func,
    1638             :                                                     subkey->sk_collation,
    1639             :                                                     datum,
    1640             :                                                     subkey->sk_argument));
    1641             : 
    1642        1033 :         if (subkey->sk_flags & SK_BT_DESC)
    1643           0 :             cmpresult = -cmpresult;
    1644             : 
    1645             :         /* Done comparing if unequal, else advance to next column */
    1646        1033 :         if (cmpresult != 0)
    1647          54 :             break;
    1648             : 
    1649        1006 :         if (subkey->sk_flags & SK_ROW_END)
    1650           0 :             break;
    1651        1006 :         subkey++;
    1652        1006 :     }
    1653             : 
    1654             :     /*
    1655             :      * At this point cmpresult indicates the overall result of the row
    1656             :      * comparison, and subkey points to the deciding column (or the last
    1657             :      * column if the result is "=").
    1658             :      */
    1659          27 :     switch (subkey->sk_strategy)
    1660             :     {
    1661             :             /* EQ and NE cases aren't allowed here */
    1662             :         case BTLessStrategyNumber:
    1663           0 :             result = (cmpresult < 0);
    1664           0 :             break;
    1665             :         case BTLessEqualStrategyNumber:
    1666           0 :             result = (cmpresult <= 0);
    1667           0 :             break;
    1668             :         case BTGreaterEqualStrategyNumber:
    1669          25 :             result = (cmpresult >= 0);
    1670          25 :             break;
    1671             :         case BTGreaterStrategyNumber:
    1672           2 :             result = (cmpresult > 0);
    1673           2 :             break;
    1674             :         default:
    1675           0 :             elog(ERROR, "unrecognized RowCompareType: %d",
    1676             :                  (int) subkey->sk_strategy);
    1677             :             result = 0;         /* keep compiler quiet */
    1678             :             break;
    1679             :     }
    1680             : 
    1681          27 :     if (!result)
    1682             :     {
    1683             :         /*
    1684             :          * Tuple fails this qual.  If it's a required qual for the current
    1685             :          * scan direction, then we can conclude no further tuples will pass,
    1686             :          * either.  Note we have to look at the deciding column, not
    1687             :          * necessarily the first or last column of the row condition.
    1688             :          */
    1689           0 :         if ((subkey->sk_flags & SK_BT_REQFWD) &&
    1690             :             ScanDirectionIsForward(dir))
    1691           0 :             *continuescan = false;
    1692           0 :         else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
    1693             :                  ScanDirectionIsBackward(dir))
    1694           0 :             *continuescan = false;
    1695             :     }
    1696             : 
    1697          27 :     return result;
    1698             : }
    1699             : 
    1700             : /*
    1701             :  * _bt_killitems - set LP_DEAD state for items an indexscan caller has
    1702             :  * told us were killed
    1703             :  *
    1704             :  * scan->opaque, referenced locally through so, contains information about the
    1705             :  * current page and killed tuples thereon (generally, this should only be
    1706             :  * called if so->numKilled > 0).
    1707             :  *
    1708             :  * The caller does not have a lock on the page and may or may not have the
    1709             :  * page pinned in a buffer.  Note that read-lock is sufficient for setting
    1710             :  * LP_DEAD status (which is only a hint).
    1711             :  *
    1712             :  * We match items by heap TID before assuming they are the right ones to
    1713             :  * delete.  We cope with cases where items have moved right due to insertions.
    1714             :  * If an item has moved off the current page due to a split, we'll fail to
    1715             :  * find it and do nothing (this is not an error case --- we assume the item
    1716             :  * will eventually get marked in a future indexscan).
    1717             :  *
    1718             :  * Note that if we hold a pin on the target page continuously from initially
    1719             :  * reading the items until applying this function, VACUUM cannot have deleted
    1720             :  * any items from the page, and so there is no need to search left from the
    1721             :  * recorded offset.  (This observation also guarantees that the item is still
    1722             :  * the right one to delete, which might otherwise be questionable since heap
    1723             :  * TIDs can get recycled.)  This holds true even if the page has been modified
    1724             :  * by inserts and page splits, so there is no need to consult the LSN.
    1725             :  *
    1726             :  * If the pin was released after reading the page, then we re-read it.  If it
    1727             :  * has been modified since we read it (as determined by the LSN), we dare not
    1728             :  * flag any entries because it is possible that the old entry was vacuumed
    1729             :  * away and the TID was re-used by a completely different heap tuple.
    1730             :  */
    1731             : void
    1732        2881 : _bt_killitems(IndexScanDesc scan)
    1733             : {
    1734        2881 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
    1735             :     Page        page;
    1736             :     BTPageOpaque opaque;
    1737             :     OffsetNumber minoff;
    1738             :     OffsetNumber maxoff;
    1739             :     int         i;
    1740        2881 :     int         numKilled = so->numKilled;
    1741        2881 :     bool        killedsomething = false;
    1742             : 
    1743        2881 :     Assert(BTScanPosIsValid(so->currPos));
    1744             : 
    1745             :     /*
    1746             :      * Always reset the scan state, so we don't look for same items on other
    1747             :      * pages.
    1748             :      */
    1749        2881 :     so->numKilled = 0;
    1750             : 
    1751        2881 :     if (BTScanPosIsPinned(so->currPos))
    1752             :     {
    1753             :         /*
    1754             :          * We have held the pin on this page since we read the index tuples,
    1755             :          * so all we need to do is lock it.  The pin will have prevented
    1756             :          * re-use of any TID on the page, so there is no need to check the
    1757             :          * LSN.
    1758             :          */
    1759          32 :         LockBuffer(so->currPos.buf, BT_READ);
    1760             : 
    1761          32 :         page = BufferGetPage(so->currPos.buf);
    1762             :     }
    1763             :     else
    1764             :     {
    1765             :         Buffer      buf;
    1766             : 
    1767             :         /* Attempt to re-read the buffer, getting pin and lock. */
    1768        2849 :         buf = _bt_getbuf(scan->indexRelation, so->currPos.currPage, BT_READ);
    1769             : 
    1770             :         /* It might not exist anymore; in which case we can't hint it. */
    1771        2849 :         if (!BufferIsValid(buf))
    1772           0 :             return;
    1773             : 
    1774        2849 :         page = BufferGetPage(buf);
    1775        2849 :         if (PageGetLSN(page) == so->currPos.lsn)
    1776        2837 :             so->currPos.buf = buf;
    1777             :         else
    1778             :         {
    1779             :             /* Modified while not pinned means hinting is not safe. */
    1780          12 :             _bt_relbuf(scan->indexRelation, buf);
    1781          12 :             return;
    1782             :         }
    1783             :     }
    1784             : 
    1785        2869 :     opaque = (BTPageOpaque) PageGetSpecialPointer(page);
    1786        2869 :     minoff = P_FIRSTDATAKEY(opaque);
    1787        2869 :     maxoff = PageGetMaxOffsetNumber(page);
    1788             : 
    1789        7964 :     for (i = 0; i < numKilled; i++)
    1790             :     {
    1791        5095 :         int         itemIndex = so->killedItems[i];
    1792        5095 :         BTScanPosItem *kitem = &so->currPos.items[itemIndex];
    1793        5095 :         OffsetNumber offnum = kitem->indexOffset;
    1794             : 
    1795        5095 :         Assert(itemIndex >= so->currPos.firstItem &&
    1796             :                itemIndex <= so->currPos.lastItem);
    1797        5095 :         if (offnum < minoff)
    1798           0 :             continue;           /* pure paranoia */
    1799       10190 :         while (offnum <= maxoff)
    1800             :         {
    1801        5095 :             ItemId      iid = PageGetItemId(page, offnum);
    1802        5095 :             IndexTuple  ituple = (IndexTuple) PageGetItem(page, iid);
    1803             : 
    1804        5095 :             if (ItemPointerEquals(&ituple->t_tid, &kitem->heapTid))
    1805             :             {
    1806             :                 /* found the item */
    1807        5095 :                 ItemIdMarkDead(iid);
    1808        5095 :                 killedsomething = true;
    1809        5095 :                 break;          /* out of inner search loop */
    1810             :             }
    1811           0 :             offnum = OffsetNumberNext(offnum);
    1812             :         }
    1813             :     }
    1814             : 
    1815             :     /*
    1816             :      * Since this can be redone later if needed, mark as dirty hint.
    1817             :      *
    1818             :      * Whenever we mark anything LP_DEAD, we also set the page's
    1819             :      * BTP_HAS_GARBAGE flag, which is likewise just a hint.
    1820             :      */
    1821        2869 :     if (killedsomething)
    1822             :     {
    1823        2869 :         opaque->btpo_flags |= BTP_HAS_GARBAGE;
    1824        2869 :         MarkBufferDirtyHint(so->currPos.buf, true);
    1825             :     }
    1826             : 
    1827        2869 :     LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
    1828             : }
    1829             : 
    1830             : 
    1831             : /*
    1832             :  * The following routines manage a shared-memory area in which we track
    1833             :  * assignment of "vacuum cycle IDs" to currently-active btree vacuuming
    1834             :  * operations.  There is a single counter which increments each time we
    1835             :  * start a vacuum to assign it a cycle ID.  Since multiple vacuums could
    1836             :  * be active concurrently, we have to track the cycle ID for each active
    1837             :  * vacuum; this requires at most MaxBackends entries (usually far fewer).
    1838             :  * We assume at most one vacuum can be active for a given index.
    1839             :  *
    1840             :  * Access to the shared memory area is controlled by BtreeVacuumLock.
    1841             :  * In principle we could use a separate lmgr locktag for each index,
    1842             :  * but a single LWLock is much cheaper, and given the short time that
    1843             :  * the lock is ever held, the concurrency hit should be minimal.
    1844             :  */
    1845             : 
    1846             : typedef struct BTOneVacInfo
    1847             : {
    1848             :     LockRelId   relid;          /* global identifier of an index */
    1849             :     BTCycleId   cycleid;        /* cycle ID for its active VACUUM */
    1850             : } BTOneVacInfo;
    1851             : 
    1852             : typedef struct BTVacInfo
    1853             : {
    1854             :     BTCycleId   cycle_ctr;      /* cycle ID most recently assigned */
    1855             :     int         num_vacuums;    /* number of currently active VACUUMs */
    1856             :     int         max_vacuums;    /* allocated length of vacuums[] array */
    1857             :     BTOneVacInfo vacuums[FLEXIBLE_ARRAY_MEMBER];
    1858             : } BTVacInfo;
    1859             : 
    1860             : static BTVacInfo *btvacinfo;
    1861             : 
    1862             : 
    1863             : /*
    1864             :  * _bt_vacuum_cycleid --- get the active vacuum cycle ID for an index,
    1865             :  *      or zero if there is no active VACUUM
    1866             :  *
    1867             :  * Note: for correct interlocking, the caller must already hold pin and
    1868             :  * exclusive lock on each buffer it will store the cycle ID into.  This
    1869             :  * ensures that even if a VACUUM starts immediately afterwards, it cannot
    1870             :  * process those pages until the page split is complete.
    1871             :  */
    1872             : BTCycleId
    1873         888 : _bt_vacuum_cycleid(Relation rel)
    1874             : {
    1875         888 :     BTCycleId   result = 0;
    1876             :     int         i;
    1877             : 
    1878             :     /* Share lock is enough since this is a read-only operation */
    1879         888 :     LWLockAcquire(BtreeVacuumLock, LW_SHARED);
    1880             : 
    1881         888 :     for (i = 0; i < btvacinfo->num_vacuums; i++)
    1882             :     {
    1883           0 :         BTOneVacInfo *vac = &btvacinfo->vacuums[i];
    1884             : 
    1885           0 :         if (vac->relid.relId == rel->rd_lockInfo.lockRelId.relId &&
    1886           0 :             vac->relid.dbId == rel->rd_lockInfo.lockRelId.dbId)
    1887             :         {
    1888           0 :             result = vac->cycleid;
    1889           0 :             break;
    1890             :         }
    1891             :     }
    1892             : 
    1893         888 :     LWLockRelease(BtreeVacuumLock);
    1894         888 :     return result;
    1895             : }
    1896             : 
    1897             : /*
    1898             :  * _bt_start_vacuum --- assign a cycle ID to a just-starting VACUUM operation
    1899             :  *
    1900             :  * Note: the caller must guarantee that it will eventually call
    1901             :  * _bt_end_vacuum, else we'll permanently leak an array slot.  To ensure
    1902             :  * that this happens even in elog(FATAL) scenarios, the appropriate coding
    1903             :  * is not just a PG_TRY, but
    1904             :  *      PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel))
    1905             :  */
    1906             : BTCycleId
    1907         109 : _bt_start_vacuum(Relation rel)
    1908             : {
    1909             :     BTCycleId   result;
    1910             :     int         i;
    1911             :     BTOneVacInfo *vac;
    1912             : 
    1913         109 :     LWLockAcquire(BtreeVacuumLock, LW_EXCLUSIVE);
    1914             : 
    1915             :     /*
    1916             :      * Assign the next cycle ID, being careful to avoid zero as well as the
    1917             :      * reserved high values.
    1918             :      */
    1919         109 :     result = ++(btvacinfo->cycle_ctr);
    1920         109 :     if (result == 0 || result > MAX_BT_CYCLE_ID)
    1921           0 :         result = btvacinfo->cycle_ctr = 1;
    1922             : 
    1923             :     /* Let's just make sure there's no entry already for this index */
    1924         109 :     for (i = 0; i < btvacinfo->num_vacuums; i++)
    1925             :     {
    1926           0 :         vac = &btvacinfo->vacuums[i];
    1927           0 :         if (vac->relid.relId == rel->rd_lockInfo.lockRelId.relId &&
    1928           0 :             vac->relid.dbId == rel->rd_lockInfo.lockRelId.dbId)
    1929             :         {
    1930             :             /*
    1931             :              * Unlike most places in the backend, we have to explicitly
    1932             :              * release our LWLock before throwing an error.  This is because
    1933             :              * we expect _bt_end_vacuum() to be called before transaction
    1934             :              * abort cleanup can run to release LWLocks.
    1935             :              */
    1936           0 :             LWLockRelease(BtreeVacuumLock);
    1937           0 :             elog(ERROR, "multiple active vacuums for index \"%s\"",
    1938             :                  RelationGetRelationName(rel));
    1939             :         }
    1940             :     }
    1941             : 
    1942             :     /* OK, add an entry */
    1943         109 :     if (btvacinfo->num_vacuums >= btvacinfo->max_vacuums)
    1944             :     {
    1945           0 :         LWLockRelease(BtreeVacuumLock);
    1946           0 :         elog(ERROR, "out of btvacinfo slots");
    1947             :     }
    1948         109 :     vac = &btvacinfo->vacuums[btvacinfo->num_vacuums];
    1949         109 :     vac->relid = rel->rd_lockInfo.lockRelId;
    1950         109 :     vac->cycleid = result;
    1951         109 :     btvacinfo->num_vacuums++;
    1952             : 
    1953         109 :     LWLockRelease(BtreeVacuumLock);
    1954         109 :     return result;
    1955             : }
    1956             : 
    1957             : /*
    1958             :  * _bt_end_vacuum --- mark a btree VACUUM operation as done
    1959             :  *
    1960             :  * Note: this is deliberately coded not to complain if no entry is found;
    1961             :  * this allows the caller to put PG_TRY around the start_vacuum operation.
    1962             :  */
    1963             : void
    1964         109 : _bt_end_vacuum(Relation rel)
    1965             : {
    1966             :     int         i;
    1967             : 
    1968         109 :     LWLockAcquire(BtreeVacuumLock, LW_EXCLUSIVE);
    1969             : 
    1970             :     /* Find the array entry */
    1971         109 :     for (i = 0; i < btvacinfo->num_vacuums; i++)
    1972             :     {
    1973         109 :         BTOneVacInfo *vac = &btvacinfo->vacuums[i];
    1974             : 
    1975         218 :         if (vac->relid.relId == rel->rd_lockInfo.lockRelId.relId &&
    1976         109 :             vac->relid.dbId == rel->rd_lockInfo.lockRelId.dbId)
    1977             :         {
    1978             :             /* Remove it by shifting down the last entry */
    1979         109 :             *vac = btvacinfo->vacuums[btvacinfo->num_vacuums - 1];
    1980         109 :             btvacinfo->num_vacuums--;
    1981         109 :             break;
    1982             :         }
    1983             :     }
    1984             : 
    1985         109 :     LWLockRelease(BtreeVacuumLock);
    1986         109 : }
    1987             : 
    1988             : /*
    1989             :  * _bt_end_vacuum wrapped as an on_shmem_exit callback function
    1990             :  */
    1991             : void
    1992           0 : _bt_end_vacuum_callback(int code, Datum arg)
    1993             : {
    1994           0 :     _bt_end_vacuum((Relation) DatumGetPointer(arg));
    1995           0 : }
    1996             : 
    1997             : /*
    1998             :  * BTreeShmemSize --- report amount of shared memory space needed
    1999             :  */
    2000             : Size
    2001          10 : BTreeShmemSize(void)
    2002             : {
    2003             :     Size        size;
    2004             : 
    2005          10 :     size = offsetof(BTVacInfo, vacuums);
    2006          10 :     size = add_size(size, mul_size(MaxBackends, sizeof(BTOneVacInfo)));
    2007          10 :     return size;
    2008             : }
    2009             : 
    2010             : /*
    2011             :  * BTreeShmemInit --- initialize this module's shared memory
    2012             :  */
    2013             : void
    2014           5 : BTreeShmemInit(void)
    2015             : {
    2016             :     bool        found;
    2017             : 
    2018           5 :     btvacinfo = (BTVacInfo *) ShmemInitStruct("BTree Vacuum State",
    2019             :                                               BTreeShmemSize(),
    2020             :                                               &found);
    2021             : 
    2022           5 :     if (!IsUnderPostmaster)
    2023             :     {
    2024             :         /* Initialize shared memory area */
    2025           5 :         Assert(!found);
    2026             : 
    2027             :         /*
    2028             :          * It doesn't really matter what the cycle counter starts at, but
    2029             :          * having it always start the same doesn't seem good.  Seed with
    2030             :          * low-order bits of time() instead.
    2031             :          */
    2032           5 :         btvacinfo->cycle_ctr = (BTCycleId) time(NULL);
    2033             : 
    2034           5 :         btvacinfo->num_vacuums = 0;
    2035           5 :         btvacinfo->max_vacuums = MaxBackends;
    2036             :     }
    2037             :     else
    2038           0 :         Assert(found);
    2039           5 : }
    2040             : 
    2041             : bytea *
    2042           7 : btoptions(Datum reloptions, bool validate)
    2043             : {
    2044           7 :     return default_reloptions(reloptions, validate, RELOPT_KIND_BTREE);
    2045             : }
    2046             : 
    2047             : /*
    2048             :  *  btproperty() -- Check boolean properties of indexes.
    2049             :  *
    2050             :  * This is optional, but handling AMPROP_RETURNABLE here saves opening the rel
    2051             :  * to call btcanreturn.
    2052             :  */
    2053             : bool
    2054          98 : btproperty(Oid index_oid, int attno,
    2055             :            IndexAMProperty prop, const char *propname,
    2056             :            bool *res, bool *isnull)
    2057             : {
    2058          98 :     switch (prop)
    2059             :     {
    2060             :         case AMPROP_RETURNABLE:
    2061             :             /* answer only for columns, not AM or whole index */
    2062           4 :             if (attno == 0)
    2063           2 :                 return false;
    2064             :             /* otherwise, btree can always return data */
    2065           2 :             *res = true;
    2066           2 :             return true;
    2067             : 
    2068             :         default:
    2069          94 :             return false;       /* punt to generic code */
    2070             :     }
    2071             : }

Generated by: LCOV version 1.11