LCOV - PostgreSQL - src/backend/executor/execGrouping.c

LCOV - code coverage report

Current view:	top level - src/backend/executor - execGrouping.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL	Lines:	131	135	97.0 %
Date:	2017-09-29 15:12:54	Functions:	9	9	100.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * execGrouping.c
       4             :  *    executor utility routines for grouping, hashing, and aggregation
       5             :  *
       6             :  * Note: we currently assume that equality and hashing functions are not
       7             :  * collation-sensitive, so the code in this file has no support for passing
       8             :  * collation settings through from callers.  That may have to change someday.
       9             :  *
      10             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      11             :  * Portions Copyright (c) 1994, Regents of the University of California
      12             :  *
      13             :  *
      14             :  * IDENTIFICATION
      15             :  *    src/backend/executor/execGrouping.c
      16             :  *
      17             :  *-------------------------------------------------------------------------
      18             :  */
      19             : #include "postgres.h"
      20             : 
      21             : #include "access/hash.h"
      22             : #include "access/parallel.h"
      23             : #include "executor/executor.h"
      24             : #include "miscadmin.h"
      25             : #include "utils/lsyscache.h"
      26             : #include "utils/memutils.h"
      27             : 
      28             : static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
      29             : static int  TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
      30             : 
      31             : /*
      32             :  * Define parameters for tuple hash table code generation. The interface is
      33             :  * *also* declared in execnodes.h (to generate the types, which are externally
      34             :  * visible).
      35             :  */
      36             : #define SH_PREFIX tuplehash
      37             : #define SH_ELEMENT_TYPE TupleHashEntryData
      38             : #define SH_KEY_TYPE MinimalTuple
      39             : #define SH_KEY firstTuple
      40             : #define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key)
      41             : #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
      42             : #define SH_SCOPE extern
      43             : #define SH_STORE_HASH
      44             : #define SH_GET_HASH(tb, a) a->hash
      45             : #define SH_DEFINE
      46             : #include "lib/simplehash.h"
      47             : 
      48             : 
      49             : /*****************************************************************************
      50             :  *      Utility routines for grouping tuples together
      51             :  *****************************************************************************/
      52             : 
      53             : /*
      54             :  * execTuplesMatch
      55             :  *      Return true if two tuples match in all the indicated fields.
      56             :  *
      57             :  * This actually implements SQL's notion of "not distinct".  Two nulls
      58             :  * match, a null and a not-null don't match.
      59             :  *
      60             :  * slot1, slot2: the tuples to compare (must have same columns!)
      61             :  * numCols: the number of attributes to be examined
      62             :  * matchColIdx: array of attribute column numbers
      63             :  * eqFunctions: array of fmgr lookup info for the equality functions to use
      64             :  * evalContext: short-term memory context for executing the functions
      65             :  *
      66             :  * NB: evalContext is reset each time!
      67             :  */
      68             : bool
      69      446576 : execTuplesMatch(TupleTableSlot *slot1,
      70             :                 TupleTableSlot *slot2,
      71             :                 int numCols,
      72             :                 AttrNumber *matchColIdx,
      73             :                 FmgrInfo *eqfunctions,
      74             :                 MemoryContext evalContext)
      75             : {
      76             :     MemoryContext oldContext;
      77             :     bool        result;
      78             :     int         i;
      79             : 
      80             :     /* Reset and switch into the temp context. */
      81      446576 :     MemoryContextReset(evalContext);
      82      446576 :     oldContext = MemoryContextSwitchTo(evalContext);
      83             : 
      84             :     /*
      85             :      * We cannot report a match without checking all the fields, but we can
      86             :      * report a non-match as soon as we find unequal fields.  So, start
      87             :      * comparing at the last field (least significant sort key). That's the
      88             :      * most likely to be different if we are dealing with sorted input.
      89             :      */
      90      446576 :     result = true;
      91             : 
      92     1442169 :     for (i = numCols; --i >= 0;)
      93             :     {
      94      572399 :         AttrNumber  att = matchColIdx[i];
      95             :         Datum       attr1,
      96             :                     attr2;
      97             :         bool        isNull1,
      98             :                     isNull2;
      99             : 
     100      572399 :         attr1 = slot_getattr(slot1, att, &isNull1);
     101             : 
     102      572399 :         attr2 = slot_getattr(slot2, att, &isNull2);
     103             : 
     104      572399 :         if (isNull1 != isNull2)
     105             :         {
     106          21 :             result = false;     /* one null and one not; they aren't equal */
     107       23403 :             break;
     108             :         }
     109             : 
     110      572378 :         if (isNull1)
     111         473 :             continue;           /* both are null, treat as equal */
     112             : 
     113             :         /* Apply the type-specific equality function */
     114             : 
     115      571905 :         if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
     116             :                                         attr1, attr2)))
     117             :         {
     118       23361 :             result = false;     /* they aren't equal */
     119       23361 :             break;
     120             :         }
     121             :     }
     122             : 
     123      446576 :     MemoryContextSwitchTo(oldContext);
     124             : 
     125      446576 :     return result;
     126             : }
     127             : 
     128             : /*
     129             :  * execTuplesUnequal
     130             :  *      Return true if two tuples are definitely unequal in the indicated
     131             :  *      fields.
     132             :  *
     133             :  * Nulls are neither equal nor unequal to anything else.  A true result
     134             :  * is obtained only if there are non-null fields that compare not-equal.
     135             :  *
     136             :  * Parameters are identical to execTuplesMatch.
     137             :  */
     138             : bool
     139           4 : execTuplesUnequal(TupleTableSlot *slot1,
     140             :                   TupleTableSlot *slot2,
     141             :                   int numCols,
     142             :                   AttrNumber *matchColIdx,
     143             :                   FmgrInfo *eqfunctions,
     144             :                   MemoryContext evalContext)
     145             : {
     146             :     MemoryContext oldContext;
     147             :     bool        result;
     148             :     int         i;
     149             : 
     150             :     /* Reset and switch into the temp context. */
     151           4 :     MemoryContextReset(evalContext);
     152           4 :     oldContext = MemoryContextSwitchTo(evalContext);
     153             : 
     154             :     /*
     155             :      * We cannot report a match without checking all the fields, but we can
     156             :      * report a non-match as soon as we find unequal fields.  So, start
     157             :      * comparing at the last field (least significant sort key). That's the
     158             :      * most likely to be different if we are dealing with sorted input.
     159             :      */
     160           4 :     result = false;
     161             : 
     162          14 :     for (i = numCols; --i >= 0;)
     163             :     {
     164           8 :         AttrNumber  att = matchColIdx[i];
     165             :         Datum       attr1,
     166             :                     attr2;
     167             :         bool        isNull1,
     168             :                     isNull2;
     169             : 
     170           8 :         attr1 = slot_getattr(slot1, att, &isNull1);
     171             : 
     172           8 :         if (isNull1)
     173           6 :             continue;           /* can't prove anything here */
     174             : 
     175           6 :         attr2 = slot_getattr(slot2, att, &isNull2);
     176             : 
     177           6 :         if (isNull2)
     178           2 :             continue;           /* can't prove anything here */
     179             : 
     180             :         /* Apply the type-specific equality function */
     181             : 
     182           4 :         if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
     183             :                                         attr1, attr2)))
     184             :         {
     185           2 :             result = true;      /* they are unequal */
     186           2 :             break;
     187             :         }
     188             :     }
     189             : 
     190           4 :     MemoryContextSwitchTo(oldContext);
     191             : 
     192           4 :     return result;
     193             : }
     194             : 
     195             : 
     196             : /*
     197             :  * execTuplesMatchPrepare
     198             :  *      Look up the equality functions needed for execTuplesMatch or
     199             :  *      execTuplesUnequal, given an array of equality operator OIDs.
     200             :  *
     201             :  * The result is a palloc'd array.
     202             :  */
     203             : FmgrInfo *
     204         409 : execTuplesMatchPrepare(int numCols,
     205             :                        Oid *eqOperators)
     206             : {
     207         409 :     FmgrInfo   *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
     208             :     int         i;
     209             : 
     210        1000 :     for (i = 0; i < numCols; i++)
     211             :     {
     212         591 :         Oid         eq_opr = eqOperators[i];
     213             :         Oid         eq_function;
     214             : 
     215         591 :         eq_function = get_opcode(eq_opr);
     216         591 :         fmgr_info(eq_function, &eqFunctions[i]);
     217             :     }
     218             : 
     219         409 :     return eqFunctions;
     220             : }
     221             : 
     222             : /*
     223             :  * execTuplesHashPrepare
     224             :  *      Look up the equality and hashing functions needed for a TupleHashTable.
     225             :  *
     226             :  * This is similar to execTuplesMatchPrepare, but we also need to find the
     227             :  * hash functions associated with the equality operators.  *eqFunctions and
     228             :  * *hashFunctions receive the palloc'd result arrays.
     229             :  *
     230             :  * Note: we expect that the given operators are not cross-type comparisons.
     231             :  */
     232             : void
     233         359 : execTuplesHashPrepare(int numCols,
     234             :                       Oid *eqOperators,
     235             :                       FmgrInfo **eqFunctions,
     236             :                       FmgrInfo **hashFunctions)
     237             : {
     238             :     int         i;
     239             : 
     240         359 :     *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
     241         359 :     *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
     242             : 
     243         836 :     for (i = 0; i < numCols; i++)
     244             :     {
     245         477 :         Oid         eq_opr = eqOperators[i];
     246             :         Oid         eq_function;
     247             :         Oid         left_hash_function;
     248             :         Oid         right_hash_function;
     249             : 
     250         477 :         eq_function = get_opcode(eq_opr);
     251         477 :         if (!get_op_hash_functions(eq_opr,
     252             :                                    &left_hash_function, &right_hash_function))
     253           0 :             elog(ERROR, "could not find hash function for hash operator %u",
     254             :                  eq_opr);
     255             :         /* We're not supporting cross-type cases here */
     256         477 :         Assert(left_hash_function == right_hash_function);
     257         477 :         fmgr_info(eq_function, &(*eqFunctions)[i]);
     258         477 :         fmgr_info(right_hash_function, &(*hashFunctions)[i]);
     259             :     }
     260         359 : }
     261             : 
     262             : 
     263             : /*****************************************************************************
     264             :  *      Utility routines for all-in-memory hash tables
     265             :  *
     266             :  * These routines build hash tables for grouping tuples together (eg, for
     267             :  * hash aggregation).  There is one entry for each not-distinct set of tuples
     268             :  * presented.
     269             :  *****************************************************************************/
     270             : 
     271             : /*
     272             :  * Construct an empty TupleHashTable
     273             :  *
     274             :  *  numCols, keyColIdx: identify the tuple fields to use as lookup key
     275             :  *  eqfunctions: equality comparison functions to use
     276             :  *  hashfunctions: datatype-specific hashing functions to use
     277             :  *  nbuckets: initial estimate of hashtable size
     278             :  *  additionalsize: size of data stored in ->additional
     279             :  *  tablecxt: memory context in which to store table and table entries
     280             :  *  tempcxt: short-lived context for evaluation hash and comparison functions
     281             :  *
     282             :  * The function arrays may be made with execTuplesHashPrepare().  Note they
     283             :  * are not cross-type functions, but expect to see the table datatype(s)
     284             :  * on both sides.
     285             :  *
     286             :  * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
     287             :  * storage that will live as long as the hashtable does.
     288             :  */
     289             : TupleHashTable
     290         520 : BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
     291             :                     FmgrInfo *eqfunctions,
     292             :                     FmgrInfo *hashfunctions,
     293             :                     long nbuckets, Size additionalsize,
     294             :                     MemoryContext tablecxt, MemoryContext tempcxt,
     295             :                     bool use_variable_hash_iv)
     296             : {
     297             :     TupleHashTable hashtable;
     298         520 :     Size        entrysize = sizeof(TupleHashEntryData) + additionalsize;
     299             : 
     300         520 :     Assert(nbuckets > 0);
     301             : 
     302             :     /* Limit initial table size request to not more than work_mem */
     303         520 :     nbuckets = Min(nbuckets, (long) ((work_mem * 1024L) / entrysize));
     304             : 
     305         520 :     hashtable = (TupleHashTable)
     306             :         MemoryContextAlloc(tablecxt, sizeof(TupleHashTableData));
     307             : 
     308         520 :     hashtable->numCols = numCols;
     309         520 :     hashtable->keyColIdx = keyColIdx;
     310         520 :     hashtable->tab_hash_funcs = hashfunctions;
     311         520 :     hashtable->tab_eq_funcs = eqfunctions;
     312         520 :     hashtable->tablecxt = tablecxt;
     313         520 :     hashtable->tempcxt = tempcxt;
     314         520 :     hashtable->entrysize = entrysize;
     315         520 :     hashtable->tableslot = NULL; /* will be made on first lookup */
     316         520 :     hashtable->inputslot = NULL;
     317         520 :     hashtable->in_hash_funcs = NULL;
     318         520 :     hashtable->cur_eq_funcs = NULL;
     319             : 
     320             :     /*
     321             :      * If parallelism is in use, even if the master backend is performing the
     322             :      * scan itself, we don't want to create the hashtable exactly the same way
     323             :      * in all workers. As hashtables are iterated over in keyspace-order,
     324             :      * doing so in all processes in the same way is likely to lead to
     325             :      * "unbalanced" hashtables when the table size initially is
     326             :      * underestimated.
     327             :      */
     328         520 :     if (use_variable_hash_iv)
     329           7 :         hashtable->hash_iv = hash_uint32(ParallelWorkerNumber);
     330             :     else
     331         513 :         hashtable->hash_iv = 0;
     332             : 
     333         520 :     hashtable->hashtab = tuplehash_create(tablecxt, nbuckets, hashtable);
     334             : 
     335         520 :     return hashtable;
     336             : }
     337             : 
     338             : /*
     339             :  * Find or create a hashtable entry for the tuple group containing the
     340             :  * given tuple.  The tuple must be the same type as the hashtable entries.
     341             :  *
     342             :  * If isnew is NULL, we do not create new entries; we return NULL if no
     343             :  * match is found.
     344             :  *
     345             :  * If isnew isn't NULL, then a new entry is created if no existing entry
     346             :  * matches.  On return, *isnew is true if the entry is newly created,
     347             :  * false if it existed already.  ->additional_data in the new entry has
     348             :  * been zeroed.
     349             :  */
     350             : TupleHashEntry
     351      292272 : LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
     352             :                      bool *isnew)
     353             : {
     354             :     TupleHashEntryData *entry;
     355             :     MemoryContext oldContext;
     356             :     bool        found;
     357             :     MinimalTuple key;
     358             : 
     359             :     /* If first time through, clone the input slot to make table slot */
     360      292272 :     if (hashtable->tableslot == NULL)
     361             :     {
     362             :         TupleDesc   tupdesc;
     363             : 
     364         416 :         oldContext = MemoryContextSwitchTo(hashtable->tablecxt);
     365             : 
     366             :         /*
     367             :          * We copy the input tuple descriptor just for safety --- we assume
     368             :          * all input tuples will have equivalent descriptors.
     369             :          */
     370         416 :         tupdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
     371         416 :         hashtable->tableslot = MakeSingleTupleTableSlot(tupdesc);
     372         416 :         MemoryContextSwitchTo(oldContext);
     373             :     }
     374             : 
     375             :     /* Need to run the hash functions in short-lived context */
     376      292272 :     oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
     377             : 
     378             :     /* set up data needed by hash and match functions */
     379      292272 :     hashtable->inputslot = slot;
     380      292272 :     hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
     381      292272 :     hashtable->cur_eq_funcs = hashtable->tab_eq_funcs;
     382             : 
     383      292272 :     key = NULL;                 /* flag to reference inputslot */
     384             : 
     385      292272 :     if (isnew)
     386             :     {
     387      271839 :         entry = tuplehash_insert(hashtable->hashtab, key, &found);
     388             : 
     389      271839 :         if (found)
     390             :         {
     391             :             /* found pre-existing entry */
     392      240424 :             *isnew = false;
     393             :         }
     394             :         else
     395             :         {
     396             :             /* created new entry */
     397       31415 :             *isnew = true;
     398             :             /* zero caller data */
     399       31415 :             entry->additional = NULL;
     400       31415 :             MemoryContextSwitchTo(hashtable->tablecxt);
     401             :             /* Copy the first tuple into the table context */
     402       31415 :             entry->firstTuple = ExecCopySlotMinimalTuple(slot);
     403             :         }
     404             :     }
     405             :     else
     406             :     {
     407       20433 :         entry = tuplehash_lookup(hashtable->hashtab, key);
     408             :     }
     409             : 
     410      292272 :     MemoryContextSwitchTo(oldContext);
     411             : 
     412      292272 :     return entry;
     413             : }
     414             : 
     415             : /*
     416             :  * Search for a hashtable entry matching the given tuple.  No entry is
     417             :  * created if there's not a match.  This is similar to the non-creating
     418             :  * case of LookupTupleHashEntry, except that it supports cross-type
     419             :  * comparisons, in which the given tuple is not of the same type as the
     420             :  * table entries.  The caller must provide the hash functions to use for
     421             :  * the input tuple, as well as the equality functions, since these may be
     422             :  * different from the table's internal functions.
     423             :  */
     424             : TupleHashEntry
     425       60095 : FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
     426             :                    FmgrInfo *eqfunctions,
     427             :                    FmgrInfo *hashfunctions)
     428             : {
     429             :     TupleHashEntry entry;
     430             :     MemoryContext oldContext;
     431             :     MinimalTuple key;
     432             : 
     433             :     /* Need to run the hash functions in short-lived context */
     434       60095 :     oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
     435             : 
     436             :     /* Set up data needed by hash and match functions */
     437       60095 :     hashtable->inputslot = slot;
     438       60095 :     hashtable->in_hash_funcs = hashfunctions;
     439       60095 :     hashtable->cur_eq_funcs = eqfunctions;
     440             : 
     441             :     /* Search the hash table */
     442       60095 :     key = NULL;                 /* flag to reference inputslot */
     443       60095 :     entry = tuplehash_lookup(hashtable->hashtab, key);
     444       60095 :     MemoryContextSwitchTo(oldContext);
     445             : 
     446       60095 :     return entry;
     447             : }
     448             : 
     449             : /*
     450             :  * Compute the hash value for a tuple
     451             :  *
     452             :  * The passed-in key is a pointer to TupleHashEntryData.  In an actual hash
     453             :  * table entry, the firstTuple field points to a tuple (in MinimalTuple
     454             :  * format).  LookupTupleHashEntry sets up a dummy TupleHashEntryData with a
     455             :  * NULL firstTuple field --- that cues us to look at the inputslot instead.
     456             :  * This convention avoids the need to materialize virtual input tuples unless
     457             :  * they actually need to get copied into the table.
     458             :  *
     459             :  * Also, the caller must select an appropriate memory context for running
     460             :  * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
     461             :  */
     462             : static uint32
     463      352367 : TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
     464             : {
     465      352367 :     TupleHashTable hashtable = (TupleHashTable) tb->private_data;
     466      352367 :     int         numCols = hashtable->numCols;
     467      352367 :     AttrNumber *keyColIdx = hashtable->keyColIdx;
     468      352367 :     uint32      hashkey = hashtable->hash_iv;
     469             :     TupleTableSlot *slot;
     470             :     FmgrInfo   *hashfunctions;
     471             :     int         i;
     472             : 
     473      352367 :     if (tuple == NULL)
     474             :     {
     475             :         /* Process the current input tuple for the table */
     476      352367 :         slot = hashtable->inputslot;
     477      352367 :         hashfunctions = hashtable->in_hash_funcs;
     478             :     }
     479             :     else
     480             :     {
     481             :         /*
     482             :          * Process a tuple already stored in the table.
     483             :          *
     484             :          * (this case never actually occurs due to the way simplehash.h is
     485             :          * used, as the hash-value is stored in the entries)
     486             :          */
     487           0 :         slot = hashtable->tableslot;
     488           0 :         ExecStoreMinimalTuple(tuple, slot, false);
     489           0 :         hashfunctions = hashtable->tab_hash_funcs;
     490             :     }
     491             : 
     492      832052 :     for (i = 0; i < numCols; i++)
     493             :     {
     494      479685 :         AttrNumber  att = keyColIdx[i];
     495             :         Datum       attr;
     496             :         bool        isNull;
     497             : 
     498             :         /* rotate hashkey left 1 bit at each step */
     499      479685 :         hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
     500             : 
     501      479685 :         attr = slot_getattr(slot, att, &isNull);
     502             : 
     503      479685 :         if (!isNull)            /* treat nulls as having hash key 0 */
     504             :         {
     505             :             uint32      hkey;
     506             : 
     507      479233 :             hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
     508             :                                                 attr));
     509      479233 :             hashkey ^= hkey;
     510             :         }
     511             :     }
     512             : 
     513      352367 :     return hashkey;
     514             : }
     515             : 
     516             : /*
     517             :  * See whether two tuples (presumably of the same hash value) match
     518             :  *
     519             :  * As above, the passed pointers are pointers to TupleHashEntryData.
     520             :  *
     521             :  * Also, the caller must select an appropriate memory context for running
     522             :  * the compare functions.  (dynahash.c doesn't change CurrentMemoryContext.)
     523             :  */
     524             : static int
     525      257008 : TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
     526             : {
     527             :     TupleTableSlot *slot1;
     528             :     TupleTableSlot *slot2;
     529      257008 :     TupleHashTable hashtable = (TupleHashTable) tb->private_data;
     530             : 
     531             :     /*
     532             :      * We assume that simplehash.h will only ever call us with the first
     533             :      * argument being an actual table entry, and the second argument being
     534             :      * LookupTupleHashEntry's dummy TupleHashEntryData.  The other direction
     535             :      * could be supported too, but is not currently required.
     536             :      */
     537      257008 :     Assert(tuple1 != NULL);
     538      257008 :     slot1 = hashtable->tableslot;
     539      257008 :     ExecStoreMinimalTuple(tuple1, slot1, false);
     540      257008 :     Assert(tuple2 == NULL);
     541      257008 :     slot2 = hashtable->inputslot;
     542             : 
     543             :     /* For crosstype comparisons, the inputslot must be first */
     544      257008 :     if (execTuplesMatch(slot2,
     545             :                         slot1,
     546             :                         hashtable->numCols,
     547             :                         hashtable->keyColIdx,
     548             :                         hashtable->cur_eq_funcs,
     549             :                         hashtable->tempcxt))
     550      255958 :         return 0;
     551             :     else
     552        1050 :         return 1;
     553             : }

Generated by: LCOV version 1.11