LCOV - code coverage report
Current view: top level - src/backend/utils/adt - tsvector_op.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 869 1031 84.3 %
Date: 2017-09-29 13:40:31 Functions: 38 49 77.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * tsvector_op.c
       4             :  *    operations over tsvector
       5             :  *
       6             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       7             :  *
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/utils/adt/tsvector_op.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include <limits.h>
      17             : 
      18             : #include "access/htup_details.h"
      19             : #include "catalog/namespace.h"
      20             : #include "catalog/pg_type.h"
      21             : #include "commands/trigger.h"
      22             : #include "executor/spi.h"
      23             : #include "funcapi.h"
      24             : #include "mb/pg_wchar.h"
      25             : #include "miscadmin.h"
      26             : #include "parser/parse_coerce.h"
      27             : #include "tsearch/ts_utils.h"
      28             : #include "utils/builtins.h"
      29             : #include "utils/lsyscache.h"
      30             : #include "utils/regproc.h"
      31             : #include "utils/rel.h"
      32             : 
      33             : 
      34             : typedef struct
      35             : {
      36             :     WordEntry  *arrb;
      37             :     WordEntry  *arre;
      38             :     char       *values;
      39             :     char       *operand;
      40             : } CHKVAL;
      41             : 
      42             : 
      43             : typedef struct StatEntry
      44             : {
      45             :     uint32      ndoc;           /* zero indicates that we were already here
      46             :                                  * while walking through the tree */
      47             :     uint32      nentry;
      48             :     struct StatEntry *left;
      49             :     struct StatEntry *right;
      50             :     uint32      lenlexeme;
      51             :     char        lexeme[FLEXIBLE_ARRAY_MEMBER];
      52             : } StatEntry;
      53             : 
      54             : #define STATENTRYHDRSZ  (offsetof(StatEntry, lexeme))
      55             : 
      56             : typedef struct
      57             : {
      58             :     int32       weight;
      59             : 
      60             :     uint32      maxdepth;
      61             : 
      62             :     StatEntry **stack;
      63             :     uint32      stackpos;
      64             : 
      65             :     StatEntry  *root;
      66             : } TSVectorStat;
      67             : 
      68             : #define STATHDRSIZE (offsetof(TSVectorStat, data))
      69             : 
      70             : static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
      71             : static int  tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
      72             : 
      73             : /*
      74             :  * Order: haspos, len, word, for all positions (pos, weight)
      75             :  */
      76             : static int
      77           0 : silly_cmp_tsvector(const TSVector a, const TSVector b)
      78             : {
      79           0 :     if (VARSIZE(a) < VARSIZE(b))
      80           0 :         return -1;
      81           0 :     else if (VARSIZE(a) > VARSIZE(b))
      82           0 :         return 1;
      83           0 :     else if (a->size < b->size)
      84           0 :         return -1;
      85           0 :     else if (a->size > b->size)
      86           0 :         return 1;
      87             :     else
      88             :     {
      89           0 :         WordEntry  *aptr = ARRPTR(a);
      90           0 :         WordEntry  *bptr = ARRPTR(b);
      91           0 :         int         i = 0;
      92             :         int         res;
      93             : 
      94             : 
      95           0 :         for (i = 0; i < a->size; i++)
      96             :         {
      97           0 :             if (aptr->haspos != bptr->haspos)
      98             :             {
      99           0 :                 return (aptr->haspos > bptr->haspos) ? -1 : 1;
     100             :             }
     101           0 :             else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
     102             :             {
     103           0 :                 return res;
     104             :             }
     105           0 :             else if (aptr->haspos)
     106             :             {
     107           0 :                 WordEntryPos *ap = POSDATAPTR(a, aptr);
     108           0 :                 WordEntryPos *bp = POSDATAPTR(b, bptr);
     109             :                 int         j;
     110             : 
     111           0 :                 if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
     112           0 :                     return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
     113             : 
     114           0 :                 for (j = 0; j < POSDATALEN(a, aptr); j++)
     115             :                 {
     116           0 :                     if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
     117             :                     {
     118           0 :                         return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
     119             :                     }
     120           0 :                     else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
     121             :                     {
     122           0 :                         return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
     123             :                     }
     124           0 :                     ap++, bp++;
     125             :                 }
     126             :             }
     127             : 
     128           0 :             aptr++;
     129           0 :             bptr++;
     130             :         }
     131             :     }
     132             : 
     133           0 :     return 0;
     134             : }
     135             : 
     136             : #define TSVECTORCMPFUNC( type, action, ret )            \
     137             : Datum                                                   \
     138             : tsvector_##type(PG_FUNCTION_ARGS)                       \
     139             : {                                                       \
     140             :     TSVector    a = PG_GETARG_TSVECTOR(0);              \
     141             :     TSVector    b = PG_GETARG_TSVECTOR(1);              \
     142             :     int         res = silly_cmp_tsvector(a, b);         \
     143             :     PG_FREE_IF_COPY(a,0);                               \
     144             :     PG_FREE_IF_COPY(b,1);                               \
     145             :     PG_RETURN_##ret( res action 0 );                    \
     146             : }   \
     147             : /* keep compiler quiet - no extra ; */                  \
     148             : extern int no_such_variable
     149             : 
     150           0 : TSVECTORCMPFUNC(lt, <, BOOL);
     151           0 : TSVECTORCMPFUNC(le, <=, BOOL);
     152           0 : TSVECTORCMPFUNC(eq, ==, BOOL);
     153           0 : TSVECTORCMPFUNC(ge, >=, BOOL);
     154           0 : TSVECTORCMPFUNC(gt, >, BOOL);
     155           0 : TSVECTORCMPFUNC(ne, !=, BOOL);
     156           0 : TSVECTORCMPFUNC(cmp, +, INT32);
     157             : 
     158             : Datum
     159           6 : tsvector_strip(PG_FUNCTION_ARGS)
     160             : {
     161           6 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     162             :     TSVector    out;
     163             :     int         i,
     164           6 :                 len = 0;
     165           6 :     WordEntry  *arrin = ARRPTR(in),
     166             :                *arrout;
     167             :     char       *cur;
     168             : 
     169          25 :     for (i = 0; i < in->size; i++)
     170          19 :         len += arrin[i].len;
     171             : 
     172           6 :     len = CALCDATASIZE(in->size, len);
     173           6 :     out = (TSVector) palloc0(len);
     174           6 :     SET_VARSIZE(out, len);
     175           6 :     out->size = in->size;
     176           6 :     arrout = ARRPTR(out);
     177           6 :     cur = STRPTR(out);
     178          25 :     for (i = 0; i < in->size; i++)
     179             :     {
     180          19 :         memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
     181          19 :         arrout[i].haspos = 0;
     182          19 :         arrout[i].len = arrin[i].len;
     183          19 :         arrout[i].pos = cur - STRPTR(out);
     184          19 :         cur += arrout[i].len;
     185             :     }
     186             : 
     187           6 :     PG_FREE_IF_COPY(in, 0);
     188           6 :     PG_RETURN_POINTER(out);
     189             : }
     190             : 
     191             : Datum
     192           1 : tsvector_length(PG_FUNCTION_ARGS)
     193             : {
     194           1 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     195           1 :     int32       ret = in->size;
     196             : 
     197           1 :     PG_FREE_IF_COPY(in, 0);
     198           1 :     PG_RETURN_INT32(ret);
     199             : }
     200             : 
     201             : Datum
     202           2 : tsvector_setweight(PG_FUNCTION_ARGS)
     203             : {
     204           2 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     205           2 :     char        cw = PG_GETARG_CHAR(1);
     206             :     TSVector    out;
     207             :     int         i,
     208             :                 j;
     209             :     WordEntry  *entry;
     210             :     WordEntryPos *p;
     211           2 :     int         w = 0;
     212             : 
     213           2 :     switch (cw)
     214             :     {
     215             :         case 'A':
     216             :         case 'a':
     217           0 :             w = 3;
     218           0 :             break;
     219             :         case 'B':
     220             :         case 'b':
     221           0 :             w = 2;
     222           0 :             break;
     223             :         case 'C':
     224             :         case 'c':
     225           2 :             w = 1;
     226           2 :             break;
     227             :         case 'D':
     228             :         case 'd':
     229           0 :             w = 0;
     230           0 :             break;
     231             :         default:
     232             :             /* internal error */
     233           0 :             elog(ERROR, "unrecognized weight: %d", cw);
     234             :     }
     235             : 
     236           2 :     out = (TSVector) palloc(VARSIZE(in));
     237           2 :     memcpy(out, in, VARSIZE(in));
     238           2 :     entry = ARRPTR(out);
     239           2 :     i = out->size;
     240          12 :     while (i--)
     241             :     {
     242           8 :         if ((j = POSDATALEN(out, entry)) != 0)
     243             :         {
     244           8 :             p = POSDATAPTR(out, entry);
     245          36 :             while (j--)
     246             :             {
     247          20 :                 WEP_SETWEIGHT(*p, w);
     248          20 :                 p++;
     249             :             }
     250             :         }
     251           8 :         entry++;
     252             :     }
     253             : 
     254           2 :     PG_FREE_IF_COPY(in, 0);
     255           2 :     PG_RETURN_POINTER(out);
     256             : }
     257             : 
     258             : /*
     259             :  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
     260             :  *
     261             :  * Assign weight w to elements of tsin that are listed in lexemes.
     262             :  */
     263             : Datum
     264           5 : tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
     265             : {
     266           5 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     267           5 :     char        char_weight = PG_GETARG_CHAR(1);
     268           5 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(2);
     269             : 
     270             :     TSVector    tsout;
     271             :     int         i,
     272             :                 j,
     273             :                 nlexemes,
     274             :                 weight;
     275             :     WordEntry  *entry;
     276             :     Datum      *dlexemes;
     277             :     bool       *nulls;
     278             : 
     279           5 :     switch (char_weight)
     280             :     {
     281             :         case 'A':
     282             :         case 'a':
     283           0 :             weight = 3;
     284           0 :             break;
     285             :         case 'B':
     286             :         case 'b':
     287           0 :             weight = 2;
     288           0 :             break;
     289             :         case 'C':
     290             :         case 'c':
     291           5 :             weight = 1;
     292           5 :             break;
     293             :         case 'D':
     294             :         case 'd':
     295           0 :             weight = 0;
     296           0 :             break;
     297             :         default:
     298             :             /* internal error */
     299           0 :             elog(ERROR, "unrecognized weight: %c", char_weight);
     300             :     }
     301             : 
     302           5 :     tsout = (TSVector) palloc(VARSIZE(tsin));
     303           5 :     memcpy(tsout, tsin, VARSIZE(tsin));
     304           5 :     entry = ARRPTR(tsout);
     305             : 
     306           5 :     deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
     307             :                       &dlexemes, &nulls, &nlexemes);
     308             : 
     309             :     /*
     310             :      * Assuming that lexemes array is significantly shorter than tsvector we
     311             :      * can iterate through lexemes performing binary search of each lexeme
     312             :      * from lexemes in tsvector.
     313             :      */
     314          13 :     for (i = 0; i < nlexemes; i++)
     315             :     {
     316             :         char       *lex;
     317             :         int         lex_len,
     318             :                     lex_pos;
     319             : 
     320           9 :         if (nulls[i])
     321           1 :             ereport(ERROR,
     322             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     323             :                      errmsg("lexeme array may not contain nulls")));
     324             : 
     325           8 :         lex = VARDATA(dlexemes[i]);
     326           8 :         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     327           8 :         lex_pos = tsvector_bsearch(tsout, lex, lex_len);
     328             : 
     329           8 :         if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
     330             :         {
     331           4 :             WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
     332             : 
     333          17 :             while (j--)
     334             :             {
     335           9 :                 WEP_SETWEIGHT(*p, weight);
     336           9 :                 p++;
     337             :             }
     338             :         }
     339             :     }
     340             : 
     341           4 :     PG_FREE_IF_COPY(tsin, 0);
     342           4 :     PG_FREE_IF_COPY(lexemes, 2);
     343             : 
     344           4 :     PG_RETURN_POINTER(tsout);
     345             : }
     346             : 
     347             : #define compareEntry(pa, a, pb, b) \
     348             :     tsCompareString((pa) + (a)->pos, (a)->len,    \
     349             :                     (pb) + (b)->pos, (b)->len,    \
     350             :                     false)
     351             : 
     352             : /*
     353             :  * Add positions from src to dest after offsetting them by maxpos.
     354             :  * Return the number added (might be less than expected due to overflow)
     355             :  */
     356             : static int32
     357           2 : add_pos(TSVector src, WordEntry *srcptr,
     358             :         TSVector dest, WordEntry *destptr,
     359             :         int32 maxpos)
     360             : {
     361           2 :     uint16     *clen = &_POSVECPTR(dest, destptr)->npos;
     362             :     int         i;
     363           2 :     uint16      slen = POSDATALEN(src, srcptr),
     364             :                 startlen;
     365           2 :     WordEntryPos *spos = POSDATAPTR(src, srcptr),
     366           2 :                *dpos = POSDATAPTR(dest, destptr);
     367             : 
     368           2 :     if (!destptr->haspos)
     369           0 :         *clen = 0;
     370             : 
     371           2 :     startlen = *clen;
     372           6 :     for (i = 0;
     373           8 :          i < slen && *clen < MAXNUMPOS &&
     374           3 :          (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
     375           2 :          i++)
     376             :     {
     377           2 :         WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
     378           2 :         WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
     379           2 :         (*clen)++;
     380             :     }
     381             : 
     382           2 :     if (*clen != startlen)
     383           2 :         destptr->haspos = 1;
     384           2 :     return *clen - startlen;
     385             : }
     386             : 
     387             : /*
     388             :  * Perform binary search of given lexeme in TSVector.
     389             :  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
     390             :  * found.
     391             :  */
     392             : static int
     393          33 : tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
     394             : {
     395          33 :     WordEntry  *arrin = ARRPTR(tsv);
     396          33 :     int         StopLow = 0,
     397          33 :                 StopHigh = tsv->size,
     398             :                 StopMiddle,
     399             :                 cmp;
     400             : 
     401         117 :     while (StopLow < StopHigh)
     402             :     {
     403          76 :         StopMiddle = (StopLow + StopHigh) / 2;
     404             : 
     405         152 :         cmp = tsCompareString(lexeme, lexeme_len,
     406          76 :                               STRPTR(tsv) + arrin[StopMiddle].pos,
     407          76 :                               arrin[StopMiddle].len,
     408             :                               false);
     409             : 
     410          76 :         if (cmp < 0)
     411          32 :             StopHigh = StopMiddle;
     412          44 :         else if (cmp > 0)
     413          19 :             StopLow = StopMiddle + 1;
     414             :         else                    /* found it */
     415          25 :             return StopMiddle;
     416             :     }
     417             : 
     418           8 :     return -1;
     419             : }
     420             : 
     421             : /*
     422             :  * qsort comparator functions
     423             :  */
     424             : 
     425             : static int
     426           6 : compare_int(const void *va, const void *vb)
     427             : {
     428           6 :     int         a = *((const int *) va);
     429           6 :     int         b = *((const int *) vb);
     430             : 
     431           6 :     if (a == b)
     432           1 :         return 0;
     433           5 :     return (a > b) ? 1 : -1;
     434             : }
     435             : 
     436             : static int
     437          17 : compare_text_lexemes(const void *va, const void *vb)
     438             : {
     439          17 :     Datum       a = *((const Datum *) va);
     440          17 :     Datum       b = *((const Datum *) vb);
     441          17 :     char       *alex = VARDATA_ANY(a);
     442          17 :     int         alex_len = VARSIZE_ANY_EXHDR(a);
     443          17 :     char       *blex = VARDATA_ANY(b);
     444          17 :     int         blex_len = VARSIZE_ANY_EXHDR(b);
     445             : 
     446          17 :     return tsCompareString(alex, alex_len, blex, blex_len, false);
     447             : }
     448             : 
     449             : /*
     450             :  * Internal routine to delete lexemes from TSVector by array of offsets.
     451             :  *
     452             :  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
     453             :  * int indices_count -- size of that array
     454             :  *
     455             :  * Returns new TSVector without given lexemes along with their positions
     456             :  * and weights.
     457             :  */
     458             : static TSVector
     459          10 : tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
     460             :                            int indices_count)
     461             : {
     462             :     TSVector    tsout;
     463          10 :     WordEntry  *arrin = ARRPTR(tsv),
     464             :                *arrout;
     465          10 :     char       *data = STRPTR(tsv),
     466             :                *dataout;
     467             :     int         i,              /* index in arrin */
     468             :                 j,              /* index in arrout */
     469             :                 k,              /* index in indices_to_delete */
     470             :                 curoff;         /* index in dataout area */
     471             : 
     472             :     /*
     473             :      * Sort the filter array to simplify membership checks below.  Also, get
     474             :      * rid of any duplicate entries, so that we can assume that indices_count
     475             :      * is exactly equal to the number of lexemes that will be removed.
     476             :      */
     477          10 :     if (indices_count > 1)
     478             :     {
     479             :         int         kp;
     480             : 
     481           4 :         qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
     482           4 :         kp = 0;
     483           9 :         for (k = 1; k < indices_count; k++)
     484             :         {
     485           5 :             if (indices_to_delete[k] != indices_to_delete[kp])
     486           4 :                 indices_to_delete[++kp] = indices_to_delete[k];
     487             :         }
     488           4 :         indices_count = ++kp;
     489             :     }
     490             : 
     491             :     /*
     492             :      * Here we overestimate tsout size, since we don't know how much space is
     493             :      * used by the deleted lexeme(s).  We will set exact size below.
     494             :      */
     495          10 :     tsout = (TSVector) palloc0(VARSIZE(tsv));
     496             : 
     497             :     /* This count must be correct because STRPTR(tsout) relies on it. */
     498          10 :     tsout->size = tsv->size - indices_count;
     499             : 
     500             :     /*
     501             :      * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
     502             :      */
     503          10 :     arrout = ARRPTR(tsout);
     504          10 :     dataout = STRPTR(tsout);
     505          10 :     curoff = 0;
     506          60 :     for (i = j = k = 0; i < tsv->size; i++)
     507             :     {
     508             :         /*
     509             :          * If current i is present in indices_to_delete, skip this lexeme.
     510             :          * Since indices_to_delete is already sorted, we only need to check
     511             :          * the current (k'th) entry.
     512             :          */
     513          50 :         if (k < indices_count && i == indices_to_delete[k])
     514             :         {
     515          14 :             k++;
     516          14 :             continue;
     517             :         }
     518             : 
     519             :         /* Copy lexeme and its positions and weights */
     520          36 :         memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
     521          36 :         arrout[j].haspos = arrin[i].haspos;
     522          36 :         arrout[j].len = arrin[i].len;
     523          36 :         arrout[j].pos = curoff;
     524          36 :         curoff += arrin[i].len;
     525          36 :         if (arrin[i].haspos)
     526             :         {
     527          26 :             int         len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
     528             :             + sizeof(uint16);
     529             : 
     530          26 :             curoff = SHORTALIGN(curoff);
     531          78 :             memcpy(dataout + curoff,
     532          52 :                    STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
     533             :                    len);
     534          26 :             curoff += len;
     535             :         }
     536             : 
     537          36 :         j++;
     538             :     }
     539             : 
     540             :     /*
     541             :      * k should now be exactly equal to indices_count. If it isn't then the
     542             :      * caller provided us with indices outside of [0, tsv->size) range and
     543             :      * estimation of tsout's size is wrong.
     544             :      */
     545          10 :     Assert(k == indices_count);
     546             : 
     547          10 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
     548          10 :     return tsout;
     549             : }
     550             : 
     551             : /*
     552             :  * Delete given lexeme from tsvector.
     553             :  * Implementation of user-level ts_delete(tsvector, text).
     554             :  */
     555             : Datum
     556           6 : tsvector_delete_str(PG_FUNCTION_ARGS)
     557             : {
     558           6 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     559             :                 tsout;
     560           6 :     text       *tlexeme = PG_GETARG_TEXT_PP(1);
     561           6 :     char       *lexeme = VARDATA_ANY(tlexeme);
     562           6 :     int         lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
     563             :                 skip_index;
     564             : 
     565           6 :     if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
     566           2 :         PG_RETURN_POINTER(tsin);
     567             : 
     568           4 :     tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
     569             : 
     570           4 :     PG_FREE_IF_COPY(tsin, 0);
     571           4 :     PG_FREE_IF_COPY(tlexeme, 1);
     572           4 :     PG_RETURN_POINTER(tsout);
     573             : }
     574             : 
     575             : /*
     576             :  * Delete given array of lexemes from tsvector.
     577             :  * Implementation of user-level ts_delete(tsvector, text[]).
     578             :  */
     579             : Datum
     580           7 : tsvector_delete_arr(PG_FUNCTION_ARGS)
     581             : {
     582           7 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     583             :                 tsout;
     584           7 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(1);
     585             :     int         i,
     586             :                 nlex,
     587             :                 skip_count,
     588             :                *skip_indices;
     589             :     Datum      *dlexemes;
     590             :     bool       *nulls;
     591             : 
     592           7 :     deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
     593             :                       &dlexemes, &nulls, &nlex);
     594             : 
     595             :     /*
     596             :      * In typical use case array of lexemes to delete is relatively small. So
     597             :      * here we optimize things for that scenario: iterate through lexarr
     598             :      * performing binary search of each lexeme from lexarr in tsvector.
     599             :      */
     600           7 :     skip_indices = palloc0(nlex * sizeof(int));
     601          26 :     for (i = skip_count = 0; i < nlex; i++)
     602             :     {
     603             :         char       *lex;
     604             :         int         lex_len,
     605             :                     lex_pos;
     606             : 
     607          20 :         if (nulls[i])
     608           1 :             ereport(ERROR,
     609             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     610             :                      errmsg("lexeme array may not contain nulls")));
     611             : 
     612          19 :         lex = VARDATA(dlexemes[i]);
     613          19 :         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     614          19 :         lex_pos = tsvector_bsearch(tsin, lex, lex_len);
     615             : 
     616          19 :         if (lex_pos >= 0)
     617          13 :             skip_indices[skip_count++] = lex_pos;
     618             :     }
     619             : 
     620           6 :     tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
     621             : 
     622           6 :     pfree(skip_indices);
     623           6 :     PG_FREE_IF_COPY(tsin, 0);
     624           6 :     PG_FREE_IF_COPY(lexemes, 1);
     625             : 
     626           6 :     PG_RETURN_POINTER(tsout);
     627             : }
     628             : 
     629             : /*
     630             :  * Expand tsvector as table with following columns:
     631             :  *     lexeme: lexeme text
     632             :  *     positions: smallint array of lexeme positions
     633             :  *     weights: text array of one-letter weights (A-D), parallel to positions
                     :  *
                     :  * Set-returning function: each call emits the row for one lexeme, in the
                     :  * order the entries are stored.  positions and weights are both NULL for
                     :  * a lexeme that carries no position data.
     634             :  */
     635             : Datum
     636          30 : tsvector_unnest(PG_FUNCTION_ARGS)
     637             : {
     638             :     FuncCallContext *funcctx;
     639             :     TSVector    tsin;
     640             : 
     641          30 :     if (SRF_IS_FIRSTCALL())
     642             :     {
     643             :         MemoryContext oldcontext;
     644             :         TupleDesc   tupdesc;
     645             : 
     646           5 :         funcctx = SRF_FIRSTCALL_INIT();
     647           5 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
     648             : 
     649           5 :         tupdesc = CreateTemplateTupleDesc(3, false);
     650           5 :         TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
     651             :                            TEXTOID, -1, 0);
     652           5 :         TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
     653             :                            INT2ARRAYOID, -1, 0);
     654           5 :         TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
     655             :                            TEXTARRAYOID, -1, 0);
     656           5 :         funcctx->tuple_desc = BlessTupleDesc(tupdesc);
     657             : 
     658           5 :         funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);    /* private copy, read again on every later call */
     659             : 
     660           5 :         MemoryContextSwitchTo(oldcontext);
     661             :     }
     662             : 
     663          30 :     funcctx = SRF_PERCALL_SETUP();
     664          30 :     tsin = (TSVector) funcctx->user_fctx;
     665             : 
     666          30 :     if (funcctx->call_cntr < tsin->size)
     667             :     {
     668          25 :         WordEntry  *arrin = ARRPTR(tsin);
     669          25 :         char       *data = STRPTR(tsin);
     670             :         HeapTuple   tuple;
     671             :         int         j,
     672          25 :                     i = funcctx->call_cntr;    /* entry to emit on this call */
     673          25 :         bool        nulls[] = {false, false, false};
     674             :         Datum       values[3];
     675             : 
     676          25 :         values[0] = PointerGetDatum(
     677             :                                     cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len)
     678             :             );
     679             : 
     680          25 :         if (arrin[i].haspos)
     681             :         {
     682             :             WordEntryPosVector *posv;
     683             :             Datum      *positions;
     684             :             Datum      *weights;
     685             :             char        weight;
     686             : 
     687             :             /*
     688             :              * Internally tsvector stores position and weight in the same
     689             :              * uint16 (2 bits for weight, 14 for position). Here we extract
     690             :              * that in two separate arrays.
                     :              *
                     :              * The weight value w is reported as the letter D minus w, so
                     :              * 0 becomes D and 3 becomes A.
     691             :              */
     692          15 :             posv = _POSVECPTR(tsin, arrin + i);
     693          15 :             positions = palloc(posv->npos * sizeof(Datum));
     694          15 :             weights = palloc(posv->npos * sizeof(Datum));
     695          42 :             for (j = 0; j < posv->npos; j++)
     696             :             {
     697          27 :                 positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
     698          27 :                 weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
     699          27 :                 weights[j] = PointerGetDatum(
     700             :                                              cstring_to_text_with_len(&weight, 1)
     701             :                     );
     702             :             }
     703             : 
     704          15 :             values[1] = PointerGetDatum(
     705             :                                         construct_array(positions, posv->npos, INT2OID, 2, true, 's'));
     706          15 :             values[2] = PointerGetDatum(
     707             :                                         construct_array(weights, posv->npos, TEXTOID, -1, false, 'i'));
     708             :         }
     709             :         else
     710             :         {
     711          10 :             nulls[1] = nulls[2] = true;    /* no position data: both array columns are NULL */
     712             :         }
     713             : 
     714          25 :         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     715          25 :         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     716             :     }
     717             :     else
     718             :     {
     719           5 :         pfree(tsin);    /* all entries emitted: drop the copy made on the first call */
     720           5 :         SRF_RETURN_DONE(funcctx);
     721             :     }
     722             : }
     723             : 
     724             : /*
     725             :  * Convert tsvector to array of lexemes.
                     :  *
                     :  * The result is a text array holding one element per entry, in stored
                     :  * order.  Only the lexeme text is copied; position and weight data in
                     :  * the input are not looked at.
     726             :  */
     727             : Datum
     728           2 : tsvector_to_array(PG_FUNCTION_ARGS)
     729             : {
     730           2 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     731           2 :     WordEntry  *arrin = ARRPTR(tsin);
     732             :     Datum      *elements;
     733             :     int         i;
     734             :     ArrayType  *array;
     735             : 
     736           2 :     elements = palloc(tsin->size * sizeof(Datum));
     737             : 
     738          12 :     for (i = 0; i < tsin->size; i++)
     739             :     {
     740          10 :         elements[i] = PointerGetDatum(
     741             :                                       cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, arrin[i].len)
     742             :             );
     743             :     }
     744             : 
     745           2 :     array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
     746             : 
     747           2 :     pfree(elements);    /* frees only the Datum array, not the text values it points to */
     748           2 :     PG_FREE_IF_COPY(tsin, 0);
     749           2 :     PG_RETURN_POINTER(array);
     750             : }
     751             : 
     752             : /*
     753             :  * Build tsvector from array of lexemes.
                     :  *
                     :  * A NULL element raises an error.  The lexemes are sorted with
                     :  * compare_text_lexemes and duplicates are removed; every entry of the
                     :  * result has haspos = 0, i.e. no position data.
                     :  * NOTE(review): nothing in this function checks a lexeme length or the
                     :  * accumulated datalen against the width of the WordEntry len and pos
                     :  * fields - confirm whether that is guaranteed elsewhere.
     754             :  */
     755             : Datum
     756           3 : array_to_tsvector(PG_FUNCTION_ARGS)
     757             : {
     758           3 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
     759             :     TSVector    tsout;
     760             :     Datum      *dlexemes;
     761             :     WordEntry  *arrout;
     762             :     bool       *nulls;
     763             :     int         nitems,
     764             :                 i,
     765             :                 j,
     766             :                 tslen,
     767           3 :                 datalen = 0;
     768             :     char       *cur;
     769             : 
     770           3 :     deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
     771             : 
     772             :     /* Reject nulls (maybe we should just ignore them, instead?) */
     773          16 :     for (i = 0; i < nitems; i++)
     774             :     {
     775          14 :         if (nulls[i])
     776           1 :             ereport(ERROR,
     777             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     778             :                      errmsg("lexeme array may not contain nulls")));
     779             :     }
     780             : 
     781             :     /* Sort and de-dup, because this is required for a valid tsvector. */
     782           2 :     if (nitems > 1)
     783             :     {
     784           2 :         qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
     785           2 :         j = 0;    /* index of the last lexeme kept so far */
     786           9 :         for (i = 1; i < nitems; i++)
     787             :         {
     788           7 :             if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)
     789           6 :                 dlexemes[++j] = dlexemes[i];    /* strictly greater than last kept: keep it */
     790             :         }
     791           2 :         nitems = ++j;
     792             :     }
     793             : 
     794             :     /* Calculate space needed for surviving lexemes. */
     795          10 :     for (i = 0; i < nitems; i++)
     796           8 :         datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
     797           2 :     tslen = CALCDATASIZE(nitems, datalen);
     798             : 
     799             :     /* Allocate and fill tsvector. */
     800           2 :     tsout = (TSVector) palloc0(tslen);
     801           2 :     SET_VARSIZE(tsout, tslen);
     802           2 :     tsout->size = nitems;
     803             : 
     804           2 :     arrout = ARRPTR(tsout);
     805           2 :     cur = STRPTR(tsout);
     806          10 :     for (i = 0; i < nitems; i++)
     807             :     {
     808           8 :         char       *lex = VARDATA(dlexemes[i]);
     809           8 :         int         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     810             : 
     811           8 :         memcpy(cur, lex, lex_len);
     812           8 :         arrout[i].haspos = 0;    /* result never carries position data */
     813           8 :         arrout[i].len = lex_len;
     814           8 :         arrout[i].pos = cur - STRPTR(tsout);    /* offset from start of lexeme storage */
     815           8 :         cur += lex_len;
     816             :     }
     817             : 
     818           2 :     PG_FREE_IF_COPY(v, 0);
     819           2 :     PG_RETURN_POINTER(tsout);
     820             : }
     821             : 
     822             : /*
     823             :  * ts_filter(): keep only lexemes with given weights in tsvector.
                     :  *
                     :  * The second argument is an array of weight letters (A-D, either case);
                     :  * a NULL element or any other character raises an error.  A lexeme is
                     :  * kept only if at least one of its positions has a listed weight, and
                     :  * only those positions are copied to the result.  Lexemes without
                     :  * position data are always dropped.
     824             :  */
     825             : Datum
     826           3 : tsvector_filter(PG_FUNCTION_ARGS)
     827             : {
     828           3 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     829             :                 tsout;
     830           3 :     ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
     831           3 :     WordEntry  *arrin = ARRPTR(tsin),
     832             :                *arrout;
     833           3 :     char       *datain = STRPTR(tsin),
     834             :                *dataout;
     835             :     Datum      *dweights;
     836             :     bool       *nulls;
     837             :     int         nweights;
     838             :     int         i,
     839             :                 j;
     840           3 :     int         cur_pos = 0;    /* bytes of output lexeme storage used so far */
     841           3 :     char        mask = 0;    /* bit w set means weight value w is wanted; A is 8, D is 1 */
     842             : 
     843           3 :     deconstruct_array(weights, CHAROID, 1, true, 'c',
     844             :                       &dweights, &nulls, &nweights);
     845             : 
     846           7 :     for (i = 0; i < nweights; i++)
     847             :     {
     848             :         char        char_weight;
     849             : 
     850           5 :         if (nulls[i])
     851           1 :             ereport(ERROR,
     852             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     853             :                      errmsg("weight array may not contain nulls")));
     854             : 
     855           4 :         char_weight = DatumGetChar(dweights[i]);
     856           4 :         switch (char_weight)
     857             :         {
     858             :             case 'A':
     859             :             case 'a':
     860           3 :                 mask = mask | 8;
     861           3 :                 break;
     862             :             case 'B':
     863             :             case 'b':
     864           1 :                 mask = mask | 4;
     865           1 :                 break;
     866             :             case 'C':
     867             :             case 'c':
     868           0 :                 mask = mask | 2;
     869           0 :                 break;
     870             :             case 'D':
     871             :             case 'd':
     872           0 :                 mask = mask | 1;
     873           0 :                 break;
     874             :             default:
     875           0 :                 ereport(ERROR,
     876             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     877             :                          errmsg("unrecognized weight: \"%c\"", char_weight)));
     878             :         }
     879             :     }
     880             : 
     881           2 :     tsout = (TSVector) palloc0(VARSIZE(tsin));    /* sized like the input; the output is a subset of it */
     882           2 :     tsout->size = tsin->size;    /* provisional; replaced by j after the loop */
     883           2 :     arrout = ARRPTR(tsout);
     884           2 :     dataout = STRPTR(tsout);
     885             : 
     886          18 :     for (i = j = 0; i < tsin->size; i++)
     887             :     {
     888             :         WordEntryPosVector *posvin,
     889             :                    *posvout;
     890          16 :         int         npos = 0;
     891             :         int         k;
     892             : 
     893          16 :         if (!arrin[i].haspos)
     894           5 :             continue;    /* no positions, hence no weights to match */
     895             : 
     896          11 :         posvin = _POSVECPTR(tsin, arrin + i);
     897          11 :         posvout = (WordEntryPosVector *)
     898          11 :             (dataout + SHORTALIGN(cur_pos + arrin[i].len));
     899             : 
     900          22 :         for (k = 0; k < posvin->npos; k++)
     901             :         {
     902          11 :             if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
     903           5 :                 posvout->pos[npos++] = posvin->pos[k];
     904             :         }
     905             : 
     906             :         /* if no satisfactory positions found, skip lexeme */
     907          11 :         if (!npos)
     908           6 :             continue;
     909             : 
     910           5 :         arrout[j].haspos = true;
     911           5 :         arrout[j].len = arrin[i].len;
     912           5 :         arrout[j].pos = cur_pos;
     913             : 
     914           5 :         memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
     915           5 :         posvout->npos = npos;
     916           5 :         cur_pos += SHORTALIGN(arrin[i].len);
     917           5 :         cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
     918             :             sizeof(uint16);
     919           5 :         j++;
     920             :     }
     921             : 
     922           2 :     tsout->size = j;
     923           2 :     if (dataout != STRPTR(tsout))    /* STRPTR is re-evaluated with the final size */
     924           2 :         memmove(STRPTR(tsout), dataout, cur_pos);
     925             : 
     926           2 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
     927             : 
     928           2 :     PG_FREE_IF_COPY(tsin, 0);
     929           2 :     PG_RETURN_POINTER(tsout);
     930             : }
     931             : 
     932             : Datum
     933           2 : tsvector_concat(PG_FUNCTION_ARGS)
     934             : {
     935           2 :     TSVector    in1 = PG_GETARG_TSVECTOR(0);
     936           2 :     TSVector    in2 = PG_GETARG_TSVECTOR(1);
     937             :     TSVector    out;
     938             :     WordEntry  *ptr;
     939             :     WordEntry  *ptr1,
     940             :                *ptr2;
     941             :     WordEntryPos *p;
     942           2 :     int         maxpos = 0,
     943             :                 i,
     944             :                 j,
     945             :                 i1,
     946             :                 i2,
     947             :                 dataoff,
     948             :                 output_bytes,
     949             :                 output_size;
     950             :     char       *data,
     951             :                *data1,
     952             :                *data2;
     953             : 
     954             :     /*
                     :      * Overview: the entry arrays of in1 and in2 are walked in parallel,
                     :      * ordered by compareEntry.  A lexeme present in both inputs is emitted
                     :      * once.  Position data of in1 is copied unchanged; position data of
                     :      * in2 goes through add_pos together with maxpos.
                     :      * NOTE(review): add_pos is defined outside this function; from its use
                     :      * here it appears to return the number of positions it appended (zero
                     :      * meaning none were stored) - confirm against its definition.
                     :      *
                     :      * Get max position in in1; we'll need this to offset in2's positions
                     :      */
     955           2 :     ptr = ARRPTR(in1);
     956           2 :     i = in1->size;
     957           7 :     while (i--)
     958             :     {
     959           3 :         if ((j = POSDATALEN(in1, ptr)) != 0)
     960             :         {
     961           3 :             p = POSDATAPTR(in1, ptr);
     962           9 :             while (j--)
     963             :             {
     964           3 :                 if (WEP_GETPOS(*p) > maxpos)
     965           2 :                     maxpos = WEP_GETPOS(*p);
     966           3 :                 p++;
     967             :             }
     968             :         }
     969           3 :         ptr++;
     970             :     }
     971             : 
     972           2 :     ptr1 = ARRPTR(in1);
     973           2 :     ptr2 = ARRPTR(in2);
     974           2 :     data1 = STRPTR(in1);
     975           2 :     data2 = STRPTR(in2);
     976           2 :     i1 = in1->size;    /* entries of in1 still to be merged */
     977           2 :     i2 = in2->size;    /* entries of in2 still to be merged */
     978             : 
     979             :     /*
     980             :      * Conservative estimate of space needed.  We might need all the data in
     981             :      * both inputs, and conceivably add a pad byte before position data for
     982             :      * each item where there was none before.
     983             :      */
     984           2 :     output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
     985             : 
     986           2 :     out = (TSVector) palloc0(output_bytes);
     987           2 :     SET_VARSIZE(out, output_bytes);
     988             : 
     989             :     /*
     990             :      * We must make out->size valid so that STRPTR(out) is sensible.  We'll
     991             :      * collapse out any unused space at the end.
     992             :      */
     993           2 :     out->size = in1->size + in2->size;
     994             : 
     995           2 :     ptr = ARRPTR(out);
     996           2 :     data = STRPTR(out);
     997           2 :     dataoff = 0;    /* bytes of output lexeme storage used so far */
     998           7 :     while (i1 && i2)
     999             :     {
    1000           3 :         int         cmp = compareEntry(data1, ptr1, data2, ptr2);
    1001             : 
    1002           3 :         if (cmp < 0)
    1003             :         {                       /* in1 first */
    1004           1 :             ptr->haspos = ptr1->haspos;
    1005           1 :             ptr->len = ptr1->len;
    1006           1 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1007           1 :             ptr->pos = dataoff;
    1008           1 :             dataoff += ptr1->len;
    1009           1 :             if (ptr->haspos)
    1010             :             {
    1011           1 :                 dataoff = SHORTALIGN(dataoff);
    1012           1 :                 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1013           1 :                 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1014             :             }
    1015             : 
    1016           1 :             ptr++;
    1017           1 :             ptr1++;
    1018           1 :             i1--;
    1019             :         }
    1020           2 :         else if (cmp > 0)
    1021             :         {                       /* in2 first */
    1022           1 :             ptr->haspos = ptr2->haspos;
    1023           1 :             ptr->len = ptr2->len;
    1024           1 :             memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1025           1 :             ptr->pos = dataoff;
    1026           1 :             dataoff += ptr2->len;
    1027           1 :             if (ptr->haspos)
    1028             :             {
    1029           0 :                 int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1030             : 
    1031           0 :                 if (addlen == 0)
    1032           0 :                     ptr->haspos = 0;    /* nothing was added: emit the lexeme without positions */
    1033             :                 else
    1034             :                 {
    1035           0 :                     dataoff = SHORTALIGN(dataoff);
    1036           0 :                     dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1037             :                 }
    1038             :             }
    1039             : 
    1040           1 :             ptr++;
    1041           1 :             ptr2++;
    1042           1 :             i2--;
    1043             :         }
    1044             :         else
    1045             :         {                       /* same lexeme in both inputs: emit it once, text taken from in1 */
    1046           1 :             ptr->haspos = ptr1->haspos | ptr2->haspos;
    1047           1 :             ptr->len = ptr1->len;
    1048           1 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1049           1 :             ptr->pos = dataoff;
    1050           1 :             dataoff += ptr1->len;
    1051           1 :             if (ptr->haspos)
    1052             :             {
    1053           1 :                 if (ptr1->haspos)
    1054             :                 {
    1055           1 :                     dataoff = SHORTALIGN(dataoff);
    1056           1 :                     memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1057           1 :                     dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1058           1 :                     if (ptr2->haspos)
    1059           1 :                         dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);    /* the uint16 count was already counted with the in1 copy */
    1060             :                 }
    1061             :                 else            /* must have ptr2->haspos */
    1062             :                 {
    1063           0 :                     int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1064             : 
    1065           0 :                     if (addlen == 0)
    1066           0 :                         ptr->haspos = 0;
    1067             :                     else
    1068             :                     {
    1069           0 :                         dataoff = SHORTALIGN(dataoff);
    1070           0 :                         dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1071             :                     }
    1072             :                 }
    1073             :             }
    1074             : 
    1075           1 :             ptr++;
    1076           1 :             ptr1++;
    1077           1 :             ptr2++;
    1078           1 :             i1--;
    1079           1 :             i2--;
    1080             :         }
    1081             :     }
    1082             : 
    1083           5 :     while (i1)    /* in2 exhausted: copy what is left of in1 */
    1084             :     {
    1085           1 :         ptr->haspos = ptr1->haspos;
    1086           1 :         ptr->len = ptr1->len;
    1087           1 :         memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1088           1 :         ptr->pos = dataoff;
    1089           1 :         dataoff += ptr1->len;
    1090           1 :         if (ptr->haspos)
    1091             :         {
    1092           1 :             dataoff = SHORTALIGN(dataoff);
    1093           1 :             memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1094           1 :             dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1095             :         }
    1096             : 
    1097           1 :         ptr++;
    1098           1 :         ptr1++;
    1099           1 :         i1--;
    1100             :     }
    1101             : 
    1102           5 :     while (i2)    /* in1 exhausted: copy what is left of in2 */
    1103             :     {
    1104           1 :         ptr->haspos = ptr2->haspos;
    1105           1 :         ptr->len = ptr2->len;
    1106           1 :         memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1107           1 :         ptr->pos = dataoff;
    1108           1 :         dataoff += ptr2->len;
    1109           1 :         if (ptr->haspos)
    1110             :         {
    1111           1 :             int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1112             : 
    1113           1 :             if (addlen == 0)
    1114           0 :                 ptr->haspos = 0;
    1115             :             else
    1116             :             {
    1117           1 :                 dataoff = SHORTALIGN(dataoff);
    1118           1 :                 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1119             :             }
    1120             :         }
    1121             : 
    1122           1 :         ptr++;
    1123           1 :         ptr2++;
    1124           1 :         i2--;
    1125             :     }
    1126             : 
    1127             :     /*
    1128             :      * Instead of checking each offset individually, we check for overflow of
    1129             :      * pos fields once at the end.
    1130             :      */
    1131           2 :     if (dataoff > MAXSTRPOS)
    1132           0 :         ereport(ERROR,
    1133             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1134             :                  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
    1135             : 
    1136             :     /*
    1137             :      * Adjust sizes (asserting that we didn't overrun the original estimates)
    1138             :      * and collapse out any unused array entries.
    1139             :      */
    1140           2 :     output_size = ptr - ARRPTR(out);    /* number of entries actually written */
    1141           2 :     Assert(output_size <= out->size);
    1142           2 :     out->size = output_size;
    1143           2 :     if (data != STRPTR(out))    /* STRPTR is re-evaluated with the final size */
    1144           1 :         memmove(STRPTR(out), data, dataoff);
    1145           2 :     output_bytes = CALCDATASIZE(out->size, dataoff);
    1146           2 :     Assert(output_bytes <= VARSIZE(out));
    1147           2 :     SET_VARSIZE(out, output_bytes);
    1148             : 
    1149           2 :     PG_FREE_IF_COPY(in1, 0);
    1150           2 :     PG_FREE_IF_COPY(in2, 1);
    1151           2 :     PG_RETURN_POINTER(out);
    1152             : }
    1153             : 
    1154             : /*
    1155             :  * Compare two strings by tsvector rules.
    1156             :  *
    1157             :  * if prefix = true then it returns zero value iff b has prefix a
                     :  *
                     :  * With prefix = false the result is negative, zero or positive in the
                     :  * manner of memcmp: bytes are compared over the shorter length and, when
                     :  * those are equal, the shorter string sorts first.  With prefix = true
                     :  * an empty a matches everything, and a nonzero result is either the
                     :  * memcmp result or 1 when a is longer than b.
    1158             :  */
    1159             : int32
    1160      842914 : tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
    1161             : {
    1162             :     int         cmp;
    1163             : 
    1164      842914 :     if (lena == 0)
    1165             :     {
    1166           0 :         if (prefix)
    1167           0 :             cmp = 0;            /* empty string is prefix of anything */
    1168             :         else
    1169           0 :             cmp = (lenb > 0) ? -1 : 0;
    1170             :     }
    1171      842914 :     else if (lenb == 0)
    1172             :     {
    1173           0 :         cmp = (lena > 0) ? 1 : 0;    /* lena is nonzero on this path; 1 for any positive length */
    1174             :     }
    1175             :     else
    1176             :     {
    1177      842914 :         cmp = memcmp(a, b, Min(lena, lenb));
    1178             : 
    1179      842914 :         if (prefix)
    1180             :         {
    1181        1680 :             if (cmp == 0 && lena > lenb)
    1182           0 :                 cmp = 1;        /* a is longer, so not a prefix of b */
    1183             :         }
    1184      841234 :         else if (cmp == 0 && lena != lenb)
    1185             :         {
    1186        3291 :             cmp = (lena < lenb) ? -1 : 1;    /* common part equal: shorter string sorts first */
    1187             :         }
    1188             :     }
    1189             : 
    1190      842914 :     return cmp;
    1191             : }
    1192             : 
    1193             : /*
    1194             :  * Check weight info or/and fill 'data' with the required positions
                     :  *
                     :  * Returns true if entry is acceptable for operand val.  An entry with no
                     :  * position data, or a call with neither val->weight nor data set, is
                     :  * accepted without further checks and data is left untouched.
                     :  * Otherwise:
                     :  *   weight and data: data->pos gets a palloc-ed array holding, via
                     :  *       WEP_GETPOS, the positions whose weight is selected by
                     :  *       val->weight; true only if at least one position qualified.
                     :  *   weight only: true if some position has a selected weight.
                     :  *   data only: data->pos points straight into the tsvector storage
                     :  *       (allocated = false); always true.
    1195             :  */
    1196             : static bool
    1197        3667 : checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
    1198             :                ExecPhraseData *data)
    1199             : {
    1200        3667 :     bool        result = false;
    1201             : 
    1202        3667 :     if (entry->haspos && (val->weight || data))
    1203         132 :     {
    1204             :         WordEntryPosVector *posvec;
    1205             : 
    1206             :         /*
    1207             :          * We can't use the _POSVECPTR macro here because the pointer to the
    1208             :          * tsvector's lexeme storage is already contained in chkval->values.
    1209             :          */
    1210         132 :         posvec = (WordEntryPosVector *)
    1211         132 :             (chkval->values + SHORTALIGN(entry->pos + entry->len));
    1212             : 
    1213         132 :         if (val->weight && data)
    1214           0 :         {
    1215           0 :             WordEntryPos *posvec_iter = posvec->pos;
    1216             :             WordEntryPos *dptr;
    1217             : 
    1218             :             /*
    1219             :              * Filter position information by weights
    1220             :              */
    1221           0 :             dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
    1222           0 :             data->allocated = true;
    1223             : 
    1224             :             /* Is there a position with a matching weight? */
    1225           0 :             while (posvec_iter < posvec->pos + posvec->npos)
    1226             :             {
    1227             :                 /* If true, append this position to the data->pos */
    1228           0 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1229             :                 {
    1230           0 :                     *dptr = WEP_GETPOS(*posvec_iter);
    1231           0 :                     dptr++;
    1232             :                 }
    1233             : 
    1234           0 :                 posvec_iter++;
    1235             :             }
    1236             : 
    1237           0 :             data->npos = dptr - data->pos;    /* number of positions that passed the filter */
    1238             : 
    1239           0 :             if (data->npos > 0)
    1240           0 :                 result = true;
    1241             :         }
    1242         132 :         else if (val->weight)
    1243             :         {
    1244          21 :             WordEntryPos *posvec_iter = posvec->pos;
    1245             : 
    1246             :             /* Is there a position with a matching weight? */
    1247          54 :             while (posvec_iter < posvec->pos + posvec->npos)
    1248             :             {
    1249          29 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1250             :                 {
    1251          17 :                     result = true;
    1252          17 :                     break;      /* no need to go further */
    1253             :                 }
    1254             : 
    1255          12 :                 posvec_iter++;
    1256             :             }
    1257             :         }
    1258             :         else                    /* data != NULL */
    1259             :         {
    1260         111 :             data->npos = posvec->npos;
    1261         111 :             data->pos = posvec->pos;    /* no copy: points into the tsvector itself */
    1262         111 :             data->allocated = false;
    1263         111 :             result = true;
    1264             :         }
    1265             :     }
    1266             :     else
    1267             :     {
    1268        3535 :         result = true;    /* no positions to test, or nothing was asked for */
    1269             :     }
    1270             : 
    1271        3667 :     return result;
    1272             : }
    1273             : 
    1274             : /*
    1275             :  * Removes duplicate pos entries. We can't use uniquePos() from
    1276             :  * tsvector.c because array might be longer than MAXENTRYPOS
    1277             :  *
                     :  * pos: array of npos entries.  It is sorted in place (qsort with
                     :  * compareWordEntryPos) and then compacted in place: an entry is kept only
                     :  * if its WEP_GETPOS() value differs from that of the last kept entry.
                     :  * Every kept entry after the first is stored as its bare WEP_GETPOS()
                     :  * value.
                     :  * npos: number of entries in pos.  Arrays of 0 or 1 entries are returned
                     :  * untouched (not even sorted).
                     :  *
    1278             :  * Returns new length.
    1279             :  */
    1280             : static int
    1281           2 : uniqueLongPos(WordEntryPos *pos, int npos)
    1282             : {
    1283             :     WordEntryPos *pos_iter,
    1284             :                *result;
    1285             : 
    1286           2 :     if (npos <= 1)
    1287           1 :         return npos;
    1288             : 
    1289           1 :     qsort((void *) pos, npos, sizeof(WordEntryPos), compareWordEntryPos);   /* the scan below only compares neighbours */
    1290             : 
    1291           1 :     result = pos;               /* last entry kept so far */
    1292           1 :     pos_iter = pos + 1;         /* next entry to examine */
    1293           3 :     while (pos_iter < pos + npos)
    1294             :     {
    1295           1 :         if (WEP_GETPOS(*pos_iter) != WEP_GETPOS(*result))
    1296             :         {
    1297           1 :             result++;
    1298           1 :             *result = WEP_GETPOS(*pos_iter);
    1299             :         }
    1300             : 
    1301           1 :         pos_iter++;
    1302             :     }
    1303             : 
    1304           1 :     return result + 1 - pos;    /* number of entries kept */
    1305             : }
    1306             : 
    1307             : /*
    1308             :  * Is there value 'val' in the array or not?
                     :  *
                     :  * checkval: CHKVAL giving the WordEntry array [arrb, arre) to search, the
                     :  * lexeme storage ("values") and the query operand storage ("operand")
                     :  * val: query operand to look for; val->prefix requests prefix matching
                     :  * data: if not NULL, receives the positions of the match(es)
                     :  *
                     :  * An exact match is located by binary search and then vetted by
                     :  * checkclass_str().  For a prefix operand, the entries from the point
                     :  * where the search stopped are scanned for as long as they prefix-match.
                     :  * Without 'data' the scan stops at the first entry that checkclass_str()
                     :  * accepts.  With 'data' every prefix-matching entry is visited and the
                     :  * positions of all accepted entries are merged into one palloc'd array,
                     :  * which is sorted and de-duplicated by uniqueLongPos().
                     :  *
                     :  * Returns true if at least one entry was accepted.  In the prefix scan a
                     :  * later entry that is rejected never cancels an earlier accepted one.
    1309             :  */
    1310             : static bool
    1311       12486 : checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
    1312             : {
    1313       12486 :     CHKVAL     *chkval = (CHKVAL *) checkval;
    1314       12486 :     WordEntry  *StopLow = chkval->arrb;
    1315       12486 :     WordEntry  *StopHigh = chkval->arre;
    1316       12486 :     WordEntry  *StopMiddle = StopHigh;
    1317       12486 :     int         difference = -1;
    1318       12486 :     bool        res = false;
    1319             : 
    1320             :     /* Loop invariant: StopLow <= val < StopHigh */
    1321       90832 :     while (StopLow < StopHigh)
    1322             :     {
    1323       68028 :         StopMiddle = StopLow + (StopHigh - StopLow) / 2;
    1324      204084 :         difference = tsCompareString(chkval->operand + val->distance,
    1325       68028 :                                      val->length,
    1326       68028 :                                      chkval->values + StopMiddle->pos,
    1327       68028 :                                      StopMiddle->len,
    1328             :                                      false);
    1329             : 
    1330       68028 :         if (difference == 0)
    1331             :         {
    1332             :             /* Check weight info & fill 'data' with positions */
    1333        2168 :             res = checkclass_str(chkval, StopMiddle, val, data);
    1334        2168 :             break;
    1335             :         }
    1336       65860 :         else if (difference > 0)
    1337       35226 :             StopLow = StopMiddle + 1;
    1338             :         else
    1339       30634 :             StopHigh = StopMiddle;
    1340             :     }
    1341             : 
    1342       12486 :     if ((!res || data) && val->prefix)
    1343             :     {
    1344        1652 :         WordEntryPos *allpos = NULL;
    1345        1652 :         int         npos = 0,
    1346        1652 :                     totalpos = 0;
    1347             : 
    1348             :         /*
    1349             :          * there was a failed exact search, so we should scan further to find
    1350             :          * a prefix match. We also need to do so if caller needs position info
                     :          *
                     :          * If the binary search ran out without an exact hit, start at the
                     :          * point where it stopped; otherwise start at the exact hit itself,
                     :          * which is then checked again by the loop below.
    1351             :          */
    1352        1652 :         if (StopLow >= StopHigh)
    1353        1650 :             StopMiddle = StopHigh;
    1354             : 
    1355        6395 :         while ((!res || data) && StopMiddle < chkval->arre &&
    1356        4776 :                tsCompareString(chkval->operand + val->distance,
    1357        1592 :                                val->length,
    1358        1592 :                                chkval->values + StopMiddle->pos,
    1359        1592 :                                StopMiddle->len,
    1360             :                                true) == 0)
    1361             :         {
    1362        1499 :             if (data)
    1363             :             {
    1364             :                 /*
    1365             :                  * We need to join position information.  Clear npos
                     :                  * first: checkclass_str() has a path that returns true
                     :                  * without writing *data, and in that case the previous
                     :                  * entry's positions must not be appended a second time.
    1366             :                  */
                     :                 data->npos = 0;
                     : 
                     :                 /*
                     :                  * Only ever raise res here.  An entry that is rejected
                     :                  * (e.g. none of its positions has a wanted weight) must
                     :                  * not cancel matches already collected from earlier
                     :                  * entries, or the positions in allpos would be lost.
                     :                  */
                     :                 if (checkclass_str(chkval, StopMiddle, val, data))
    1370             :                 {
                     :                     res = true;
                     : 
    1371           8 :                     while (npos + data->npos >= totalpos)
    1372             :                     {
    1373           2 :                         if (totalpos == 0)
    1374             :                         {
    1375           2 :                             totalpos = 256;
    1376           2 :                             allpos = palloc(sizeof(WordEntryPos) * totalpos);
    1377             :                         }
    1378             :                         else
    1379             :                         {
    1380           0 :                             totalpos *= 2;
    1381           0 :                             allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
    1382             :                         }
    1383             :                     }
    1384             : 
    1385           3 :                     memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
    1386           3 :                     npos += data->npos;
    1387             :                 }
    1388             :             }
    1389             :             else
    1390             :             {
    1391        1496 :                 res = checkclass_str(chkval, StopMiddle, val, NULL);
    1392             :             }
    1393             : 
    1394        1499 :             StopMiddle++;
    1395             :         }
    1396             : 
    1397        1652 :         if (res && data)
    1398             :         {
    1399             :             /* Sort and make unique array of found positions */
    1400           2 :             data->pos = allpos;
    1401           2 :             data->npos = uniqueLongPos(allpos, npos);
    1402           2 :             data->allocated = true;
    1403             :         }
    1404             :     }
    1405             : 
    1406       12486 :     return res;
    1407             : }
    1408             : 
    1409             : /*
    1410             :  * Compute output position list for a tsquery operator in phrase mode.
    1411             :  *
    1412             :  * Merge the position lists in Ldata and Rdata as specified by "emit",
    1413             :  * returning the result list into *data.  The input position lists must be
    1414             :  * sorted and unique, and the output will be as well.
    1415             :  *
    1416             :  * data: pointer to initially-all-zeroes output struct, or NULL
    1417             :  * Ldata, Rdata: input position lists
    1418             :  * emit: bitmask of TSPO_XXX flags
    1419             :  * Loffset: offset to be added to Ldata positions before comparing/outputting
    1420             :  * Roffset: offset to be added to Rdata positions before comparing/outputting
    1421             :  * max_npos: maximum possible required size of output position array
    1422             :  *
    1423             :  * Loffset and Roffset should not be negative, else we risk trying to output
    1424             :  * negative positions, which won't fit into WordEntryPos.
    1425             :  *
                     :  * The output array is palloc'd with room for max_npos entries the first
                     :  * time a position is stored, and data->allocated is set at that point; if
                     :  * nothing is emitted, *data is not written at all.  A merged position is
                     :  * emitted only if it is greater than zero.
                     :  *
    1426             :  * Returns true if any positions were emitted to *data; or if data is NULL,
    1427             :  * returns true if any positions would have been emitted.
    1428             :  */
    1429             : #define TSPO_L_ONLY     0x01    /* emit positions appearing only in L */
    1430             : #define TSPO_R_ONLY     0x02    /* emit positions appearing only in R */
    1431             : #define TSPO_BOTH       0x04    /* emit positions appearing in both L&R */
    1432             : 
    1433             : static bool
    1434         107 : TS_phrase_output(ExecPhraseData *data,
    1435             :                  ExecPhraseData *Ldata,
    1436             :                  ExecPhraseData *Rdata,
    1437             :                  int emit,
    1438             :                  int Loffset,
    1439             :                  int Roffset,
    1440             :                  int max_npos)
    1441             : {
    1442             :     int         Lindex,
    1443             :                 Rindex;
    1444             : 
    1445             :     /* Loop until both inputs are exhausted */
    1446         107 :     Lindex = Rindex = 0;
    1447         287 :     while (Lindex < Ldata->npos || Rindex < Rdata->npos)
    1448             :     {
    1449             :         int         Lpos,
    1450             :                     Rpos;
    1451         143 :         int         output_pos = 0; /* 0 means "emit nothing this round" */
    1452             : 
    1453             :         /*
    1454             :          * Fetch current values to compare.  WEP_GETPOS() is needed because
    1455             :          * ExecPhraseData->pos can point to a tsvector's WordEntryPosVector.
    1456             :          */
    1457         143 :         if (Lindex < Ldata->npos)
    1458         121 :             Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
    1459             :         else
    1460             :         {
    1461             :             /* L array exhausted, so we're done if R_ONLY isn't set */
    1462          22 :             if (!(emit & TSPO_R_ONLY))
    1463          10 :                 break;
    1464          12 :             Lpos = INT_MAX;     /* sentinel: steers the merge to the "Lpos > Rpos" arm */
    1465             :         }
    1466         133 :         if (Rindex < Rdata->npos)
    1467         109 :             Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
    1468             :         else
    1469             :         {
    1470             :             /* R array exhausted, so we're done if L_ONLY isn't set */
    1471          24 :             if (!(emit & TSPO_L_ONLY))
    1472          11 :                 break;
    1473          13 :             Rpos = INT_MAX;     /* sentinel: steers the merge to the "Lpos < Rpos" arm */
    1474             :         }
    1475             : 
    1476             :         /* Merge-join the two input lists */
    1477         122 :         if (Lpos < Rpos)
    1478             :         {
    1479             :             /* Lpos is not matched in Rdata, should we output it? */
    1480          33 :             if (emit & TSPO_L_ONLY)
    1481          17 :                 output_pos = Lpos;
    1482          33 :             Lindex++;
    1483             :         }
    1484          89 :         else if (Lpos == Rpos)
    1485             :         {
    1486             :             /* Lpos and Rpos match ... should we output it? */
    1487          64 :             if (emit & TSPO_BOTH)
    1488          62 :                 output_pos = Rpos;
    1489          64 :             Lindex++;
    1490          64 :             Rindex++;
    1491             :         }
    1492             :         else                    /* Lpos > Rpos */
    1493             :         {
    1494             :             /* Rpos is not matched in Ldata, should we output it? */
    1495          25 :             if (emit & TSPO_R_ONLY)
    1496          12 :                 output_pos = Rpos;
    1497          25 :             Rindex++;
    1498             :         }
    1499             : 
    1500         122 :         if (output_pos > 0)
    1501             :         {
    1502          91 :             if (data)
    1503             :             {
    1504             :                 /* Store position, first allocating output array if needed */
    1505          42 :                 if (data->pos == NULL)
    1506             :                 {
    1507          34 :                     data->pos = (WordEntryPos *)
    1508          34 :                         palloc(max_npos * sizeof(WordEntryPos));
    1509          34 :                     data->allocated = true;
    1510             :                 }
    1511          42 :                 data->pos[data->npos++] = output_pos;
    1512             :             }
    1513             :             else
    1514             :             {
    1515             :                 /*
    1516             :                  * Exact positions not needed, so return true as soon as we
    1517             :                  * know there is at least one.
    1518             :                  */
    1519          49 :                 return true;
    1520             :             }
    1521             :         }
    1522             :     }
    1523             : 
    1524          58 :     if (data && data->npos > 0)
    1525             :     {
    1526             :         /* Let's assert we didn't overrun the array */
    1527          34 :         Assert(data->npos <= max_npos);
    1528          34 :         return true;
    1529             :     }
    1530          24 :     return false;
    1531             : }
    1532             : 
    1533             : /*
    1534             :  * Execute tsquery at or below an OP_PHRASE operator.
    1535             :  *
    1536             :  * This handles tsquery execution at recursion levels where we need to care
    1537             :  * about match locations.
    1538             :  *
                     :  * curitem: current tsquery item
                     :  * arg: opaque value passed through to the callback
                     :  * flags: bitmask of execution flags; only TS_EXEC_PHRASE_NO_POS is tested
                     :  * in this function, the value is otherwise just handed down the recursion
                     :  * chkcond: callback invoked for a QI_VAL item, with "data" passed along
                     :  * data: output struct for match position info, or NULL (see below)
                     :  *
                     :  * For an operator item, the right operand (and the sole operand of OP_NOT)
                     :  * is evaluated at curitem + 1 and the left operand at
                     :  * curitem + qoperator.left.
                     :  *
    1539             :  * In addition to the same arguments used for TS_execute, the caller may pass
    1540             :  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
    1541             :  * match position info on success.  data == NULL if no position data need be
    1542             :  * returned.  (In practice, outside callers pass NULL, and only the internal
    1543             :  * recursion cases pass a data pointer.)
    1544             :  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
    1545             :  * This is OK because an outside call always starts from an OP_PHRASE node.
    1546             :  *
    1547             :  * The detailed semantics of the match data, given that the function returned
    1548             :  * "true" (successful match, or possible match), are:
    1549             :  *
    1550             :  * npos > 0, negate = false:
    1551             :  *   query is matched at specified position(s) (and only those positions)
    1552             :  * npos > 0, negate = true:
    1553             :  *   query is matched at all positions *except* specified position(s)
    1554             :  * npos = 0, negate = false:
    1555             :  *   query is possibly matched, matching position(s) are unknown
    1556             :  *   (this should only be returned when TS_EXEC_PHRASE_NO_POS flag is set)
    1557             :  * npos = 0, negate = true:
    1558             :  *   query is matched at all positions
    1559             :  *
    1560             :  * Successful matches also return a "width" value which is the match width in
    1561             :  * lexemes, less one.  Hence, "width" is zero for simple one-lexeme matches,
    1562             :  * and is the sum of the phrase operator distances for phrase matches.  Note
    1563             :  * that when width > 0, the listed positions represent the ends of matches not
    1564             :  * the starts.  (This unintuitive rule is needed to avoid possibly generating
    1565             :  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
    1566             :  *
    1567             :  * When the function returns "false" (no match), it must return npos = 0,
    1568             :  * negate = false (which is the state initialized by the caller); but the
    1569             :  * "width" output in such cases is undefined.
    1570             :  */
    1571             : static bool
    1572         431 : TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
    1573             :                   TSExecuteCallback chkcond,
    1574             :                   ExecPhraseData *data)
    1575             : {
    1576             :     ExecPhraseData Ldata,
    1577             :                 Rdata;
    1578             :     bool        lmatch,
    1579             :                 rmatch;
    1580             :     int         Loffset,
    1581             :                 Roffset,
    1582             :                 maxwidth;
    1583             : 
    1584             :     /* since this function recurses, it could be driven to stack overflow */
    1585         431 :     check_stack_depth();
    1586             : 
    1587         431 :     if (curitem->type == QI_VAL)
    1588         251 :         return chkcond(arg, (QueryOperand *) curitem, data);    /* leaf: the callback decides */
    1589             : 
    1590         180 :     switch (curitem->qoperator.oper)
    1591             :     {
    1592             :         case OP_NOT:
    1593             : 
    1594             :             /*
    1595             :              * Because a "true" result with no specific positions is taken as
    1596             :              * uncertain, we need no special care here for !TS_EXEC_CALC_NOT.
    1597             :              * If it's a false positive, the right things happen anyway.
    1598             :              *
    1599             :              * Also, we need not touch data->width, since a NOT operation does
    1600             :              * not change the match width.
    1601             :              */
    1602           6 :             if (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
    1603             :             {
    1604           3 :                 if (data->npos > 0)
    1605             :                 {
    1606             :                     /* we have some positions, invert negate flag */
    1607           3 :                     data->negate = !data->negate;
    1608           3 :                     return true;
    1609             :                 }
    1610           0 :                 else if (data->negate)
    1611             :                 {
    1612             :                     /* change "match everywhere" to "match nowhere" */
    1613           0 :                     data->negate = false;
    1614           0 :                     return false;
    1615             :                 }
    1616             :                 /* match positions are, and remain, uncertain */
    1617           0 :                 return true;
    1618             :             }
    1619             :             else
    1620             :             {
    1621             :                 /* change "match nowhere" to "match everywhere" */
    1622           3 :                 Assert(data->npos == 0 && !data->negate);
    1623           3 :                 data->negate = true;
    1624           3 :                 return true;
    1625             :             }
    1626             : 
    1627             :         case OP_PHRASE:
    1628             :         case OP_AND:
    1629         152 :             memset(&Ldata, 0, sizeof(Ldata));
    1630         152 :             memset(&Rdata, 0, sizeof(Rdata));
    1631             : 
    1632         152 :             if (!TS_phrase_execute(curitem + curitem->qoperator.left,
    1633             :                                    arg, flags, chkcond, &Ldata))
    1634          39 :                 return false;   /* left side failed; right side is not evaluated */
    1635             : 
    1636         113 :             if (!TS_phrase_execute(curitem + 1,
    1637             :                                    arg, flags, chkcond, &Rdata))
    1638          24 :                 return false;
    1639             : 
    1640             :             /*
    1641             :              * If either operand has no position information, then we can't
    1642             :              * return position data, only a "possible match" result. "Possible
    1643             :              * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
    1644             :              * is set, otherwise return false.
    1645             :              */
    1646         176 :             if ((Ldata.npos == 0 && !Ldata.negate) ||
    1647          88 :                 (Rdata.npos == 0 && !Rdata.negate))
    1648           2 :                 return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
    1649             : 
    1650          87 :             if (curitem->qoperator.oper == OP_PHRASE)
    1651             :             {
    1652             :                 /*
    1653             :                  * Compute Loffset and Roffset suitable for phrase match, and
    1654             :                  * compute overall width of whole phrase match.
    1655             :                  */
    1656          86 :                 Loffset = curitem->qoperator.distance + Rdata.width;
    1657          86 :                 Roffset = 0;
    1658          86 :                 if (data)
    1659          48 :                     data->width = curitem->qoperator.distance +
    1660          32 :                         Ldata.width + Rdata.width;
    1661             :             }
    1662             :             else
    1663             :             {
    1664             :                 /*
    1665             :                  * For OP_AND, set output width and alignment like OP_OR (see
    1666             :                  * comment below)
    1667             :                  */
    1668           1 :                 maxwidth = Max(Ldata.width, Rdata.width);
    1669           1 :                 Loffset = maxwidth - Ldata.width;
    1670           1 :                 Roffset = maxwidth - Rdata.width;
    1671           1 :                 if (data)
    1672           1 :                     data->width = maxwidth;
    1673             :             }
    1674             : 
    1675          87 :             if (Ldata.negate && Rdata.negate)
    1676             :             {
    1677             :                 /* !L & !R: treat as !(L | R) */
    1678           0 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1679             :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1680             :                                         Loffset, Roffset,
    1681           0 :                                         Ldata.npos + Rdata.npos);
    1682           0 :                 if (data)
    1683           0 :                     data->negate = true;
    1684           0 :                 return true;
    1685             :             }
    1686          87 :             else if (Ldata.negate)
    1687             :             {
    1688             :                 /* !L & R */
    1689           5 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1690             :                                         TSPO_R_ONLY,
    1691             :                                         Loffset, Roffset,
    1692             :                                         Rdata.npos);    /* output is a subset of R's list */
    1693             :             }
    1694          82 :             else if (Rdata.negate)
    1695             :             {
    1696             :                 /* L & !R */
    1697           1 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1698             :                                         TSPO_L_ONLY,
    1699             :                                         Loffset, Roffset,
    1700             :                                         Ldata.npos);    /* output is a subset of L's list */
    1701             :             }
    1702             :             else
    1703             :             {
    1704             :                 /* straight AND */
    1705          81 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1706             :                                         TSPO_BOTH,
    1707             :                                         Loffset, Roffset,
    1708          81 :                                         Min(Ldata.npos, Rdata.npos));
    1709             :             }
    1710             : 
    1711             :         case OP_OR:
    1712          22 :             memset(&Ldata, 0, sizeof(Ldata));
    1713          22 :             memset(&Rdata, 0, sizeof(Rdata));
    1714             : 
    1715          22 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1716             :                                        arg, flags, chkcond, &Ldata);
    1717          22 :             rmatch = TS_phrase_execute(curitem + 1,
    1718             :                                        arg, flags, chkcond, &Rdata);    /* both sides are always evaluated */
    1719             : 
    1720          22 :             if (!lmatch && !rmatch)
    1721           2 :                 return false;
    1722             : 
    1723             :             /*
    1724             :              * If a valid operand has no position information, then we can't
    1725             :              * return position data, only a "possible match" result. "Possible
    1726             :              * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
    1727             :              * is set, otherwise return false.
    1728             :              */
    1729          20 :             if ((lmatch && Ldata.npos == 0 && !Ldata.negate) ||
    1730           7 :                 (rmatch && Rdata.npos == 0 && !Rdata.negate))
    1731           0 :                 return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
    1732             : 
    1733             :             /*
    1734             :              * Cope with undefined output width from failed submatch.  (This
    1735             :              * takes less code than trying to ensure that all failure returns
    1736             :              * set data->width to zero.)
    1737             :              */
    1738          20 :             if (!lmatch)
    1739           3 :                 Ldata.width = 0;
    1740          20 :             if (!rmatch)
    1741          13 :                 Rdata.width = 0;
    1742             : 
    1743             :             /*
    1744             :              * For OP_AND and OP_OR, report the width of the wider of the two
    1745             :              * inputs, and align the narrower input's positions to the right
    1746             :              * end of that width.  This rule deals at least somewhat
    1747             :              * reasonably with cases like "x <-> (y | z <-> q)".
    1748             :              */
    1749          20 :             maxwidth = Max(Ldata.width, Rdata.width);
    1750          20 :             Loffset = maxwidth - Ldata.width;
    1751          20 :             Roffset = maxwidth - Rdata.width;
    1752          20 :             data->width = maxwidth; /* unconditional: this arm relies on data != NULL */
    1753             : 
    1754          20 :             if (Ldata.negate && Rdata.negate)
    1755             :             {
    1756             :                 /* !L | !R: treat as !(L & R) */
    1757           0 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1758             :                                         TSPO_BOTH,
    1759             :                                         Loffset, Roffset,
    1760           0 :                                         Min(Ldata.npos, Rdata.npos));
    1761           0 :                 data->negate = true;
    1762           0 :                 return true;
    1763             :             }
    1764          20 :             else if (Ldata.negate)
    1765             :             {
    1766             :                 /* !L | R: treat as !(L & !R) */
    1767           4 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1768             :                                         TSPO_L_ONLY,
    1769             :                                         Loffset, Roffset,
    1770             :                                         Ldata.npos);
    1771           4 :                 data->negate = true;
    1772           4 :                 return true;
    1773             :             }
    1774          16 :             else if (Rdata.negate)
    1775             :             {
    1776             :                 /* L | !R: treat as !(!L & R) */
    1777           0 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1778             :                                         TSPO_R_ONLY,
    1779             :                                         Loffset, Roffset,
    1780             :                                         Rdata.npos);
    1781           0 :                 data->negate = true;
    1782           0 :                 return true;
    1783             :             }
    1784             :             else
    1785             :             {
    1786             :                 /* straight OR */
    1787          16 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1788             :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1789             :                                         Loffset, Roffset,
    1790          16 :                                         Ldata.npos + Rdata.npos);
    1791             :             }
    1792             : 
    1793             :         default:
    1794           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1795             :     }
    1796             : 
    1797             :     /* not reachable, but keep compiler quiet */
    1798             :     return false;
    1799             : }
    1800             : 
    1801             : 
    1802             : /*
    1803             :  * Evaluate tsquery boolean expression; returns true if it is satisfied.
    1804             :  *
    1805             :  * curitem: current tsquery item (initially, the first one)
    1806             :  * arg: opaque value to pass through to callback function
    1807             :  * flags: bitmask of flag bits shown in ts_utils.h (only TS_EXEC_CALC_NOT is tested here)
    1808             :  * chkcond: callback function to check whether a primitive value is present
    1809             :  *
    1810             :  * The logic here deals only with operators above any phrase operator, for
    1811             :  * which we do not need to worry about lexeme positions.  As soon as we hit an
    1812             :  * OP_PHRASE operator, we pass it off to TS_phrase_execute which does worry.
    1813             :  */
    1814             : bool
    1815       48005 : TS_execute(QueryItem *curitem, void *arg, uint32 flags,
    1816             :            TSExecuteCallback chkcond)
    1817             : {
    1818             :     /* since this function recurses, it could be driven to stack overflow */
    1819       48005 :     check_stack_depth();
    1820             :     /* a bare operand is decided by the callback alone */
    1821       48005 :     if (curitem->type == QI_VAL)
    1822       27352 :         return chkcond(arg, (QueryOperand *) curitem,
    1823             :                        NULL /* we don't need position info */ );
    1824             :     /* operator node: operands are at curitem + 1 and, for binary operators, curitem + qoperator.left */
    1825       20653 :     switch (curitem->qoperator.oper)
    1826             :     {
    1827             :         case OP_NOT:
    1828        2580 :             if (flags & TS_EXEC_CALC_NOT)
    1829        2542 :                 return !TS_execute(curitem + 1, arg, flags, chkcond);
    1830             :             else
    1831          38 :                 return true; /* flag not set: the NOT is not evaluated and counts as satisfied */
    1832             :             /* AND: the operand at +1 is only evaluated if the operand at +left is true */
    1833             :         case OP_AND:
    1834        8057 :             if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
    1835        1484 :                 return TS_execute(curitem + 1, arg, flags, chkcond);
    1836             :             else
    1837        6573 :                 return false;
    1838             :             /* OR: the operand at +1 is only evaluated if the operand at +left is false */
    1839             :         case OP_OR:
    1840        9900 :             if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
    1841        3697 :                 return true;
    1842             :             else
    1843        6203 :                 return TS_execute(curitem + 1, arg, flags, chkcond);
    1844             :             /* phrase operators need position-aware evaluation, done elsewhere */
    1845             :         case OP_PHRASE:
    1846         116 :             return TS_phrase_execute(curitem, arg, flags, chkcond, NULL);
    1847             : 
    1848             :         default:
    1849           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1850             :     }
    1851             : 
    1852             :     /* not reachable, but keep compiler quiet */
    1853             :     return false;
    1854             : }
    1855             : 
    1856             : /*
    1857             :  * Detect whether a tsquery boolean expression requires any positive matches
    1858             :  * to values shown in the tsquery; returns true if it does.
    1859             :  *
    1860             :  * This is needed to know whether a GIN index search requires full index scan.
    1861             :  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
    1862             :  * entries for x; but 'x | !y' could match rows containing neither x nor y.
    1863             :  */
    1864             : bool
    1865          80 : tsquery_requires_match(QueryItem *curitem)
    1866             : {
    1867             :     /* since this function recurses, it could be driven to stack overflow */
    1868          80 :     check_stack_depth();
    1869             :     /* an operand is by itself a required positive match */
    1870          80 :     if (curitem->type == QI_VAL)
    1871          47 :         return true;
    1872             : 
    1873          33 :     switch (curitem->qoperator.oper)
    1874             :     {
    1875             :         case OP_NOT:
    1876             : 
    1877             :             /*
    1878             :              * Assume there are no required matches underneath a NOT.  For
    1879             :              * some cases with nested NOTs, we could prove there's a required
    1880             :              * match, but it seems unlikely to be worth the trouble.
    1881             :              */
    1882           2 :             return false;
    1883             : 
    1884             :         case OP_PHRASE:
    1885             : 
    1886             :             /*
    1887             :              * Treat OP_PHRASE as OP_AND here (deliberate fall through)
    1888             :              */
    1889             :         case OP_AND:
    1890             :             /* If either side requires a match, we're good */
    1891          20 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    1892          20 :                 return true;
    1893             :             else
    1894           0 :                 return tsquery_requires_match(curitem + 1);
    1895             : 
    1896             :         case OP_OR:
    1897             :             /* Both sides must require a match */
    1898          11 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    1899          11 :                 return tsquery_requires_match(curitem + 1);
    1900             :             else
    1901           0 :                 return false;
    1902             : 
    1903             :         default:
    1904           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1905             :     }
    1906             : 
    1907             :     /* not reachable, but keep compiler quiet */
    1908             :     return false;
    1909             : }
    1910             : 
    1911             : /*
    1912             :  * boolean operations (ts_match_qv is ts_match_vq with its two arguments swapped)
    1913             :  */
    1914             : Datum
    1915          10 : ts_match_qv(PG_FUNCTION_ARGS)
    1916             : {
    1917          10 :     PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
    1918             :                                         PG_GETARG_DATUM(1),
    1919             :                                         PG_GETARG_DATUM(0)));
    1920             : }
    1921             : /* Test whether the tsvector (arg 0) satisfies the tsquery (arg 1); returns a boolean Datum. */
    1922             : Datum
    1923        9479 : ts_match_vq(PG_FUNCTION_ARGS)
    1924             : {
    1925        9479 :     TSVector    val = PG_GETARG_TSVECTOR(0);
    1926        9479 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    1927             :     CHKVAL      chkval;
    1928             :     bool        result;
    1929             : 
    1930             :     /* empty query matches nothing */
    1931        9479 :     if (!query->size)
    1932             :     {
    1933           0 :         PG_FREE_IF_COPY(val, 0);
    1934           0 :         PG_FREE_IF_COPY(query, 1);
    1935           0 :         PG_RETURN_BOOL(false);
    1936             :     }
    1937             :     /* callback state: bounds of the vector's WordEntry array, its string area, and the query's operand area */
    1938        9479 :     chkval.arrb = ARRPTR(val);
    1939        9479 :     chkval.arre = chkval.arrb + val->size;
    1940        9479 :     chkval.values = STRPTR(val);
    1941        9479 :     chkval.operand = GETOPERAND(query);
    1942        9479 :     result = TS_execute(GETQUERY(query),
    1943             :                         &chkval,
    1944             :                         TS_EXEC_CALC_NOT, /* NOT operators are really evaluated */
    1945             :                         checkcondition_str);
    1946             : 
    1947        9479 :     PG_FREE_IF_COPY(val, 0);
    1948        9479 :     PG_FREE_IF_COPY(query, 1);
    1949        9479 :     PG_RETURN_BOOL(result);
    1950             : }
    1951             : /* Match two text arguments: arg 0 goes through to_tsvector, arg 1 through plainto_tsquery, then ts_match_vq decides. */
    1952             : Datum
    1953           0 : ts_match_tt(PG_FUNCTION_ARGS)
    1954             : {
    1955             :     TSVector    vector;
    1956             :     TSQuery     query;
    1957             :     bool        res;
    1958             : 
    1959           0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    1960             :                                                   PG_GETARG_DATUM(0)));
    1961           0 :     query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
    1962             :                                                 PG_GETARG_DATUM(1)));
    1963             : 
    1964           0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    1965             :                                            TSVectorGetDatum(vector),
    1966             :                                            TSQueryGetDatum(query)));
    1967             :     /* both converted values were produced in this function, so release them here */
    1968           0 :     pfree(vector);
    1969           0 :     pfree(query);
    1970             : 
    1971           0 :     PG_RETURN_BOOL(res);
    1972             : }
    1973             : /* Match text (arg 0, converted with to_tsvector) against a tsquery (arg 1) by calling ts_match_vq. */
    1974             : Datum
    1975           0 : ts_match_tq(PG_FUNCTION_ARGS)
    1976             : {
    1977             :     TSVector    vector;
    1978           0 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    1979             :     bool        res;
    1980             : 
    1981           0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    1982             :                                                   PG_GETARG_DATUM(0)));
    1983             : 
    1984           0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    1985             :                                            TSVectorGetDatum(vector),
    1986             :                                            TSQueryGetDatum(query)));
    1987             :     /* the vector was built here; the query is an argument and is freed only if it is a copy */
    1988           0 :     pfree(vector);
    1989           0 :     PG_FREE_IF_COPY(query, 1);
    1990             : 
    1991           0 :     PG_RETURN_BOOL(res);
    1992             : }
    1993             : 
    1994             : /*
    1995             :  * ts_stat statistic function support
    1996             :  */
    1997             : 
    1998             : 
    1999             : /*
    2000             :  * Returns the number of positions in value 'wptr' within tsvector 'txt',
    2001             :  * that have a weight equal to one of the weights in 'weight' bitmask.
    2002             :  */
    2003             : static int
    2004           1 : check_weight(TSVector txt, WordEntry *wptr, int8 weight)
    2005             : {
    2006           1 :     int         len = POSDATALEN(txt, wptr);
    2007           1 :     int         num = 0;
    2008           1 :     WordEntryPos *ptr = POSDATAPTR(txt, wptr);
    2009             :     /* visit every position; bit N of 'weight' selects positions whose weight value is N */
    2010           6 :     while (len--)
    2011             :     {
    2012           4 :         if (weight & (1 << WEP_GETWEIGHT(*ptr)))
    2013           2 :             num++;
    2014           4 :         ptr++;
    2015             :     }
    2016           1 :     return num;
    2017             : }
    2018             : /* Compare the lexeme stored in StatEntry 'a' with the lexeme of WordEntry 'e' of tsvector 't', via tsCompareString. */
    2019             : #define compareStatWord(a,e,t)                          \
    2020             :     tsCompareString((a)->lexeme, (a)->lenlexeme,      \
    2021             :                     STRPTR(t) + (e)->pos, (e)->len,       \
    2022             :                     false)
    2023             : /* Account for word number 'off' of tsvector 'txt' in the binary tree at stat->root: update the matching node, or add a new one allocated in persistentContext. */
    2024             : static void
    2025       57638 : insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
    2026             : {
    2027       57638 :     WordEntry  *we = ARRPTR(txt) + off;
    2028       57638 :     StatEntry  *node = stat->root,
    2029       57638 :                *pnode = NULL;
    2030             :     int         n,
    2031       57638 :                 res = 0;
    2032       57638 :     uint32      depth = 1;
    2033             :     /* n: occurrences to credit; with a weight filter only positions of a selected weight count */
    2034       57638 :     if (stat->weight == 0)
    2035       28819 :         n = (we->haspos) ? POSDATALEN(txt, we) : 1;
    2036             :     else
    2037       28819 :         n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
    2038             : 
    2039       57638 :     if (n == 0)
    2040       86456 :         return;                 /* nothing to insert */
    2041             :     /* descend the tree looking for the lexeme; pnode remembers the last node visited */
    2042      319871 :     while (node)
    2043             :     {
    2044      289907 :         res = compareStatWord(node, we, txt);
    2045             : 
    2046      289907 :         if (res == 0)
    2047             :         {
    2048       27676 :             break;
    2049             :         }
    2050             :         else
    2051             :         {
    2052      262231 :             pnode = node;
    2053      262231 :             node = (res < 0) ? node->left : node->right;
    2054             :         }
    2055      262231 :         depth++;
    2056             :     }
    2057             :     /* remember the deepest level reached so far */
    2058       28820 :     if (depth > stat->maxdepth)
    2059          21 :         stat->maxdepth = depth;
    2060             :     /* not found: create a node and hook it under pnode, or make it the root of an empty tree */
    2061       28820 :     if (node == NULL)
    2062             :     {
    2063        1144 :         node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
    2064        1144 :         node->left = node->right = NULL;
    2065        1144 :         node->ndoc = 1;
    2066        1144 :         node->nentry = n;
    2067        1144 :         node->lenlexeme = we->len;
    2068        1144 :         memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
    2069             : 
    2070        1144 :         if (pnode == NULL)
    2071             :         {
    2072           2 :             stat->root = node;
    2073             :         }
    2074             :         else
    2075             :         {
    2076        1142 :             if (res < 0) /* res still holds the comparison against pnode */
    2077         564 :                 pnode->left = node;
    2078             :             else
    2079         578 :                 pnode->right = node;
    2080             :         }
    2081             : 
    2082             :     }
    2083             :     else
    2084             :     {
    2085       27676 :         node->ndoc++; /* one more tsvector contains this lexeme */
    2086       27676 :         node->nentry += n;
    2087             :     }
    2088             : }
    2089             : /* Insert the words at the midpoints of the two halves of range low..high, then recurse into each half; presumably this insertion order keeps the tree balanced for sorted input (TODO confirm). */
    2090             : static void
    2091       82692 : chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
    2092             :                     uint32 low, uint32 high, uint32 offset)
    2093             : {
    2094             :     uint32      pos;
    2095       82692 :     uint32      middle = (low + high) >> 1;
    2096             :     /* range positions are shifted by 'offset'; positions that do not map to a word of txt are skipped */
    2097       82692 :     pos = (low + middle) >> 1;
    2098       82692 :     if (low != middle && pos >= offset && pos - offset < txt->size)
    2099       28406 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2100       82692 :     pos = (high + middle + 1) >> 1;
    2101       82692 :     if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
    2102       28230 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2103             :     /* now subdivide each non-empty half in turn */
    2104       82692 :     if (low != middle)
    2105       41346 :         chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
    2106       82692 :     if (high != middle + 1)
    2107       40344 :         chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
    2108       82692 : }
    2109             : 
    2110             : /*
    2111             :  * This is written like a custom aggregate function, because the
    2112             :  * original plan was to do just that. Unfortunately, an aggregate function
    2113             :  * can't return a set, so that plan was abandoned. If that limitation is
    2114             :  * lifted in the future, ts_stat could be a real aggregate function so that
    2115             :  * you could use it like this:
    2116             :  *
    2117             :  *   SELECT ts_stat(vector_column) FROM vector_table;
    2118             :  *
    2119             :  *  where vector_column is a tsvector-type column in vector_table.
    2120             :  */
    2121             : /* Add every word of the tsvector in 'data' to 'stat' (created here if NULL) and return the state. */
    2122             : static TSVectorStat *
    2123        1018 : ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
    2124             : {
    2125        1018 :     TSVector    txt = DatumGetTSVector(data);
    2126             :     uint32      i,
    2127        1018 :                 nbit = 0,
    2128             :                 offset;
    2129             : 
    2130        1018 :     if (stat == NULL)
    2131             :     {                           /* no state yet: create it */
    2132           0 :         stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2133           0 :         stat->maxdepth = 1;
    2134             :     }
    2135             : 
    2136             :     /* simple check of correctness: nothing to do for a missing or empty vector */
    2137        1018 :     if (txt == NULL || txt->size == 0)
    2138             :     {
    2139          16 :         if (txt && txt != (TSVector) DatumGetPointer(data)) /* free only if DatumGetTSVector produced a separate copy */
    2140          16 :             pfree(txt);
    2141          16 :         return stat;
    2142             :     }
    2143             :     /* count the significant bits of (size - 1) */
    2144        1002 :     i = txt->size - 1;
    2145        7122 :     for (; i > 0; i >>= 1)
    2146        6120 :         nbit++;
    2147             :     /* nbit becomes a power of two >= size; offset is half of the unused slack */
    2148        1002 :     nbit = 1 << nbit;
    2149        1002 :     offset = (nbit - txt->size) / 2;
    2150             :     /* insert the word at the middle of the range first, then the others by repeated bisection */
    2151        1002 :     insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
    2152        1002 :     chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
    2153             : 
    2154        1002 :     return stat;
    2155             : }
    2156             : /* First-call setup: store 'stat' in funcctx, build the traversal stack positioned at the leftmost tree node, and create the result tuple descriptor. fcinfo is not used here. */
    2157             : static void
    2158           2 : ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
    2159             :                    TSVectorStat *stat)
    2160             : {
    2161             :     TupleDesc   tupdesc;
    2162             :     MemoryContext oldcontext;
    2163             :     StatEntry  *node;
    2164             : 
    2165           2 :     funcctx->user_fctx = (void *) stat;
    2166             :     /* everything allocated below goes into the multi-call memory context */
    2167           2 :     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    2168             :     /* the stack records the path from the root down to the current node */
    2169           2 :     stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
    2170           2 :     stat->stackpos = 0;
    2171             : 
    2172           2 :     node = stat->root;
    2173             :     /* find leftmost value */
    2174           2 :     if (node == NULL)
    2175           0 :         stat->stack[stat->stackpos] = NULL; /* empty tree */
    2176             :     else
    2177             :         for (;;)
    2178             :         {
    2179           8 :             stat->stack[stat->stackpos] = node;
    2180           8 :             if (node->left)
    2181             :             {
    2182           6 :                 stat->stackpos++;
    2183           6 :                 node = node->left;
    2184             :             }
    2185             :             else
    2186           2 :                 break;
    2187           6 :         }
    2188           2 :     Assert(stat->stackpos <= stat->maxdepth);
    2189             :     /* result rows have three columns: word (text), ndoc (int4), nentry (int4) */
    2190           2 :     tupdesc = CreateTemplateTupleDesc(3, false);
    2191           2 :     TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
    2192             :                        TEXTOID, -1, 0);
    2193           2 :     TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
    2194             :                        INT4OID, -1, 0);
    2195           2 :     TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
    2196             :                        INT4OID, -1, 0);
    2197           2 :     funcctx->tuple_desc = BlessTupleDesc(tupdesc);
    2198           2 :     funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    2199             : 
    2200           2 :     MemoryContextSwitchTo(oldcontext);
    2201           2 : }
    2202             : /* Advance the tree walk (left subtree, node, right subtree) and return the next node not yet reported, or NULL when none remain; ndoc == 0 marks a node as already reported. */
    2203             : static StatEntry *
    2204        2288 : walkStatEntryTree(TSVectorStat *stat)
    2205             : {
    2206        2288 :     StatEntry  *node = stat->stack[stat->stackpos];
    2207             : 
    2208        2288 :     if (node == NULL)
    2209           0 :         return NULL;
    2210             : 
    2211        2288 :     if (node->ndoc != 0)
    2212             :     {
    2213             :         /* return entry itself: its left sublink has already been handled */
    2214         566 :         return node;
    2215             :     }
    2216        1722 :     else if (node->right && node->right != stat->stack[stat->stackpos + 1])
    2217             :     {
    2218             :         /* go on right sublink (the stack slot above shows we have not just come back from it) */
    2219         578 :         stat->stackpos++;
    2220         578 :         node = node->right;
    2221             : 
    2222             :         /* find leftmost value */
    2223             :         for (;;)
    2224             :         {
    2225        1136 :             stat->stack[stat->stackpos] = node;
    2226        1136 :             if (node->left)
    2227             :             {
    2228         558 :                 stat->stackpos++;
    2229         558 :                 node = node->left;
    2230             :             }
    2231             :             else
    2232         578 :                 break;
    2233         558 :         }
    2234         578 :         Assert(stat->stackpos <= stat->maxdepth);
    2235             :     }
    2236             :     else
    2237             :     {
    2238             :         /* the left subtree, the node itself and the right subtree have all been returned */
    2239        1144 :         if (stat->stackpos == 0)
    2240           2 :             return NULL;
    2241             :         /* pop to the parent and continue from there */
    2242        1142 :         stat->stackpos--;
    2243        1142 :         return walkStatEntryTree(stat);
    2244             :     }
    2245             : 
    2246         578 :     return node;
    2247             : }
    2248             : /* Build the next result row (word, ndoc, nentry) from the tree walk; returns (Datum) 0 when no entries remain. */
    2249             : static Datum
    2250        1146 : ts_process_call(FuncCallContext *funcctx)
    2251             : {
    2252             :     TSVectorStat *st;
    2253             :     StatEntry  *entry;
    2254             : 
    2255        1146 :     st = (TSVectorStat *) funcctx->user_fctx;
    2256             : 
    2257        1146 :     entry = walkStatEntryTree(st);
    2258             : 
    2259        1146 :     if (entry != NULL)
    2260             :     {
    2261             :         Datum       result;
    2262             :         char       *values[3];
    2263             :         char        ndoc[16];
    2264             :         char        nentry[16];
    2265             :         HeapTuple   tuple;
    2266             :         /* the lexeme is stored with an explicit length, so copy it into a NUL-terminated string */
    2267        1144 :         values[0] = palloc(entry->lenlexeme + 1);
    2268        1144 :         memcpy(values[0], entry->lexeme, entry->lenlexeme);
    2269        1144 :         (values[0])[entry->lenlexeme] = '\0';
    2270        1144 :         sprintf(ndoc, "%d", entry->ndoc);
    2271        1144 :         values[1] = ndoc;
    2272        1144 :         sprintf(nentry, "%d", entry->nentry);
    2273        1144 :         values[2] = nentry;
    2274             :         /* all three columns are handed over as C strings */
    2275        1144 :         tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
    2276        1144 :         result = HeapTupleGetDatum(tuple);
    2277             : 
    2278        1144 :         pfree(values[0]);
    2279             : 
    2280             :         /* mark entry as already visited */
    2281        1144 :         entry->ndoc = 0;
    2282             : 
    2283        1144 :         return result;
    2284             :     }
    2285             : 
    2286           2 :     return (Datum) 0;
    2287             : }
    2288             : /* Run the query text 'txt' through SPI (it must return one tsvector column) and accumulate every non-null value into a new TSVectorStat allocated in persistentContext; 'ws', if not NULL, lists the weight letters to count. */
    2289             : static TSVectorStat *
    2290           2 : ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
    2291             : {
    2292           2 :     char       *query = text_to_cstring(txt);
    2293             :     TSVectorStat *stat;
    2294             :     bool        isnull;
    2295             :     Portal      portal;
    2296             :     SPIPlanPtr  plan;
    2297             : 
    2298           2 :     if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
    2299             :         /* internal error */
    2300           0 :         elog(ERROR, "SPI_prepare(\"%s\") failed", query);
    2301             : 
    2302           2 :     if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
    2303             :         /* internal error */
    2304           0 :         elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
    2305             :     /* fetch the first batch of up to 100 rows */
    2306           2 :     SPI_cursor_fetch(portal, true, 100);
    2307             :     /* the result must consist of exactly one column binary-coercible to tsvector */
    2308           4 :     if (SPI_tuptable == NULL ||
    2309           4 :         SPI_tuptable->tupdesc->natts != 1 ||
    2310           2 :         !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
    2311             :                            TSVECTOROID))
    2312           0 :         ereport(ERROR,
    2313             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2314             :                  errmsg("ts_stat query must return one tsvector column")));
    2315             : 
    2316           2 :     stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2317           2 :     stat->maxdepth = 1;
    2318             :     /* translate weight letters into bits of stat->weight: A/a = bit 3, B/b = bit 2, C/c = bit 1, D/d = bit 0 */
    2319           2 :     if (ws)
    2320             :     {
    2321             :         char       *buf;
    2322             : 
    2323           1 :         buf = VARDATA_ANY(ws);
    2324           4 :         while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
    2325             :         {
    2326           2 :             if (pg_mblen(buf) == 1) /* multibyte characters are skipped */
    2327             :             {
    2328           2 :                 switch (*buf)
    2329             :                 {
    2330             :                     case 'A':
    2331             :                     case 'a':
    2332           1 :                         stat->weight |= 1 << 3;
    2333           1 :                         break;
    2334             :                     case 'B':
    2335             :                     case 'b':
    2336           1 :                         stat->weight |= 1 << 2;
    2337           1 :                         break;
    2338             :                     case 'C':
    2339             :                     case 'c':
    2340           0 :                         stat->weight |= 1 << 1;
    2341           0 :                         break;
    2342             :                     case 'D':
    2343             :                     case 'd':
    2344           0 :                         stat->weight |= 1;
    2345           0 :                         break;
    2346             :                     default:
    2347           0 :                         stat->weight |= 0; /* any other character changes nothing */
    2348             :                 }
    2349             :             }
    2350           2 :             buf += pg_mblen(buf);
    2351             :         }
    2352             :     }
    2353             :     /* process batch after batch until a fetch returns no rows */
    2354          16 :     while (SPI_processed > 0)
    2355             :     {
    2356             :         uint64      i;
    2357             : 
    2358        1030 :         for (i = 0; i < SPI_processed; i++)
    2359             :         {
    2360        1018 :             Datum       data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
    2361             : 
    2362        1018 :             if (!isnull)
    2363        1018 :                 stat = ts_accum(persistentContext, stat, data);
    2364             :         }
    2365             : 
    2366          12 :         SPI_freetuptable(SPI_tuptable);
    2367          12 :         SPI_cursor_fetch(portal, true, 100);
    2368             :     }
    2369             :     /* release the final, empty batch and the cursor, plan and query string */
    2370           2 :     SPI_freetuptable(SPI_tuptable);
    2371           2 :     SPI_cursor_close(portal);
    2372           2 :     SPI_freeplan(plan);
    2373           2 :     pfree(query);
    2374             : 
    2375           2 :     return stat;
    2376             : }
    2377             : /* Set-returning function: word statistics for the tsvectors produced by the query text in arg 0, with no weight filter. */
    2378             : Datum
    2379        1144 : ts_stat1(PG_FUNCTION_ARGS)
    2380             : {
    2381             :     FuncCallContext *funcctx;
    2382             :     Datum       result;
    2383             :     /* first call: run the query and build the whole statistics tree */
    2384        1144 :     if (SRF_IS_FIRSTCALL())
    2385             :     {
    2386             :         TSVectorStat *stat;
    2387           1 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2388             : 
    2389           1 :         funcctx = SRF_FIRSTCALL_INIT();
    2390           1 :         SPI_connect();
    2391           1 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
    2392           1 :         PG_FREE_IF_COPY(txt, 0);
    2393           1 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2394           1 :         SPI_finish();
    2395             :     }
    2396             :     /* every call: return the next row, or finish when ts_process_call yields (Datum) 0 */
    2397        1144 :     funcctx = SRF_PERCALL_SETUP();
    2398        1144 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2399        1143 :         SRF_RETURN_NEXT(funcctx, result);
    2400           1 :     SRF_RETURN_DONE(funcctx);
    2401             : }
    2402             : /* Set-returning function: like ts_stat1, but arg 1 is a text value listing the weight letters to count. */
    2403             : Datum
    2404           2 : ts_stat2(PG_FUNCTION_ARGS)
    2405             : {
    2406             :     FuncCallContext *funcctx;
    2407             :     Datum       result;
    2408             :     /* first call: run the query and build the whole statistics tree */
    2409           2 :     if (SRF_IS_FIRSTCALL())
    2410             :     {
    2411             :         TSVectorStat *stat;
    2412           1 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2413           1 :         text       *ws = PG_GETARG_TEXT_PP(1);
    2414             : 
    2415           1 :         funcctx = SRF_FIRSTCALL_INIT();
    2416           1 :         SPI_connect();
    2417           1 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
    2418           1 :         PG_FREE_IF_COPY(txt, 0);
    2419           1 :         PG_FREE_IF_COPY(ws, 1);
    2420           1 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2421           1 :         SPI_finish();
    2422             :     }
    2423             :     /* every call: return the next row, or finish when ts_process_call yields (Datum) 0 */
    2424           2 :     funcctx = SRF_PERCALL_SETUP();
    2425           2 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2426           1 :         SRF_RETURN_NEXT(funcctx, result);
    2427           1 :     SRF_RETURN_DONE(funcctx);
    2428             : }
    2429             : 
    2430             : 
    2431             : /*
    2432             :  * Triggers for automatic update of a tsvector column from text column(s)
    2433             :  *
    2434             :  * Trigger arguments are either
    2435             :  *      name of tsvector col, name of tsconfig to use, name(s) of text col(s)
    2436             :  *      name of tsvector col, name of regconfig col, name(s) of text col(s)
    2437             :  * ie, tsconfig can either be specified by name, or indirectly as the
    2438             :  * contents of a regconfig field in the row.  If the name is used, it must
    2439             :  * be explicitly schema-qualified.
    2440             :  */
    2441             : Datum
    2442           3 : tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
    2443             : {
    2444           3 :     return tsvector_update_trigger(fcinfo, false); /* config_column = false */
    2445             : }
    2446             : /* Same trigger entry point, but calls tsvector_update_trigger with config_column = true. */
    2447             : Datum
    2448           0 : tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
    2449             : {
    2450           0 :     return tsvector_update_trigger(fcinfo, true);
    2451             : }
    2452             : 
    2453             : static Datum
    2454           3 : tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
    2455             : {   /*
                            * Common implementation behind tsvector_update_trigger_byid()
                            * and tsvector_update_trigger_bycolumn().
                            *
                            * fcinfo must carry TriggerData for a BEFORE ROW trigger fired
                            * by INSERT or UPDATE, with at least three trigger arguments:
                            *   tgargs[0]    name of the tsvector column to fill
                            *   tgargs[1]    the text search configuration (see below)
                            *   tgargs[2..]  names of the text columns to index
                            *
                            * config_column: if true, tgargs[1] names a regconfig column
                            * whose non-null value in this row is the configuration; if
                            * false, tgargs[1] is itself a schema-qualified configuration
                            * name.
                            *
                            * Returns (as a Datum) a copy of the row in which the tsvector
                            * column has been replaced by the tsvector built from all
                            * non-null text columns.  Raises ERROR on a wrong call context
                            * or on missing, mistyped, or (for the config column) null
                            * columns.
                            */
    2456             :     TriggerData *trigdata;
    2457             :     Trigger    *trigger;
    2458             :     Relation    rel;
    2459           3 :     HeapTuple   rettuple = NULL;
    2460             :     int         tsvector_attr_num,
    2461             :                 i;
    2462             :     ParsedText  prs;
    2463             :     Datum       datum;
    2464             :     bool        isnull;
    2465             :     text       *txt;
    2466             :     Oid         cfgId;
    2467             : 
    2468             :     /* Check call context: only BEFORE ROW INSERT/UPDATE triggers are supported */
    2469           3 :     if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
    2470           0 :         elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
    2471             : 
    2472           3 :     trigdata = (TriggerData *) fcinfo->context;
    2473           3 :     if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
    2474           0 :         elog(ERROR, "tsvector_update_trigger: must be fired for row");
    2475           3 :     if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
    2476           0 :         elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
    2477             :     /* Pick the tuple to modify: tg_trigtuple for INSERT, tg_newtuple for UPDATE */
    2478           3 :     if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
    2479           2 :         rettuple = trigdata->tg_trigtuple;
    2480           1 :     else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
    2481           1 :         rettuple = trigdata->tg_newtuple;
    2482             :     else
    2483           0 :         elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
    2484             : 
    2485           3 :     trigger = trigdata->tg_trigger;
    2486           3 :     rel = trigdata->tg_relation;
    2487             :     /* Need tsvector column, config, and >= 1 text column. NOTE(review): message below ends with an unmatched ')' */
    2488           3 :     if (trigger->tgnargs < 3)
    2489           0 :         elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
    2490             : 
    2491             :     /* Find the target tsvector column, named by the first trigger argument */
    2492           3 :     tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
    2493           3 :     if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
    2494           0 :         ereport(ERROR,
    2495             :                 (errcode(ERRCODE_UNDEFINED_COLUMN),
    2496             :                  errmsg("tsvector column \"%s\" does not exist",
    2497             :                         trigger->tgargs[0])));
    2498             :     /* This will effectively reject system columns, so no separate test: */
    2499           3 :     if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
    2500             :                            TSVECTOROID))
    2501           0 :         ereport(ERROR,
    2502             :                 (errcode(ERRCODE_DATATYPE_MISMATCH),
    2503             :                  errmsg("column \"%s\" is not of tsvector type",
    2504             :                         trigger->tgargs[0])));
    2505             : 
    2506             :     /* Find the configuration to use (second trigger argument) */
    2507           3 :     if (config_column)
    2508             :     {
    2509             :         int         config_attr_num;
    2510             :         /* tgargs[1] names a regconfig column; its value in this row is the config OID */
    2511           0 :         config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
    2512           0 :         if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
    2513           0 :             ereport(ERROR,
    2514             :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2515             :                      errmsg("configuration column \"%s\" does not exist",
    2516             :                             trigger->tgargs[1])));
    2517           0 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
    2518             :                                REGCONFIGOID))
    2519           0 :             ereport(ERROR,
    2520             :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2521             :                      errmsg("column \"%s\" is not of regconfig type",
    2522             :                             trigger->tgargs[1])));
    2523             :         /* fetch this row's value; a null configuration is rejected */
    2524           0 :         datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
    2525           0 :         if (isnull)
    2526           0 :             ereport(ERROR,
    2527             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    2528             :                      errmsg("configuration column \"%s\" must not be null",
    2529             :                             trigger->tgargs[1])));
    2530           0 :         cfgId = DatumGetObjectId(datum);
    2531             :     }
    2532             :     else
    2533             :     {
    2534             :         List       *names;
    2535             :         /* tgargs[1] is the configuration name itself */
    2536           3 :         names = stringToQualifiedNameList(trigger->tgargs[1]);
    2537             :         /* require a schema so that results are not search path dependent */
    2538           3 :         if (list_length(names) < 2)
    2539           0 :             ereport(ERROR,
    2540             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2541             :                      errmsg("text search configuration name \"%s\" must be schema-qualified",
    2542             :                             trigger->tgargs[1])));
    2543           3 :         cfgId = get_ts_config_oid(names, false);
    2544             :     }
    2545             : 
    2546             :     /* initialize parse state: empty word array with room for 32 entries */
    2547           3 :     prs.lenwords = 32;
    2548           3 :     prs.curwords = 0;
    2549           3 :     prs.pos = 0;
    2550           3 :     prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
    2551             : 
    2552             :     /* find all words in indexable column(s), i.e. trigger arguments 2 and up */
    2553           6 :     for (i = 2; i < trigger->tgnargs; i++)
    2554             :     {
    2555             :         int         numattr;
    2556             : 
    2557           3 :         numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
    2558           3 :         if (numattr == SPI_ERROR_NOATTRIBUTE)
    2559           0 :             ereport(ERROR,
    2560             :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2561             :                      errmsg("column \"%s\" does not exist",
    2562             :                             trigger->tgargs[i])));
    2563           3 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
    2564           0 :             ereport(ERROR,
    2565             :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2566             :                      errmsg("column \"%s\" is not of a character type",
    2567             :                             trigger->tgargs[i])));
    2568             :         /* a null text column is simply skipped */
    2569           3 :         datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
    2570           3 :         if (isnull)
    2571           1 :             continue;
    2572             : 
    2573           2 :         txt = DatumGetTextPP(datum);
    2574             : 
    2575           2 :         parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
    2576             :         /* free txt only if DatumGetTextPP handed back a copy rather than the datum itself */
    2577           2 :         if (txt != (text *) DatumGetPointer(datum))
    2578           0 :             pfree(txt);
    2579             :     }
    2580             : 
    2581             :     /* make tsvector value */
    2582           3 :     datum = TSVectorGetDatum(make_tsvector(&prs));
    2583           3 :     isnull = false;
    2584             : 
    2585             :     /* and insert it into tuple, replacing only the tsvector column */
    2586           3 :     rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
    2587             :                                          1, &tsvector_attr_num,
    2588             :                                          &datum, &isnull);
    2589             :     /* release our tsvector; presumably the new tuple holds its own copy -- confirm */
    2590           3 :     pfree(DatumGetPointer(datum));
    2591             : 
    2592           3 :     return PointerGetDatum(rettuple);
    2593             : }

Generated by: LCOV version 1.11