LCOV - PostgreSQL - src/backend/utils/adt/varlena.c

LCOV - code coverage report

Current view:	top level - src/backend/utils/adt - varlena.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL	Lines:	1355	1699	79.8 %
Date:	2017-09-29 13:40:31	Functions:	109	122	89.3 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * varlena.c
       4             :  *    Functions for the variable-length built-in types.
       5             :  *
       6             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       7             :  * Portions Copyright (c) 1994, Regents of the University of California
       8             :  *
       9             :  *
      10             :  * IDENTIFICATION
      11             :  *    src/backend/utils/adt/varlena.c
      12             :  *
      13             :  *-------------------------------------------------------------------------
      14             :  */
      15             : #include "postgres.h"
      16             : 
      17             : #include <ctype.h>
      18             : #include <limits.h>
      19             : 
      20             : #include "access/hash.h"
      21             : #include "access/tuptoaster.h"
      22             : #include "catalog/pg_collation.h"
      23             : #include "catalog/pg_type.h"
      24             : #include "common/md5.h"
      25             : #include "lib/hyperloglog.h"
      26             : #include "libpq/pqformat.h"
      27             : #include "miscadmin.h"
      28             : #include "parser/scansup.h"
      29             : #include "port/pg_bswap.h"
      30             : #include "regex/regex.h"
      31             : #include "utils/builtins.h"
      32             : #include "utils/bytea.h"
      33             : #include "utils/lsyscache.h"
      34             : #include "utils/memutils.h"
      35             : #include "utils/pg_locale.h"
      36             : #include "utils/sortsupport.h"
      37             : #include "utils/varlena.h"
      38             : 
      39             : 
      40             : /* GUC variable */
      41             : int         bytea_output = BYTEA_OUTPUT_HEX;
      42             : 
      43             : typedef struct varlena unknown;
      44             : typedef struct varlena VarString;
      45             : 
      46             : typedef struct
      47             : {
      48             :     bool        use_wchar;      /* T if multibyte encoding */
      49             :     char       *str1;           /* use these if not use_wchar */
      50             :     char       *str2;           /* note: these point to original texts */
      51             :     pg_wchar   *wstr1;          /* use these if use_wchar */
      52             :     pg_wchar   *wstr2;          /* note: these are palloc'd */
      53             :     int         len1;           /* string lengths in logical characters */
      54             :     int         len2;
      55             :     /* Skip table for Boyer-Moore-Horspool search algorithm: */
      56             :     int         skiptablemask;  /* mask for ANDing with skiptable subscripts */
      57             :     int         skiptable[256]; /* skip distance for given mismatched char */
      58             : } TextPositionState;
      59             : 
      60             : typedef struct
      61             : {
      62             :     char       *buf1;           /* 1st string, or abbreviation original string
      63             :                                  * buf */
      64             :     char       *buf2;           /* 2nd string, or abbreviation strxfrm() buf */
      65             :     int         buflen1;
      66             :     int         buflen2;
      67             :     int         last_len1;      /* Length of last buf1 string/strxfrm() input */
      68             :     int         last_len2;      /* Length of last buf2 string/strxfrm() blob */
      69             :     int         last_returned;  /* Last comparison result (cache) */
      70             :     bool        cache_blob;     /* Does buf2 contain strxfrm() blob, etc? */
      71             :     bool        collate_c;
      72             :     bool        bpchar;         /* Sorting bpchar, not varchar/text/bytea? */
      73             :     hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
      74             :     hyperLogLogState full_card; /* Full key cardinality state */
      75             :     double      prop_card;      /* Required cardinality proportion */
      76             :     pg_locale_t locale;
      77             : } VarStringSortSupport;
      78             : 
      79             : /*
      80             :  * This should be large enough that most strings will fit, but small enough
      81             :  * that we feel comfortable putting it on the stack
      82             :  */
      83             : #define TEXTBUFLEN      1024
      84             : 
      85             : #define DatumGetUnknownP(X)         ((unknown *) PG_DETOAST_DATUM(X))
      86             : #define DatumGetUnknownPCopy(X)     ((unknown *) PG_DETOAST_DATUM_COPY(X))
      87             : #define PG_GETARG_UNKNOWN_P(n)      DatumGetUnknownP(PG_GETARG_DATUM(n))
      88             : #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
      89             : #define PG_RETURN_UNKNOWN_P(x)      PG_RETURN_POINTER(x)
      90             : 
      91             : #define DatumGetVarStringP(X)       ((VarString *) PG_DETOAST_DATUM(X))
      92             : #define DatumGetVarStringPP(X)      ((VarString *) PG_DETOAST_DATUM_PACKED(X))
      93             : 
      94             : static int  varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
      95             : static int  bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
      96             : static int  varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup);
      97             : static int  varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
      98             : static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
      99             : static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
     100             : static int32 text_length(Datum str);
     101             : static text *text_catenate(text *t1, text *t2);
     102             : static text *text_substring(Datum str,
     103             :                int32 start,
     104             :                int32 length,
     105             :                bool length_not_specified);
     106             : static text *text_overlay(text *t1, text *t2, int sp, int sl);
     107             : static int  text_position(text *t1, text *t2);
     108             : static void text_position_setup(text *t1, text *t2, TextPositionState *state);
     109             : static int  text_position_next(int start_pos, TextPositionState *state);
     110             : static void text_position_cleanup(TextPositionState *state);
     111             : static int  text_cmp(text *arg1, text *arg2, Oid collid);
     112             : static bytea *bytea_catenate(bytea *t1, bytea *t2);
     113             : static bytea *bytea_substring(Datum str,
     114             :                 int S,
     115             :                 int L,
     116             :                 bool length_not_specified);
     117             : static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
     118             : static void appendStringInfoText(StringInfo str, const text *t);
     119             : static Datum text_to_array_internal(PG_FUNCTION_ARGS);
     120             : static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
     121             :                        const char *fldsep, const char *null_string);
     122             : static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
     123             : static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
     124             :                          int *value);
     125             : static const char *text_format_parse_format(const char *start_ptr,
     126             :                          const char *end_ptr,
     127             :                          int *argpos, int *widthpos,
     128             :                          int *flags, int *width);
     129             : static void text_format_string_conversion(StringInfo buf, char conversion,
     130             :                               FmgrInfo *typOutputInfo,
     131             :                               Datum value, bool isNull,
     132             :                               int flags, int width);
     133             : static void text_format_append_string(StringInfo buf, const char *str,
     134             :                           int flags, int width);
     135             : 
     136             : 
     137             : /*****************************************************************************
     138             :  *   CONVERSION ROUTINES EXPORTED FOR USE BY C CODE                          *
     139             :  *****************************************************************************/
     140             : 
     141             : /*
     142             :  * cstring_to_text
     143             :  *
     144             :  * Create a text value from a null-terminated C string.
     145             :  *
     146             :  * The new text value is freshly palloc'd with a full-size VARHDR.
     147             :  */
     148             : text *
     149      263161 : cstring_to_text(const char *s)
     150             : {
     151      263161 :     return cstring_to_text_with_len(s, strlen(s));
     152             : }
     153             : 
     154             : /*
     155             :  * cstring_to_text_with_len
     156             :  *
     157             :  * Same as cstring_to_text except the caller specifies the string length;
     158             :  * the string need not be null_terminated.
     159             :  */
     160             : text *
     161      316872 : cstring_to_text_with_len(const char *s, int len)
     162             : {
     163      316872 :     text       *result = (text *) palloc(len + VARHDRSZ);
     164             : 
     165      316872 :     SET_VARSIZE(result, len + VARHDRSZ);
     166      316872 :     memcpy(VARDATA(result), s, len);
     167             : 
     168      316872 :     return result;
     169             : }
     170             : 
     171             : /*
     172             :  * text_to_cstring
     173             :  *
     174             :  * Create a palloc'd, null-terminated C string from a text value.
     175             :  *
     176             :  * We support being passed a compressed or toasted text value.
     177             :  * This is a bit bogus since such values shouldn't really be referred to as
     178             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     179             :  * case here, we'd need another routine that did, anyway.
     180             :  */
     181             : char *
     182       89391 : text_to_cstring(const text *t)
     183             : {
     184             :     /* must cast away the const, unfortunately */
     185       89391 :     text       *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
     186       89391 :     int         len = VARSIZE_ANY_EXHDR(tunpacked);
     187             :     char       *result;
     188             : 
     189       89391 :     result = (char *) palloc(len + 1);
     190       89391 :     memcpy(result, VARDATA_ANY(tunpacked), len);
     191       89391 :     result[len] = '\0';
     192             : 
     193       89391 :     if (tunpacked != t)
     194        1209 :         pfree(tunpacked);
     195             : 
     196       89391 :     return result;
     197             : }
     198             : 
     199             : /*
     200             :  * text_to_cstring_buffer
     201             :  *
     202             :  * Copy a text value into a caller-supplied buffer of size dst_len.
     203             :  *
     204             :  * The text string is truncated if necessary to fit.  The result is
     205             :  * guaranteed null-terminated (unless dst_len == 0).
     206             :  *
     207             :  * We support being passed a compressed or toasted text value.
     208             :  * This is a bit bogus since such values shouldn't really be referred to as
     209             :  * "text *", but it seems useful for robustness.  If we didn't handle that
     210             :  * case here, we'd need another routine that did, anyway.
     211             :  */
     212             : void
     213          92 : text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
     214             : {
     215             :     /* must cast away the const, unfortunately */
     216          92 :     text       *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
     217          92 :     size_t      src_len = VARSIZE_ANY_EXHDR(srcunpacked);
     218             : 
     219          92 :     if (dst_len > 0)
     220             :     {
     221          92 :         dst_len--;
     222          92 :         if (dst_len >= src_len)
     223          92 :             dst_len = src_len;
     224             :         else                    /* ensure truncation is encoding-safe */
     225           0 :             dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
     226          92 :         memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
     227          92 :         dst[dst_len] = '\0';
     228             :     }
     229             : 
     230          92 :     if (srcunpacked != src)
     231           0 :         pfree(srcunpacked);
     232          92 : }
     233             : 
     234             : 
     235             : /*****************************************************************************
     236             :  *   USER I/O ROUTINES                                                       *
     237             :  *****************************************************************************/
     238             : 
     239             : 
     240             : #define VAL(CH)         ((CH) - '0')
     241             : #define DIG(VAL)        ((VAL) + '0')
     242             : 
     243             : /*
     244             :  *      byteain         - converts from printable representation of byte array
     245             :  *
     246             :  *      Non-printable characters must be passed as '\nnn' (octal) and are
     247             :  *      converted to internal form.  '\' must be passed as '\\'.
     248             :  *      ereport(ERROR, ...) if bad form.
     249             :  *
     250             :  *      BUGS:
     251             :  *              The input is scanned twice.
     252             :  *              The error checking of input is minimal.
     253             :  */
     254             : Datum
     255         746 : byteain(PG_FUNCTION_ARGS)
     256             : {
     257         746 :     char       *inputText = PG_GETARG_CSTRING(0);
     258             :     char       *tp;
     259             :     char       *rp;
     260             :     int         bc;
     261             :     bytea      *result;
     262             : 
     263             :     /* Recognize hex input */
     264         746 :     if (inputText[0] == '\\' && inputText[1] == 'x')
     265             :     {
     266           9 :         size_t      len = strlen(inputText);
     267             : 
     268           9 :         bc = (len - 2) / 2 + VARHDRSZ;  /* maximum possible length */
     269           9 :         result = palloc(bc);
     270           9 :         bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
     271           7 :         SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
     272             : 
     273           7 :         PG_RETURN_BYTEA_P(result);
     274             :     }
     275             : 
     276             :     /* Else, it's the traditional escaped style */
     277       10311 :     for (bc = 0, tp = inputText; *tp != '\0'; bc++)
     278             :     {
     279        9575 :         if (tp[0] != '\\')
     280        9471 :             tp++;
     281         208 :         else if ((tp[0] == '\\') &&
     282         311 :                  (tp[1] >= '0' && tp[1] <= '3') &&
     283         309 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     284         206 :                  (tp[3] >= '0' && tp[3] <= '7'))
     285         103 :             tp += 4;
     286           2 :         else if ((tp[0] == '\\') &&
     287           1 :                  (tp[1] == '\\'))
     288           0 :             tp += 2;
     289             :         else
     290             :         {
     291             :             /*
     292             :              * one backslash, not followed by another or ### valid octal
     293             :              */
     294           1 :             ereport(ERROR,
     295             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     296             :                      errmsg("invalid input syntax for type %s", "bytea")));
     297             :         }
     298             :     }
     299             : 
     300         736 :     bc += VARHDRSZ;
     301             : 
     302         736 :     result = (bytea *) palloc(bc);
     303         736 :     SET_VARSIZE(result, bc);
     304             : 
     305         736 :     tp = inputText;
     306         736 :     rp = VARDATA(result);
     307       11044 :     while (*tp != '\0')
     308             :     {
     309        9572 :         if (tp[0] != '\\')
     310        9469 :             *rp++ = *tp++;
     311         206 :         else if ((tp[0] == '\\') &&
     312         309 :                  (tp[1] >= '0' && tp[1] <= '3') &&
     313         309 :                  (tp[2] >= '0' && tp[2] <= '7') &&
     314         206 :                  (tp[3] >= '0' && tp[3] <= '7'))
     315             :         {
     316         103 :             bc = VAL(tp[1]);
     317         103 :             bc <<= 3;
     318         103 :             bc += VAL(tp[2]);
     319         103 :             bc <<= 3;
     320         103 :             *rp++ = bc + VAL(tp[3]);
     321             : 
     322         103 :             tp += 4;
     323             :         }
     324           0 :         else if ((tp[0] == '\\') &&
     325           0 :                  (tp[1] == '\\'))
     326             :         {
     327           0 :             *rp++ = '\\';
     328           0 :             tp += 2;
     329             :         }
     330             :         else
     331             :         {
     332             :             /*
     333             :              * We should never get here. The first pass should not allow it.
     334             :              */
     335           0 :             ereport(ERROR,
     336             :                     (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
     337             :                      errmsg("invalid input syntax for type %s", "bytea")));
     338             :         }
     339             :     }
     340             : 
     341         736 :     PG_RETURN_BYTEA_P(result);
     342             : }
     343             : 
     344             : /*
     345             :  *      byteaout        - converts to printable representation of byte array
     346             :  *
     347             :  *      In the traditional escaped format, non-printable characters are
     348             :  *      printed as '\nnn' (octal) and '\' as '\\'.
     349             :  */
     350             : Datum
     351          67 : byteaout(PG_FUNCTION_ARGS)
     352             : {
     353          67 :     bytea      *vlena = PG_GETARG_BYTEA_PP(0);
     354             :     char       *result;
     355             :     char       *rp;
     356             : 
     357          67 :     if (bytea_output == BYTEA_OUTPUT_HEX)
     358             :     {
     359             :         /* Print hex format */
     360          34 :         rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
     361          34 :         *rp++ = '\\';
     362          34 :         *rp++ = 'x';
     363          34 :         rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
     364             :     }
     365          33 :     else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
     366             :     {
     367             :         /* Print traditional escaped format */
     368             :         char       *vp;
     369             :         int         len;
     370             :         int         i;
     371             : 
     372          33 :         len = 1;                /* empty string has 1 char */
     373          33 :         vp = VARDATA_ANY(vlena);
     374         380 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     375             :         {
     376         347 :             if (*vp == '\\')
     377           0 :                 len += 2;
     378         347 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     379          79 :                 len += 4;
     380             :             else
     381         268 :                 len++;
     382             :         }
     383          33 :         rp = result = (char *) palloc(len);
     384          33 :         vp = VARDATA_ANY(vlena);
     385         380 :         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
     386             :         {
     387         347 :             if (*vp == '\\')
     388             :             {
     389           0 :                 *rp++ = '\\';
     390           0 :                 *rp++ = '\\';
     391             :             }
     392         347 :             else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
     393          79 :             {
     394             :                 int         val;    /* holds unprintable chars */
     395             : 
     396          79 :                 val = *vp;
     397          79 :                 rp[0] = '\\';
     398          79 :                 rp[3] = DIG(val & 07);
     399          79 :                 val >>= 3;
     400          79 :                 rp[2] = DIG(val & 07);
     401          79 :                 val >>= 3;
     402          79 :                 rp[1] = DIG(val & 03);
     403          79 :                 rp += 4;
     404             :             }
     405             :             else
     406         268 :                 *rp++ = *vp;
     407             :         }
     408             :     }
     409             :     else
     410             :     {
     411           0 :         elog(ERROR, "unrecognized bytea_output setting: %d",
     412             :              bytea_output);
     413             :         rp = result = NULL;     /* keep compiler quiet */
     414             :     }
     415          67 :     *rp = '\0';
     416          67 :     PG_RETURN_CSTRING(result);
     417             : }
     418             : 
     419             : /*
     420             :  *      bytearecv           - converts external binary format to bytea
     421             :  */
     422             : Datum
     423         164 : bytearecv(PG_FUNCTION_ARGS)
     424             : {
     425         164 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     426             :     bytea      *result;
     427             :     int         nbytes;
     428             : 
     429         164 :     nbytes = buf->len - buf->cursor;
     430         164 :     result = (bytea *) palloc(nbytes + VARHDRSZ);
     431         164 :     SET_VARSIZE(result, nbytes + VARHDRSZ);
     432         164 :     pq_copymsgbytes(buf, VARDATA(result), nbytes);
     433         164 :     PG_RETURN_BYTEA_P(result);
     434             : }
     435             : 
     436             : /*
     437             :  *      byteasend           - converts bytea to binary format
     438             :  *
     439             :  * This is a special case: just copy the input...
     440             :  */
     441             : Datum
     442          83 : byteasend(PG_FUNCTION_ARGS)
     443             : {
     444          83 :     bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
     445             : 
     446          83 :     PG_RETURN_BYTEA_P(vlena);
     447             : }
     448             : 
     449             : Datum
     450           7 : bytea_string_agg_transfn(PG_FUNCTION_ARGS)
     451             : {
     452             :     StringInfo  state;
     453             : 
     454           7 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     455             : 
     456             :     /* Append the value unless null. */
     457           7 :     if (!PG_ARGISNULL(1))
     458             :     {
     459           7 :         bytea      *value = PG_GETARG_BYTEA_PP(1);
     460             : 
     461             :         /* On the first time through, we ignore the delimiter. */
     462           7 :         if (state == NULL)
     463           4 :             state = makeStringAggState(fcinfo);
     464           3 :         else if (!PG_ARGISNULL(2))
     465             :         {
     466           2 :             bytea      *delim = PG_GETARG_BYTEA_PP(2);
     467             : 
     468           2 :             appendBinaryStringInfo(state, VARDATA_ANY(delim), VARSIZE_ANY_EXHDR(delim));
     469             :         }
     470             : 
     471           7 :         appendBinaryStringInfo(state, VARDATA_ANY(value), VARSIZE_ANY_EXHDR(value));
     472             :     }
     473             : 
     474             :     /*
     475             :      * The transition type for string_agg() is declared to be "internal",
     476             :      * which is a pass-by-value type the same size as a pointer.
     477             :      */
     478           7 :     PG_RETURN_POINTER(state);
     479             : }
     480             : 
     481             : Datum
     482           5 : bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
     483             : {
     484             :     StringInfo  state;
     485             : 
     486             :     /* cannot be called directly because of internal-type argument */
     487           5 :     Assert(AggCheckCallContext(fcinfo, NULL));
     488             : 
     489           5 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
     490             : 
     491           5 :     if (state != NULL)
     492             :     {
     493             :         bytea      *result;
     494             : 
     495           4 :         result = (bytea *) palloc(state->len + VARHDRSZ);
     496           4 :         SET_VARSIZE(result, state->len + VARHDRSZ);
     497           4 :         memcpy(VARDATA(result), state->data, state->len);
     498           4 :         PG_RETURN_BYTEA_P(result);
     499             :     }
     500             :     else
     501           1 :         PG_RETURN_NULL();
     502             : }
     503             : 
     504             : /*
     505             :  *      textin          - converts "..." to internal representation
     506             :  */
     507             : Datum
     508      137657 : textin(PG_FUNCTION_ARGS)
     509             : {
     510      137657 :     char       *inputText = PG_GETARG_CSTRING(0);
     511             : 
     512      137657 :     PG_RETURN_TEXT_P(cstring_to_text(inputText));
     513             : }
     514             : 
     515             : /*
     516             :  *      textout         - converts internal representation to "..."
     517             :  */
     518             : Datum
     519       39899 : textout(PG_FUNCTION_ARGS)
     520             : {
     521       39899 :     Datum       txt = PG_GETARG_DATUM(0);
     522             : 
     523       39899 :     PG_RETURN_CSTRING(TextDatumGetCString(txt));
     524             : }
     525             : 
     526             : /*
     527             :  *      textrecv            - converts external binary format to text
     528             :  */
     529             : Datum
     530           3 : textrecv(PG_FUNCTION_ARGS)
     531             : {
     532           3 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     533             :     text       *result;
     534             :     char       *str;
     535             :     int         nbytes;
     536             : 
     537           3 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
     538             : 
     539           3 :     result = cstring_to_text_with_len(str, nbytes);
     540           3 :     pfree(str);
     541           3 :     PG_RETURN_TEXT_P(result);
     542             : }
     543             : 
     544             : /*
     545             :  *      textsend            - converts text to binary format
     546             :  */
     547             : Datum
     548           3 : textsend(PG_FUNCTION_ARGS)
     549             : {
     550           3 :     text       *t = PG_GETARG_TEXT_PP(0);
     551             :     StringInfoData buf;
     552             : 
     553           3 :     pq_begintypsend(&buf);
     554           3 :     pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
     555           3 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     556             : }
     557             : 
     558             : 
     559             : /*
     560             :  *      unknownin           - converts "..." to internal representation
     561             :  */
     562             : Datum
     563           0 : unknownin(PG_FUNCTION_ARGS)
     564             : {
     565           0 :     char       *str = PG_GETARG_CSTRING(0);
     566             : 
     567             :     /* representation is same as cstring */
     568           0 :     PG_RETURN_CSTRING(pstrdup(str));
     569             : }
     570             : 
     571             : /*
     572             :  *      unknownout          - converts internal representation to "..."
     573             :  */
     574             : Datum
     575          65 : unknownout(PG_FUNCTION_ARGS)
     576             : {
     577             :     /* representation is same as cstring */
     578          65 :     char       *str = PG_GETARG_CSTRING(0);
     579             : 
     580          65 :     PG_RETURN_CSTRING(pstrdup(str));
     581             : }
     582             : 
     583             : /*
     584             :  *      unknownrecv         - converts external binary format to unknown
     585             :  */
     586             : Datum
     587           0 : unknownrecv(PG_FUNCTION_ARGS)
     588             : {
     589           0 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
     590             :     char       *str;
     591             :     int         nbytes;
     592             : 
     593           0 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
     594             :     /* representation is same as cstring */
     595           0 :     PG_RETURN_CSTRING(str);
     596             : }
     597             : 
     598             : /*
     599             :  *      unknownsend         - converts unknown to binary format
     600             :  */
     601             : Datum
     602           0 : unknownsend(PG_FUNCTION_ARGS)
     603             : {
     604             :     /* representation is same as cstring */
     605           0 :     char       *str = PG_GETARG_CSTRING(0);
     606             :     StringInfoData buf;
     607             : 
     608           0 :     pq_begintypsend(&buf);
     609           0 :     pq_sendtext(&buf, str, strlen(str));
     610           0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
     611             : }
     612             : 
     613             : 
     614             : /* ========== PUBLIC ROUTINES ========== */
     615             : 
     616             : /*
     617             :  * textlen -
     618             :  *    returns the logical length of a text*
     619             :  *     (which is less than the VARSIZE of the text*)
     620             :  */
     621             : Datum
     622       10247 : textlen(PG_FUNCTION_ARGS)
     623             : {
     624       10247 :     Datum       str = PG_GETARG_DATUM(0);
     625             : 
     626             :     /* try to avoid decompressing argument */
     627       10247 :     PG_RETURN_INT32(text_length(str));
     628             : }
     629             : 
     630             : /*
     631             :  * text_length -
     632             :  *  Does the real work for textlen()
     633             :  *
     634             :  *  This is broken out so it can be called directly by other string processing
     635             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     636             :  *  it may still be in compressed form.  We can avoid decompressing it at all
     637             :  *  in some cases.
     638             :  */
     639             : static int32
     640       10249 : text_length(Datum str)
     641             : {
     642             :     /* fastpath when max encoding length is one */
     643       10249 :     if (pg_database_encoding_max_length() == 1)
     644           0 :         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     645             :     else
     646             :     {
     647       10249 :         text       *t = DatumGetTextPP(str);
     648             : 
     649       10249 :         PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
     650             :                                              VARSIZE_ANY_EXHDR(t)));
     651             :     }
     652             : }
     653             : 
     654             : /*
     655             :  * textoctetlen -
     656             :  *    returns the physical length of a text*
     657             :  *     (which is less than the VARSIZE of the text*)
     658             :  */
     659             : Datum
     660           3 : textoctetlen(PG_FUNCTION_ARGS)
     661             : {
     662           3 :     Datum       str = PG_GETARG_DATUM(0);
     663             : 
     664             :     /* We need not detoast the input at all */
     665           3 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
     666             : }
     667             : 
     668             : /*
     669             :  * textcat -
     670             :  *    takes two text* and returns a text* that is the concatenation of
     671             :  *    the two.
     672             :  *
     673             :  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
     674             :  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
     675             :  * Allocate space for output in all cases.
     676             :  * XXX - thomas 1997-07-10
     677             :  */
     678             : Datum
     679       59669 : textcat(PG_FUNCTION_ARGS)
     680             : {
     681       59669 :     text       *t1 = PG_GETARG_TEXT_PP(0);
     682       59669 :     text       *t2 = PG_GETARG_TEXT_PP(1);
     683             : 
     684       59669 :     PG_RETURN_TEXT_P(text_catenate(t1, t2));
     685             : }
     686             : 
     687             : /*
     688             :  * text_catenate
     689             :  *  Guts of textcat(), broken out so it can be used by other functions
     690             :  *
     691             :  * Arguments can be in short-header form, but not compressed or out-of-line
     692             :  */
     693             : static text *
     694       59677 : text_catenate(text *t1, text *t2)
     695             : {
     696             :     text       *result;
     697             :     int         len1,
     698             :                 len2,
     699             :                 len;
     700             :     char       *ptr;
     701             : 
     702       59677 :     len1 = VARSIZE_ANY_EXHDR(t1);
     703       59677 :     len2 = VARSIZE_ANY_EXHDR(t2);
     704             : 
     705             :     /* paranoia ... probably should throw error instead? */
     706       59677 :     if (len1 < 0)
     707           0 :         len1 = 0;
     708       59677 :     if (len2 < 0)
     709           0 :         len2 = 0;
     710             : 
     711       59677 :     len = len1 + len2 + VARHDRSZ;
     712       59677 :     result = (text *) palloc(len);
     713             : 
     714             :     /* Set size of result string... */
     715       59677 :     SET_VARSIZE(result, len);
     716             : 
     717             :     /* Fill data field of result string... */
     718       59677 :     ptr = VARDATA(result);
     719       59677 :     if (len1 > 0)
     720       59637 :         memcpy(ptr, VARDATA_ANY(t1), len1);
     721       59677 :     if (len2 > 0)
     722       59660 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
     723             : 
     724       59677 :     return result;
     725             : }
     726             : 
     727             : /*
     728             :  * charlen_to_bytelen()
     729             :  *  Compute the number of bytes occupied by n characters starting at *p
     730             :  *
     731             :  * It is caller's responsibility that there actually are n characters;
     732             :  * the string need not be null-terminated.
     733             :  */
     734             : static int
     735         152 : charlen_to_bytelen(const char *p, int n)
     736             : {
     737         152 :     if (pg_database_encoding_max_length() == 1)
     738             :     {
     739             :         /* Optimization for single-byte encodings */
     740           0 :         return n;
     741             :     }
     742             :     else
     743             :     {
     744             :         const char *s;
     745             : 
     746        1324 :         for (s = p; n > 0; n--)
     747        1172 :             s += pg_mblen(s);
     748             : 
     749         152 :         return s - p;
     750             :     }
     751             : }
     752             : 
     753             : /*
     754             :  * text_substr()
     755             :  * Return a substring starting at the specified position.
     756             :  * - thomas 1997-12-31
     757             :  *
     758             :  * Input:
     759             :  *  - string
     760             :  *  - starting position (is one-based)
     761             :  *  - string length
     762             :  *
     763             :  * If the starting position is zero or less, then return from the start of the string
     764             :  *  adjusting the length to be consistent with the "negative start" per SQL.
     765             :  * If the length is less than zero, return the remaining string.
     766             :  *
     767             :  * Added multibyte support.
     768             :  * - Tatsuo Ishii 1998-4-21
     769             :  * Changed behavior if starting position is less than one to conform to SQL behavior.
     770             :  * Formerly returned the entire string; now returns a portion.
     771             :  * - Thomas Lockhart 1998-12-10
     772             :  * Now uses faster TOAST-slicing interface
     773             :  * - John Gray 2002-02-22
     774             :  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
     775             :  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
     776             :  * error; if E < 1, return '', not entire string). Fixed MB related bug when
     777             :  * S > LC and < LC + 4 sometimes garbage characters are returned.
     778             :  * - Joe Conway 2002-08-10
     779             :  */
     780             : Datum
     781        1094 : text_substr(PG_FUNCTION_ARGS)
     782             : {
     783        1094 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     784             :                                     PG_GETARG_INT32(1),
     785             :                                     PG_GETARG_INT32(2),
     786             :                                     false));
     787             : }
     788             : 
     789             : /*
     790             :  * text_substr_no_len -
     791             :  *    Wrapper to avoid opr_sanity failure due to
     792             :  *    one function accepting a different number of args.
     793             :  */
     794             : Datum
     795          18 : text_substr_no_len(PG_FUNCTION_ARGS)
     796             : {
     797          18 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
     798             :                                     PG_GETARG_INT32(1),
     799             :                                     -1, true));
     800             : }
     801             : 
     802             : /*
     803             :  * text_substring -
     804             :  *  Does the real work for text_substr() and text_substr_no_len()
     805             :  *
     806             :  *  This is broken out so it can be called directly by other string processing
     807             :  *  functions.  Note that the argument is passed as a Datum, to indicate that
     808             :  *  it may still be in compressed/toasted form.  We can avoid detoasting all
     809             :  *  of it in some cases.
     810             :  *
     811             :  *  The result is always a freshly palloc'd datum.
     812             :  */
     813             : static text *
     814        1123 : text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
     815             : {
     816        1123 :     int32       eml = pg_database_encoding_max_length();
     817        1123 :     int32       S = start;      /* start position */
     818             :     int32       S1;             /* adjusted start position */
     819             :     int32       L1;             /* adjusted substring length */
     820             : 
     821             :     /* life is easy if the encoding max length is 1 */
     822        1123 :     if (eml == 1)
     823             :     {
     824           0 :         S1 = Max(S, 1);
     825             : 
     826           0 :         if (length_not_specified)   /* special case - get length to end of
     827             :                                      * string */
     828           0 :             L1 = -1;
     829             :         else
     830             :         {
     831             :             /* end position */
     832           0 :             int         E = S + length;
     833             : 
     834             :             /*
     835             :              * A negative value for L is the only way for the end position to
     836             :              * be before the start. SQL99 says to throw an error.
     837             :              */
     838           0 :             if (E < S)
     839           0 :                 ereport(ERROR,
     840             :                         (errcode(ERRCODE_SUBSTRING_ERROR),
     841             :                          errmsg("negative substring length not allowed")));
     842             : 
     843             :             /*
     844             :              * A zero or negative value for the end position can happen if the
     845             :              * start was negative or one. SQL99 says to return a zero-length
     846             :              * string.
     847             :              */
     848           0 :             if (E < 1)
     849           0 :                 return cstring_to_text("");
     850             : 
     851           0 :             L1 = E - S1;
     852             :         }
     853             : 
     854             :         /*
     855             :          * If the start position is past the end of the string, SQL99 says to
     856             :          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
     857             :          * that for us. Convert to zero-based starting position
     858             :          */
     859           0 :         return DatumGetTextPSlice(str, S1 - 1, L1);
     860             :     }
     861        1123 :     else if (eml > 1)
     862             :     {
     863             :         /*
     864             :          * When encoding max length is > 1, we can't get LC without
     865             :          * detoasting, so we'll grab a conservatively large slice now and go
     866             :          * back later to do the right thing
     867             :          */
     868             :         int32       slice_start;
     869             :         int32       slice_size;
     870             :         int32       slice_strlen;
     871             :         text       *slice;
     872             :         int32       E1;
     873             :         int32       i;
     874             :         char       *p;
     875             :         char       *s;
     876             :         text       *ret;
     877             : 
     878             :         /*
     879             :          * if S is past the end of the string, the tuple toaster will return a
     880             :          * zero-length string to us
     881             :          */
     882        1123 :         S1 = Max(S, 1);
     883             : 
     884             :         /*
     885             :          * We need to start at position zero because there is no way to know
     886             :          * in advance which byte offset corresponds to the supplied start
     887             :          * position.
     888             :          */
     889        1123 :         slice_start = 0;
     890             : 
     891        1123 :         if (length_not_specified)   /* special case - get length to end of
     892             :                                      * string */
     893          23 :             slice_size = L1 = -1;
     894             :         else
     895             :         {
     896        1100 :             int         E = S + length;
     897             : 
     898             :             /*
     899             :              * A negative value for L is the only way for the end position to
     900             :              * be before the start. SQL99 says to throw an error.
     901             :              */
     902        1100 :             if (E < S)
     903           1 :                 ereport(ERROR,
     904             :                         (errcode(ERRCODE_SUBSTRING_ERROR),
     905             :                          errmsg("negative substring length not allowed")));
     906             : 
     907             :             /*
     908             :              * A zero or negative value for the end position can happen if the
     909             :              * start was negative or one. SQL99 says to return a zero-length
     910             :              * string.
     911             :              */
     912        1099 :             if (E < 1)
     913           0 :                 return cstring_to_text("");
     914             : 
     915             :             /*
     916             :              * if E is past the end of the string, the tuple toaster will
     917             :              * truncate the length for us
     918             :              */
     919        1099 :             L1 = E - S1;
     920             : 
     921             :             /*
     922             :              * Total slice size in bytes can't be any longer than the start
     923             :              * position plus substring length times the encoding max length.
     924             :              */
     925        1099 :             slice_size = (S1 + L1) * eml;
     926             :         }
     927             : 
     928             :         /*
     929             :          * If we're working with an untoasted source, no need to do an extra
     930             :          * copying step.
     931             :          */
     932        2238 :         if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
     933        1116 :             VARATT_IS_EXTERNAL(DatumGetPointer(str)))
     934          17 :             slice = DatumGetTextPSlice(str, slice_start, slice_size);
     935             :         else
     936        1105 :             slice = (text *) DatumGetPointer(str);
     937             : 
     938             :         /* see if we got back an empty string */
     939        1122 :         if (VARSIZE_ANY_EXHDR(slice) == 0)
     940             :         {
     941           0 :             if (slice != (text *) DatumGetPointer(str))
     942           0 :                 pfree(slice);
     943           0 :             return cstring_to_text("");
     944             :         }
     945             : 
     946             :         /* Now we can get the actual length of the slice in MB characters */
     947        3366 :         slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
     948        3366 :                                             VARSIZE_ANY_EXHDR(slice));
     949             : 
     950             :         /*
     951             :          * Check that the start position wasn't > slice_strlen. If so, SQL99
     952             :          * says to return a zero-length string.
     953             :          */
     954        1122 :         if (S1 > slice_strlen)
     955             :         {
     956           7 :             if (slice != (text *) DatumGetPointer(str))
     957           0 :                 pfree(slice);
     958           7 :             return cstring_to_text("");
     959             :         }
     960             : 
     961             :         /*
     962             :          * Adjust L1 and E1 now that we know the slice string length. Again
     963             :          * remember that S1 is one based, and slice_start is zero based.
     964             :          */
     965        1115 :         if (L1 > -1)
     966        1095 :             E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
     967             :         else
     968          20 :             E1 = slice_start + 1 + slice_strlen;
     969             : 
     970             :         /*
     971             :          * Find the start position in the slice; remember S1 is not zero based
     972             :          */
     973        1115 :         p = VARDATA_ANY(slice);
     974      805133 :         for (i = 0; i < S1 - 1; i++)
     975      804018 :             p += pg_mblen(p);
     976             : 
     977             :         /* hang onto a pointer to our start position */
     978        1115 :         s = p;
     979             : 
     980             :         /*
     981             :          * Count the actual bytes used by the substring of the requested
     982             :          * length.
     983             :          */
     984       14283 :         for (i = S1; i < E1; i++)
     985       13168 :             p += pg_mblen(p);
     986             : 
     987        1115 :         ret = (text *) palloc(VARHDRSZ + (p - s));
     988        1115 :         SET_VARSIZE(ret, VARHDRSZ + (p - s));
     989        1115 :         memcpy(VARDATA(ret), s, (p - s));
     990             : 
     991        1115 :         if (slice != (text *) DatumGetPointer(str))
     992          17 :             pfree(slice);
     993             : 
     994        1115 :         return ret;
     995             :     }
     996             :     else
     997           0 :         elog(ERROR, "invalid backend encoding: encoding max length < 1");
     998             : 
     999             :     /* not reached: suppress compiler warning */
    1000             :     return NULL;
    1001             : }
    1002             : 
    1003             : /*
    1004             :  * textoverlay
    1005             :  *  Replace specified substring of first string with second
    1006             :  *
    1007             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    1008             :  * This code is a direct implementation of what the standard says.
    1009             :  */
    1010             : Datum
    1011           2 : textoverlay(PG_FUNCTION_ARGS)
    1012             : {
    1013           2 :     text       *t1 = PG_GETARG_TEXT_PP(0);
    1014           2 :     text       *t2 = PG_GETARG_TEXT_PP(1);
    1015           2 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1016           2 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    1017             : 
    1018           2 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1019             : }
    1020             : 
    1021             : Datum
    1022           2 : textoverlay_no_len(PG_FUNCTION_ARGS)
    1023             : {
    1024           2 :     text       *t1 = PG_GETARG_TEXT_PP(0);
    1025           2 :     text       *t2 = PG_GETARG_TEXT_PP(1);
    1026           2 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    1027             :     int         sl;
    1028             : 
    1029           2 :     sl = text_length(PointerGetDatum(t2));  /* defaults to length(t2) */
    1030           2 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
    1031             : }
    1032             : 
    1033             : static text *
    1034           4 : text_overlay(text *t1, text *t2, int sp, int sl)
    1035             : {
    1036             :     text       *result;
    1037             :     text       *s1;
    1038             :     text       *s2;
    1039             :     int         sp_pl_sl;
    1040             : 
    1041             :     /*
    1042             :      * Check for possible integer-overflow cases.  For negative sp, throw a
    1043             :      * "substring length" error because that's what should be expected
    1044             :      * according to the spec's definition of OVERLAY().
    1045             :      */
    1046           4 :     if (sp <= 0)
    1047           0 :         ereport(ERROR,
    1048             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    1049             :                  errmsg("negative substring length not allowed")));
    1050           4 :     sp_pl_sl = sp + sl;
    1051           4 :     if (sp_pl_sl <= sl)
    1052           0 :         ereport(ERROR,
    1053             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    1054             :                  errmsg("integer out of range")));
    1055             : 
    1056           4 :     s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
    1057           4 :     s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
    1058           4 :     result = text_catenate(s1, t2);
    1059           4 :     result = text_catenate(result, s2);
    1060             : 
    1061           4 :     return result;
    1062             : }
    1063             : 
    1064             : /*
    1065             :  * textpos -
    1066             :  *    Return the position of the specified substring.
    1067             :  *    Implements the SQL POSITION() function.
    1068             :  *    Ref: A Guide To The SQL Standard, Date & Darwen, 1997
    1069             :  * - thomas 1997-07-27
    1070             :  */
    1071             : Datum
    1072           4 : textpos(PG_FUNCTION_ARGS)
    1073             : {
    1074           4 :     text       *str = PG_GETARG_TEXT_PP(0);
    1075           4 :     text       *search_str = PG_GETARG_TEXT_PP(1);
    1076             : 
    1077           4 :     PG_RETURN_INT32((int32) text_position(str, search_str));
    1078             : }
    1079             : 
    1080             : /*
    1081             :  * text_position -
    1082             :  *  Does the real work for textpos()
    1083             :  *
    1084             :  * Inputs:
    1085             :  *      t1 - string to be searched
    1086             :  *      t2 - pattern to match within t1
    1087             :  * Result:
    1088             :  *      Character index of the first matched char, starting from 1,
    1089             :  *      or 0 if no match.
    1090             :  *
    1091             :  *  This is broken out so it can be called directly by other string processing
    1092             :  *  functions.
    1093             :  */
    1094             : static int
    1095           4 : text_position(text *t1, text *t2)
    1096             : {
    1097             :     TextPositionState state;
    1098             :     int         result;
    1099             : 
    1100           4 :     text_position_setup(t1, t2, &state);
    1101           4 :     result = text_position_next(1, &state);
    1102           4 :     text_position_cleanup(&state);
    1103           4 :     return result;
    1104             : }
    1105             : 
    1106             : 
    1107             : /*
    1108             :  * text_position_setup, text_position_next, text_position_cleanup -
    1109             :  *  Component steps of text_position()
    1110             :  *
    1111             :  * These are broken out so that a string can be efficiently searched for
    1112             :  * multiple occurrences of the same pattern.  text_position_next may be
    1113             :  * called multiple times with increasing values of start_pos, which is
    1114             :  * the 1-based character position to start the search from.  The "state"
    1115             :  * variable is normally just a local variable in the caller.
    1116             :  */
    1117             : 
    1118             : static void
    1119          30 : text_position_setup(text *t1, text *t2, TextPositionState *state)
    1120             : {
    1121          30 :     int         len1 = VARSIZE_ANY_EXHDR(t1);
    1122          30 :     int         len2 = VARSIZE_ANY_EXHDR(t2);
    1123             : 
    1124          30 :     if (pg_database_encoding_max_length() == 1)
    1125             :     {
    1126             :         /* simple case - single byte encoding */
    1127           0 :         state->use_wchar = false;
    1128           0 :         state->str1 = VARDATA_ANY(t1);
    1129           0 :         state->str2 = VARDATA_ANY(t2);
    1130           0 :         state->len1 = len1;
    1131           0 :         state->len2 = len2;
    1132             :     }
    1133             :     else
    1134             :     {
    1135             :         /* not as simple - multibyte encoding */
    1136             :         pg_wchar   *p1,
    1137             :                    *p2;
    1138             : 
    1139          30 :         p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
    1140          30 :         len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
    1141          30 :         p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
    1142          30 :         len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
    1143             : 
    1144          30 :         state->use_wchar = true;
    1145          30 :         state->wstr1 = p1;
    1146          30 :         state->wstr2 = p2;
    1147          30 :         state->len1 = len1;
    1148          30 :         state->len2 = len2;
    1149             :     }
    1150             : 
    1151             :     /*
    1152             :      * Prepare the skip table for Boyer-Moore-Horspool searching.  In these
    1153             :      * notes we use the terminology that the "haystack" is the string to be
    1154             :      * searched (t1) and the "needle" is the pattern being sought (t2).
    1155             :      *
    1156             :      * If the needle is empty or bigger than the haystack then there is no
    1157             :      * point in wasting cycles initializing the table.  We also choose not to
    1158             :      * use B-M-H for needles of length 1, since the skip table can't possibly
    1159             :      * save anything in that case.
    1160             :      */
    1161          30 :     if (len1 >= len2 && len2 > 1)
    1162             :     {
    1163           8 :         int         searchlength = len1 - len2;
    1164             :         int         skiptablemask;
    1165             :         int         last;
    1166             :         int         i;
    1167             : 
    1168             :         /*
    1169             :          * First we must determine how much of the skip table to use.  The
    1170             :          * declaration of TextPositionState allows up to 256 elements, but for
    1171             :          * short search problems we don't really want to have to initialize so
    1172             :          * many elements --- it would take too long in comparison to the
    1173             :          * actual search time.  So we choose a useful skip table size based on
    1174             :          * the haystack length minus the needle length.  The closer the needle
    1175             :          * length is to the haystack length the less useful skipping becomes.
    1176             :          *
    1177             :          * Note: since we use bit-masking to select table elements, the skip
    1178             :          * table size MUST be a power of 2, and so the mask must be 2^N-1.
    1179             :          */
    1180           8 :         if (searchlength < 16)
    1181           6 :             skiptablemask = 3;
    1182           2 :         else if (searchlength < 64)
    1183           0 :             skiptablemask = 7;
    1184           2 :         else if (searchlength < 128)
    1185           0 :             skiptablemask = 15;
    1186           2 :         else if (searchlength < 512)
    1187           2 :             skiptablemask = 31;
    1188           0 :         else if (searchlength < 2048)
    1189           0 :             skiptablemask = 63;
    1190           0 :         else if (searchlength < 4096)
    1191           0 :             skiptablemask = 127;
    1192             :         else
    1193           0 :             skiptablemask = 255;
    1194           8 :         state->skiptablemask = skiptablemask;
    1195             : 
    1196             :         /*
    1197             :          * Initialize the skip table.  We set all elements to the needle
    1198             :          * length, since this is the correct skip distance for any character
    1199             :          * not found in the needle.
    1200             :          */
    1201          96 :         for (i = 0; i <= skiptablemask; i++)
    1202          88 :             state->skiptable[i] = len2;
    1203             : 
    1204             :         /*
    1205             :          * Now examine the needle.  For each character except the last one,
    1206             :          * set the corresponding table element to the appropriate skip
    1207             :          * distance.  Note that when two characters share the same skip table
    1208             :          * entry, the one later in the needle must determine the skip
    1209             :          * distance.
    1210             :          */
    1211           8 :         last = len2 - 1;
    1212             : 
    1213           8 :         if (!state->use_wchar)
    1214             :         {
    1215           0 :             const char *str2 = state->str2;
    1216             : 
    1217           0 :             for (i = 0; i < last; i++)
    1218           0 :                 state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
    1219             :         }
    1220             :         else
    1221             :         {
    1222           8 :             const pg_wchar *wstr2 = state->wstr2;
    1223             : 
    1224          39 :             for (i = 0; i < last; i++)
    1225          31 :                 state->skiptable[wstr2[i] & skiptablemask] = last - i;
    1226             :         }
    1227             :     }
    1228          30 : }
    1229             : 
    1230             : static int
    1231          85 : text_position_next(int start_pos, TextPositionState *state)
    1232             : {
    1233          85 :     int         haystack_len = state->len1;
    1234          85 :     int         needle_len = state->len2;
    1235          85 :     int         skiptablemask = state->skiptablemask;
    1236             : 
    1237          85 :     Assert(start_pos > 0);       /* else caller error */
    1238             : 
    1239          85 :     if (needle_len <= 0)
    1240           0 :         return start_pos;       /* result for empty pattern */
    1241             : 
    1242          85 :     start_pos--;                /* adjust for zero based arrays */
    1243             : 
    1244             :     /* Done if the needle can't possibly fit */
    1245          85 :     if (haystack_len < start_pos + needle_len)
    1246           4 :         return 0;
    1247             : 
    1248          81 :     if (!state->use_wchar)
    1249             :     {
    1250             :         /* simple case - single byte encoding */
    1251           0 :         const char *haystack = state->str1;
    1252           0 :         const char *needle = state->str2;
    1253           0 :         const char *haystack_end = &haystack[haystack_len];
    1254             :         const char *hptr;
    1255             : 
    1256           0 :         if (needle_len == 1)
    1257             :         {
    1258             :             /* No point in using B-M-H for a one-character needle */
    1259           0 :             char        nchar = *needle;
    1260             : 
    1261           0 :             hptr = &haystack[start_pos];
    1262           0 :             while (hptr < haystack_end)
    1263             :             {
    1264           0 :                 if (*hptr == nchar)
    1265           0 :                     return hptr - haystack + 1;
    1266           0 :                 hptr++;
    1267             :             }
    1268             :         }
    1269             :         else
    1270             :         {
    1271           0 :             const char *needle_last = &needle[needle_len - 1];
    1272             : 
    1273             :             /* Start at startpos plus the length of the needle */
    1274           0 :             hptr = &haystack[start_pos + needle_len - 1];
    1275           0 :             while (hptr < haystack_end)
    1276             :             {
    1277             :                 /* Match the needle scanning *backward* */
    1278             :                 const char *nptr;
    1279             :                 const char *p;
    1280             : 
    1281           0 :                 nptr = needle_last;
    1282           0 :                 p = hptr;
    1283           0 :                 while (*nptr == *p)
    1284             :                 {
    1285             :                     /* Matched it all?  If so, return 1-based position */
    1286           0 :                     if (nptr == needle)
    1287           0 :                         return p - haystack + 1;
    1288           0 :                     nptr--, p--;
    1289             :                 }
    1290             : 
    1291             :                 /*
    1292             :                  * No match, so use the haystack char at hptr to decide how
    1293             :                  * far to advance.  If the needle had any occurrence of that
    1294             :                  * character (or more precisely, one sharing the same
    1295             :                  * skiptable entry) before its last character, then we advance
    1296             :                  * far enough to align the last such needle character with
    1297             :                  * that haystack position.  Otherwise we can advance by the
    1298             :                  * whole needle length.
    1299             :                  */
    1300           0 :                 hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
    1301             :             }
    1302             :         }
    1303             :     }
    1304             :     else
    1305             :     {
    1306             :         /* The multibyte char version. This works exactly the same way. */
    1307          81 :         const pg_wchar *haystack = state->wstr1;
    1308          81 :         const pg_wchar *needle = state->wstr2;
    1309          81 :         const pg_wchar *haystack_end = &haystack[haystack_len];
    1310             :         const pg_wchar *hptr;
    1311             : 
    1312          81 :         if (needle_len == 1)
    1313             :         {
    1314             :             /* No point in using B-M-H for a one-character needle */
    1315          70 :             pg_wchar    nchar = *needle;
    1316             : 
    1317          70 :             hptr = &haystack[start_pos];
    1318         778 :             while (hptr < haystack_end)
    1319             :             {
    1320         697 :                 if (*hptr == nchar)
    1321          59 :                     return hptr - haystack + 1;
    1322         638 :                 hptr++;
    1323             :             }
    1324             :         }
    1325             :         else
    1326             :         {
    1327          11 :             const pg_wchar *needle_last = &needle[needle_len - 1];
    1328             : 
    1329             :             /* Start at startpos plus the length of the needle */
    1330          11 :             hptr = &haystack[start_pos + needle_len - 1];
    1331         105 :             while (hptr < haystack_end)
    1332             :             {
    1333             :                 /* Match the needle scanning *backward* */
    1334             :                 const pg_wchar *nptr;
    1335             :                 const pg_wchar *p;
    1336             : 
    1337          90 :                 nptr = needle_last;
    1338          90 :                 p = hptr;
    1339         196 :                 while (*nptr == *p)
    1340             :                 {
    1341             :                     /* Matched it all?  If so, return 1-based position */
    1342          23 :                     if (nptr == needle)
    1343           7 :                         return p - haystack + 1;
    1344          16 :                     nptr--, p--;
    1345             :                 }
    1346             : 
    1347             :                 /*
    1348             :                  * No match, so use the haystack char at hptr to decide how
    1349             :                  * far to advance.  If the needle had any occurrence of that
    1350             :                  * character (or more precisely, one sharing the same
    1351             :                  * skiptable entry) before its last character, then we advance
    1352             :                  * far enough to align the last such needle character with
    1353             :                  * that haystack position.  Otherwise we can advance by the
    1354             :                  * whole needle length.
    1355             :                  */
    1356          83 :                 hptr += state->skiptable[*hptr & skiptablemask];
    1357             :             }
    1358             :         }
    1359             :     }
    1360             : 
    1361          15 :     return 0;                   /* not found */
    1362             : }
    1363             : 
    1364             : static void
    1365          30 : text_position_cleanup(TextPositionState *state)
    1366             : {
    1367          30 :     if (state->use_wchar)
    1368             :     {
    1369          30 :         pfree(state->wstr1);
    1370          30 :         pfree(state->wstr2);
    1371             :     }
    1372          30 : }
    1373             : 
    1374             : /* varstr_cmp()
    1375             :  * Comparison function for text strings with given lengths.
    1376             :  * Includes locale support, but must copy strings to temporary memory
    1377             :  *  to allow null-termination for inputs to strcoll().
    1378             :  * Returns an integer less than, equal to, or greater than zero, indicating
    1379             :  * whether arg1 is less than, equal to, or greater than arg2.
    1380             :  */
    1381             : int
    1382      408050 : varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
    1383             : {
    1384             :     int         result;
    1385             : 
    1386             :     /*
    1387             :      * Unfortunately, there is no strncoll(), so in the non-C locale case we
    1388             :      * have to do some memory copying.  This turns out to be significantly
    1389             :      * slower, so we optimize the case where LC_COLLATE is C.  We also try to
    1390             :      * optimize relatively-short strings by avoiding palloc/pfree overhead.
    1391             :      */
    1392      408050 :     if (lc_collate_is_c(collid))
    1393             :     {
    1394      121865 :         result = memcmp(arg1, arg2, Min(len1, len2));
    1395      121865 :         if ((result == 0) && (len1 != len2))
    1396        3321 :             result = (len1 < len2) ? -1 : 1;
    1397             :     }
    1398             :     else
    1399             :     {
    1400             :         char        a1buf[TEXTBUFLEN];
    1401             :         char        a2buf[TEXTBUFLEN];
    1402             :         char       *a1p,
    1403             :                    *a2p;
    1404      286185 :         pg_locale_t mylocale = 0;
    1405             : 
    1406      286185 :         if (collid != DEFAULT_COLLATION_OID)
    1407             :         {
    1408           1 :             if (!OidIsValid(collid))
    1409             :             {
    1410             :                 /*
    1411             :                  * This typically means that the parser could not resolve a
    1412             :                  * conflict of implicit collations, so report it that way.
    1413             :                  */
    1414           1 :                 ereport(ERROR,
    1415             :                         (errcode(ERRCODE_INDETERMINATE_COLLATION),
    1416             :                          errmsg("could not determine which collation to use for string comparison"),
    1417             :                          errhint("Use the COLLATE clause to set the collation explicitly.")));
    1418             :             }
    1419           0 :             mylocale = pg_newlocale_from_collation(collid);
    1420             :         }
    1421             : 
    1422             :         /*
    1423             :          * memcmp() can't tell us which of two unequal strings sorts first,
    1424             :          * but it's a cheap way to tell if they're equal.  Testing shows that
    1425             :          * memcmp() followed by strcoll() is only trivially slower than
    1426             :          * strcoll() by itself, so we don't lose much if this doesn't work out
    1427             :          * very often, and if it does - for example, because there are many
    1428             :          * equal strings in the input - then we win big by avoiding expensive
    1429             :          * collation-aware comparisons.
    1430             :          */
    1431      286184 :         if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
    1432      102932 :             return 0;
    1433             : 
    1434             : #ifdef WIN32
    1435             :         /* Win32 does not have UTF-8, so we need to map to UTF-16 */
    1436             :         if (GetDatabaseEncoding() == PG_UTF8
    1437             :             && (!mylocale || mylocale->provider == COLLPROVIDER_LIBC))
    1438             :         {
    1439             :             int         a1len;
    1440             :             int         a2len;
    1441             :             int         r;
    1442             : 
    1443             :             if (len1 >= TEXTBUFLEN / 2)
    1444             :             {
    1445             :                 a1len = len1 * 2 + 2;
    1446             :                 a1p = palloc(a1len);
    1447             :             }
    1448             :             else
    1449             :             {
    1450             :                 a1len = TEXTBUFLEN;
    1451             :                 a1p = a1buf;
    1452             :             }
    1453             :             if (len2 >= TEXTBUFLEN / 2)
    1454             :             {
    1455             :                 a2len = len2 * 2 + 2;
    1456             :                 a2p = palloc(a2len);
    1457             :             }
    1458             :             else
    1459             :             {
    1460             :                 a2len = TEXTBUFLEN;
    1461             :                 a2p = a2buf;
    1462             :             }
    1463             : 
    1464             :             /* stupid Microsloth API does not work for zero-length input */
    1465             :             if (len1 == 0)
    1466             :                 r = 0;
    1467             :             else
    1468             :             {
    1469             :                 r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
    1470             :                                         (LPWSTR) a1p, a1len / 2);
    1471             :                 if (!r)
    1472             :                     ereport(ERROR,
    1473             :                             (errmsg("could not convert string to UTF-16: error code %lu",
    1474             :                                     GetLastError())));
    1475             :             }
    1476             :             ((LPWSTR) a1p)[r] = 0;
    1477             : 
    1478             :             if (len2 == 0)
    1479             :                 r = 0;
    1480             :             else
    1481             :             {
    1482             :                 r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
    1483             :                                         (LPWSTR) a2p, a2len / 2);
    1484             :                 if (!r)
    1485             :                     ereport(ERROR,
    1486             :                             (errmsg("could not convert string to UTF-16: error code %lu",
    1487             :                                     GetLastError())));
    1488             :             }
    1489             :             ((LPWSTR) a2p)[r] = 0;
    1490             : 
    1491             :             errno = 0;
    1492             : #ifdef HAVE_LOCALE_T
    1493             :             if (mylocale)
    1494             :                 result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
    1495             :             else
    1496             : #endif
    1497             :                 result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
    1498             :             if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw
    1499             :                                          * headers */
    1500             :                 ereport(ERROR,
    1501             :                         (errmsg("could not compare Unicode strings: %m")));
    1502             : 
    1503             :             /*
    1504             :              * In some locales wcscoll() can claim that nonidentical strings
    1505             :              * are equal.  Believing that would be bad news for a number of
    1506             :              * reasons, so we follow Perl's lead and sort "equal" strings
    1507             :              * according to strcmp (on the UTF-8 representation).
    1508             :              */
    1509             :             if (result == 0)
    1510             :             {
    1511             :                 result = memcmp(arg1, arg2, Min(len1, len2));
    1512             :                 if ((result == 0) && (len1 != len2))
    1513             :                     result = (len1 < len2) ? -1 : 1;
    1514             :             }
    1515             : 
    1516             :             if (a1p != a1buf)
    1517             :                 pfree(a1p);
    1518             :             if (a2p != a2buf)
    1519             :                 pfree(a2p);
    1520             : 
    1521             :             return result;
    1522             :         }
    1523             : #endif                          /* WIN32 */
    1524             : 
    1525      183252 :         if (len1 >= TEXTBUFLEN)
    1526         200 :             a1p = (char *) palloc(len1 + 1);
    1527             :         else
    1528      183052 :             a1p = a1buf;
    1529      183252 :         if (len2 >= TEXTBUFLEN)
    1530           0 :             a2p = (char *) palloc(len2 + 1);
    1531             :         else
    1532      183252 :             a2p = a2buf;
    1533             : 
    1534      183252 :         memcpy(a1p, arg1, len1);
    1535      183252 :         a1p[len1] = '\0';
    1536      183252 :         memcpy(a2p, arg2, len2);
    1537      183252 :         a2p[len2] = '\0';
    1538             : 
    1539      183252 :         if (mylocale)
    1540             :         {
    1541           0 :             if (mylocale->provider == COLLPROVIDER_ICU)
    1542             :             {
    1543             : #ifdef USE_ICU
    1544             : #ifdef HAVE_UCOL_STRCOLLUTF8
    1545             :                 if (GetDatabaseEncoding() == PG_UTF8)
    1546             :                 {
    1547             :                     UErrorCode  status;
    1548             : 
    1549             :                     status = U_ZERO_ERROR;
    1550             :                     result = ucol_strcollUTF8(mylocale->info.icu.ucol,
    1551             :                                               arg1, len1,
    1552             :                                               arg2, len2,
    1553             :                                               &status);
    1554             :                     if (U_FAILURE(status))
    1555             :                         ereport(ERROR,
    1556             :                                 (errmsg("collation failed: %s", u_errorName(status))));
    1557             :                 }
    1558             :                 else
    1559             : #endif
    1560             :                 {
    1561             :                     int32_t     ulen1,
    1562             :                                 ulen2;
    1563             :                     UChar      *uchar1,
    1564             :                                *uchar2;
    1565             : 
    1566             :                     ulen1 = icu_to_uchar(&uchar1, arg1, len1);
    1567             :                     ulen2 = icu_to_uchar(&uchar2, arg2, len2);
    1568             : 
    1569             :                     result = ucol_strcoll(mylocale->info.icu.ucol,
    1570             :                                           uchar1, ulen1,
    1571             :                                           uchar2, ulen2);
    1572             : 
    1573             :                     pfree(uchar1);
    1574             :                     pfree(uchar2);
    1575             :                 }
    1576             : #else                           /* not USE_ICU */
    1577             :                 /* shouldn't happen */
    1578           0 :                 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
    1579             : #endif                          /* not USE_ICU */
    1580             :             }
    1581             :             else
    1582             :             {
    1583             : #ifdef HAVE_LOCALE_T
    1584           0 :                 result = strcoll_l(a1p, a2p, mylocale->info.lt);
    1585             : #else
    1586             :                 /* shouldn't happen */
    1587             :                 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
    1588             : #endif
    1589             :             }
    1590             :         }
    1591             :         else
    1592      183252 :             result = strcoll(a1p, a2p);
    1593             : 
    1594             :         /*
    1595             :          * In some locales strcoll() can claim that nonidentical strings are
    1596             :          * equal.  Believing that would be bad news for a number of reasons,
    1597             :          * so we follow Perl's lead and sort "equal" strings according to
    1598             :          * strcmp().
    1599             :          */
    1600      183252 :         if (result == 0)
    1601           0 :             result = strcmp(a1p, a2p);
    1602             : 
    1603      183252 :         if (a1p != a1buf)
    1604         200 :             pfree(a1p);
    1605      183252 :         if (a2p != a2buf)
    1606           0 :             pfree(a2p);
    1607             :     }
    1608             : 
    1609      305117 :     return result;
    1610             : }
    1611             : 
    1612             : /* text_cmp()
    1613             :  * Internal comparison function for text strings.
    1614             :  * Returns -1, 0 or 1
    1615             :  */
    1616             : static int
    1617      169795 : text_cmp(text *arg1, text *arg2, Oid collid)
    1618             : {
    1619             :     char       *a1p,
    1620             :                *a2p;
    1621             :     int         len1,
    1622             :                 len2;
    1623             : 
    1624      169795 :     a1p = VARDATA_ANY(arg1);
    1625      169795 :     a2p = VARDATA_ANY(arg2);
    1626             : 
    1627      169795 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1628      169795 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1629             : 
    1630      169795 :     return varstr_cmp(a1p, len1, a2p, len2, collid);
    1631             : }
    1632             : 
    1633             : /*
    1634             :  * Comparison functions for text strings.
    1635             :  *
    1636             :  * Note: btree indexes need these routines not to leak memory; therefore,
    1637             :  * be careful to free working copies of toasted datums.  Most places don't
    1638             :  * need to be so careful.
    1639             :  */
    1640             : 
    1641             : Datum
    1642       85250 : texteq(PG_FUNCTION_ARGS)
    1643             : {
    1644       85250 :     Datum       arg1 = PG_GETARG_DATUM(0);
    1645       85250 :     Datum       arg2 = PG_GETARG_DATUM(1);
    1646             :     bool        result;
    1647             :     Size        len1,
    1648             :                 len2;
    1649             : 
    1650             :     /*
    1651             :      * Since we only care about equality or not-equality, we can avoid all the
    1652             :      * expense of strcoll() here, and just do bitwise comparison.  In fact, we
    1653             :      * don't even have to do a bitwise comparison if we can show the lengths
    1654             :      * of the strings are unequal; which might save us from having to detoast
    1655             :      * one or both values.
    1656             :      */
    1657       85250 :     len1 = toast_raw_datum_size(arg1);
    1658       85250 :     len2 = toast_raw_datum_size(arg2);
    1659       85250 :     if (len1 != len2)
    1660       38592 :         result = false;
    1661             :     else
    1662             :     {
    1663       46658 :         text       *targ1 = DatumGetTextPP(arg1);
    1664       46658 :         text       *targ2 = DatumGetTextPP(arg2);
    1665             : 
    1666       93316 :         result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1667       46658 :                          len1 - VARHDRSZ) == 0);
    1668             : 
    1669       46658 :         PG_FREE_IF_COPY(targ1, 0);
    1670       46658 :         PG_FREE_IF_COPY(targ2, 1);
    1671             :     }
    1672             : 
    1673       85250 :     PG_RETURN_BOOL(result);
    1674             : }
    1675             : 
    1676             : Datum
    1677         842 : textne(PG_FUNCTION_ARGS)
    1678             : {
    1679         842 :     Datum       arg1 = PG_GETARG_DATUM(0);
    1680         842 :     Datum       arg2 = PG_GETARG_DATUM(1);
    1681             :     bool        result;
    1682             :     Size        len1,
    1683             :                 len2;
    1684             : 
    1685             :     /* See comment in texteq() */
    1686         842 :     len1 = toast_raw_datum_size(arg1);
    1687         842 :     len2 = toast_raw_datum_size(arg2);
    1688         842 :     if (len1 != len2)
    1689          66 :         result = true;
    1690             :     else
    1691             :     {
    1692         776 :         text       *targ1 = DatumGetTextPP(arg1);
    1693         776 :         text       *targ2 = DatumGetTextPP(arg2);
    1694             : 
    1695        1552 :         result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
    1696         776 :                          len1 - VARHDRSZ) != 0);
    1697             : 
    1698         776 :         PG_FREE_IF_COPY(targ1, 0);
    1699         776 :         PG_FREE_IF_COPY(targ2, 1);
    1700             :     }
    1701             : 
    1702         842 :     PG_RETURN_BOOL(result);
    1703             : }
    1704             : 
    1705             : Datum
    1706       12923 : text_lt(PG_FUNCTION_ARGS)
    1707             : {
    1708       12923 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1709       12923 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1710             :     bool        result;
    1711             : 
    1712       12923 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
    1713             : 
    1714       12922 :     PG_FREE_IF_COPY(arg1, 0);
    1715       12922 :     PG_FREE_IF_COPY(arg2, 1);
    1716             : 
    1717       12922 :     PG_RETURN_BOOL(result);
    1718             : }
    1719             : 
    1720             : Datum
    1721       10326 : text_le(PG_FUNCTION_ARGS)
    1722             : {
    1723       10326 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1724       10326 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1725             :     bool        result;
    1726             : 
    1727       10326 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
    1728             : 
    1729       10326 :     PG_FREE_IF_COPY(arg1, 0);
    1730       10326 :     PG_FREE_IF_COPY(arg2, 1);
    1731             : 
    1732       10326 :     PG_RETURN_BOOL(result);
    1733             : }
    1734             : 
    1735             : Datum
    1736        6770 : text_gt(PG_FUNCTION_ARGS)
    1737             : {
    1738        6770 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1739        6770 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1740             :     bool        result;
    1741             : 
    1742        6770 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
    1743             : 
    1744        6770 :     PG_FREE_IF_COPY(arg1, 0);
    1745        6770 :     PG_FREE_IF_COPY(arg2, 1);
    1746             : 
    1747        6770 :     PG_RETURN_BOOL(result);
    1748             : }
    1749             : 
    1750             : Datum
    1751        8135 : text_ge(PG_FUNCTION_ARGS)
    1752             : {
    1753        8135 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1754        8135 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1755             :     bool        result;
    1756             : 
    1757        8135 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
    1758             : 
    1759        8135 :     PG_FREE_IF_COPY(arg1, 0);
    1760        8135 :     PG_FREE_IF_COPY(arg2, 1);
    1761             : 
    1762        8135 :     PG_RETURN_BOOL(result);
    1763             : }
    1764             : 
    1765             : Datum
    1766      131628 : bttextcmp(PG_FUNCTION_ARGS)
    1767             : {
    1768      131628 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    1769      131628 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    1770             :     int32       result;
    1771             : 
    1772      131628 :     result = text_cmp(arg1, arg2, PG_GET_COLLATION());
    1773             : 
    1774      131628 :     PG_FREE_IF_COPY(arg1, 0);
    1775      131628 :     PG_FREE_IF_COPY(arg2, 1);
    1776             : 
    1777      131628 :     PG_RETURN_INT32(result);
    1778             : }
    1779             : 
    1780             : Datum
    1781         971 : bttextsortsupport(PG_FUNCTION_ARGS)
    1782             : {
    1783         971 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    1784         971 :     Oid         collid = ssup->ssup_collation;
    1785             :     MemoryContext oldcontext;
    1786             : 
    1787         971 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    1788             : 
    1789             :     /* Use generic string SortSupport */
    1790         971 :     varstr_sortsupport(ssup, collid, false);
    1791             : 
    1792         970 :     MemoryContextSwitchTo(oldcontext);
    1793             : 
    1794         970 :     PG_RETURN_VOID();
    1795             : }
    1796             : 
    1797             : /*
    1798             :  * Generic sortsupport interface for character type's operator classes.
    1799             :  * Includes locale support, and support for BpChar semantics (i.e. removing
    1800             :  * trailing spaces before comparison).
    1801             :  *
    1802             :  * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
    1803             :  * same representation.  Callers that always use the C collation (e.g.
    1804             :  * non-collatable type callers like bytea) may have NUL bytes in their strings;
    1805             :  * this will not work with any other collation, though.
    1806             :  */
    1807             : void
    1808        1082 : varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
    1809             : {
    1810        1082 :     bool        abbreviate = ssup->abbreviate;
    1811        1082 :     bool        collate_c = false;
    1812             :     VarStringSortSupport *sss;
    1813        1082 :     pg_locale_t locale = 0;
    1814             : 
    1815             :     /*
    1816             :      * If possible, set ssup->comparator to a function which can be used to
    1817             :      * directly compare two datums.  If we can do this, we'll avoid the
    1818             :      * overhead of a trip through the fmgr layer for every comparison, which
    1819             :      * can be substantial.
    1820             :      *
    1821             :      * Most typically, we'll set the comparator to varstrfastcmp_locale, which
    1822             :      * uses strcoll() to perform comparisons and knows about the special
    1823             :      * requirements of BpChar callers.  However, if LC_COLLATE = C, we can
    1824             :      * make things quite a bit faster with varstrfastcmp_c or bpcharfastcmp_c,
    1825             :      * both of which use memcmp() rather than strcoll().
    1826             :      *
    1827             :      * There is a further exception on Windows.  When the database encoding is
    1828             :      * UTF-8 and we are not using the C collation, complex hacks are required.
    1829             :      * We don't currently have a comparator that handles that case, so we fall
    1830             :      * back on the slow method of having the sort code invoke bttextcmp() (in
    1831             :      * the case of text) via the fmgr trampoline.
    1832             :      */
    1833        1082 :     if (lc_collate_is_c(collid))
    1834             :     {
    1835          77 :         if (!bpchar)
    1836          75 :             ssup->comparator = varstrfastcmp_c;
    1837             :         else
    1838           2 :             ssup->comparator = bpcharfastcmp_c;
    1839             : 
    1840          77 :         collate_c = true;
    1841             :     }
    1842             : #ifdef WIN32
    1843             :     else if (GetDatabaseEncoding() == PG_UTF8)
    1844             :         return;
    1845             : #endif
    1846             :     else
    1847             :     {
    1848        1005 :         ssup->comparator = varstrfastcmp_locale;
    1849             : 
    1850             :         /*
    1851             :          * We need a collation-sensitive comparison.  To make things faster,
    1852             :          * we'll figure out the collation based on the locale id and cache the
    1853             :          * result.
    1854             :          */
    1855        1005 :         if (collid != DEFAULT_COLLATION_OID)
    1856             :         {
    1857           1 :             if (!OidIsValid(collid))
    1858             :             {
    1859             :                 /*
    1860             :                  * This typically means that the parser could not resolve a
    1861             :                  * conflict of implicit collations, so report it that way.
    1862             :                  */
    1863           1 :                 ereport(ERROR,
    1864             :                         (errcode(ERRCODE_INDETERMINATE_COLLATION),
    1865             :                          errmsg("could not determine which collation to use for string comparison"),
    1866             :                          errhint("Use the COLLATE clause to set the collation explicitly.")));
    1867             :             }
    1868           0 :             locale = pg_newlocale_from_collation(collid);
    1869             :         }
    1870             :     }
    1871             : 
    1872             :     /*
    1873             :      * Unfortunately, it seems that abbreviation for non-C collations is
    1874             :      * broken on many common platforms; testing of multiple versions of glibc
    1875             :      * reveals that, for many locales, strcoll() and strxfrm() do not return
    1876             :      * consistent results, which is fatal to this optimization.  While no
    1877             :      * other libc other than Cygwin has so far been shown to have a problem,
    1878             :      * we take the conservative course of action for right now and disable
    1879             :      * this categorically.  (Users who are certain this isn't a problem on
    1880             :      * their system can define TRUST_STRXFRM.)
    1881             :      *
    1882             :      * Even apart from the risk of broken locales, it's possible that there
    1883             :      * are platforms where the use of abbreviated keys should be disabled at
    1884             :      * compile time.  Having only 4 byte datums could make worst-case
    1885             :      * performance drastically more likely, for example.  Moreover, macOS's
    1886             :      * strxfrm() implementation is known to not effectively concentrate a
    1887             :      * significant amount of entropy from the original string in earlier
    1888             :      * transformed blobs.  It's possible that other supported platforms are
    1889             :      * similarly encumbered.  So, if we ever get past disabling this
    1890             :      * categorically, we may still want or need to disable it for particular
    1891             :      * platforms.
    1892             :      */
    1893             : #ifndef TRUST_STRXFRM
    1894        1081 :     if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
    1895        1004 :         abbreviate = false;
    1896             : #endif
    1897             : 
    1898             :     /*
    1899             :      * If we're using abbreviated keys, or if we're using a locale-aware
    1900             :      * comparison, we need to initialize a StringSortSupport object.  Both
    1901             :      * cases will make use of the temporary buffers we initialize here for
    1902             :      * scratch space (and to detect requirement for BpChar semantics from
    1903             :      * caller), and the abbreviation case requires additional state.
    1904             :      */
    1905        1081 :     if (abbreviate || !collate_c)
    1906             :     {
    1907        1059 :         sss = palloc(sizeof(VarStringSortSupport));
    1908        1059 :         sss->buf1 = palloc(TEXTBUFLEN);
    1909        1059 :         sss->buflen1 = TEXTBUFLEN;
    1910        1059 :         sss->buf2 = palloc(TEXTBUFLEN);
    1911        1059 :         sss->buflen2 = TEXTBUFLEN;
    1912             :         /* Start with invalid values */
    1913        1059 :         sss->last_len1 = -1;
    1914        1059 :         sss->last_len2 = -1;
    1915             :         /* Initialize */
    1916        1059 :         sss->last_returned = 0;
    1917        1059 :         sss->locale = locale;
    1918             : 
    1919             :         /*
    1920             :          * To avoid somehow confusing a strxfrm() blob and an original string,
    1921             :          * constantly keep track of the variety of data that buf1 and buf2
    1922             :          * currently contain.
    1923             :          *
    1924             :          * Comparisons may be interleaved with conversion calls.  Frequently,
    1925             :          * conversions and comparisons are batched into two distinct phases,
    1926             :          * but the correctness of caching cannot hinge upon this.  For
    1927             :          * comparison caching, buffer state is only trusted if cache_blob is
    1928             :          * found set to false, whereas strxfrm() caching only trusts the state
    1929             :          * when cache_blob is found set to true.
    1930             :          *
    1931             :          * Arbitrarily initialize cache_blob to true.
    1932             :          */
    1933        1059 :         sss->cache_blob = true;
    1934        1059 :         sss->collate_c = collate_c;
    1935        1059 :         sss->bpchar = bpchar;
    1936        1059 :         ssup->ssup_extra = sss;
    1937             : 
    1938             :         /*
    1939             :          * If possible, plan to use the abbreviated keys optimization.  The
    1940             :          * core code may switch back to authoritative comparator should
    1941             :          * abbreviation be aborted.
    1942             :          */
    1943        1059 :         if (abbreviate)
    1944             :         {
    1945          55 :             sss->prop_card = 0.20;
    1946          55 :             initHyperLogLog(&sss->abbr_card, 10);
    1947          55 :             initHyperLogLog(&sss->full_card, 10);
    1948          55 :             ssup->abbrev_full_comparator = ssup->comparator;
    1949          55 :             ssup->comparator = varstrcmp_abbrev;
    1950          55 :             ssup->abbrev_converter = varstr_abbrev_convert;
    1951          55 :             ssup->abbrev_abort = varstr_abbrev_abort;
    1952             :         }
    1953             :     }
    1954        1081 : }
    1955             : 
    1956             : /*
    1957             :  * sortsupport comparison func (for C locale case)
    1958             :  */
    1959             : static int
    1960        3367 : varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
    1961             : {
    1962        3367 :     VarString  *arg1 = DatumGetVarStringPP(x);
    1963        3367 :     VarString  *arg2 = DatumGetVarStringPP(y);
    1964             :     char       *a1p,
    1965             :                *a2p;
    1966             :     int         len1,
    1967             :                 len2,
    1968             :                 result;
    1969             : 
    1970        3367 :     a1p = VARDATA_ANY(arg1);
    1971        3367 :     a2p = VARDATA_ANY(arg2);
    1972             : 
    1973        3367 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    1974        3367 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    1975             : 
    1976        3367 :     result = memcmp(a1p, a2p, Min(len1, len2));
    1977        3367 :     if ((result == 0) && (len1 != len2))
    1978           0 :         result = (len1 < len2) ? -1 : 1;
    1979             : 
    1980             :     /* We can't afford to leak memory here. */
    1981        3367 :     if (PointerGetDatum(arg1) != x)
    1982           0 :         pfree(arg1);
    1983        3367 :     if (PointerGetDatum(arg2) != y)
    1984           0 :         pfree(arg2);
    1985             : 
    1986        3367 :     return result;
    1987             : }
    1988             : 
    1989             : /*
    1990             :  * sortsupport comparison func (for BpChar C locale case)
    1991             :  *
    1992             :  * BpChar outsources its sortsupport to this module.  Specialization for the
    1993             :  * varstr_sortsupport BpChar case, modeled on
    1994             :  * internal_bpchar_pattern_compare().
    1995             :  */
    1996             : static int
    1997           0 : bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
    1998             : {
    1999           0 :     BpChar     *arg1 = DatumGetBpCharPP(x);
    2000           0 :     BpChar     *arg2 = DatumGetBpCharPP(y);
    2001             :     char       *a1p,
    2002             :                *a2p;
    2003             :     int         len1,
    2004             :                 len2,
    2005             :                 result;
    2006             : 
    2007           0 :     a1p = VARDATA_ANY(arg1);
    2008           0 :     a2p = VARDATA_ANY(arg2);
    2009             : 
    2010           0 :     len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
    2011           0 :     len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
    2012             : 
    2013           0 :     result = memcmp(a1p, a2p, Min(len1, len2));
    2014           0 :     if ((result == 0) && (len1 != len2))
    2015           0 :         result = (len1 < len2) ? -1 : 1;
    2016             : 
    2017             :     /* We can't afford to leak memory here. */
    2018           0 :     if (PointerGetDatum(arg1) != x)
    2019           0 :         pfree(arg1);
    2020           0 :     if (PointerGetDatum(arg2) != y)
    2021           0 :         pfree(arg2);
    2022             : 
    2023           0 :     return result;
    2024             : }
    2025             : 
    2026             : /*
    2027             :  * sortsupport comparison func (for locale case)
    2028             :  */
    2029             : static int
    2030     4487295 : varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
    2031             : {
    2032     4487295 :     VarString  *arg1 = DatumGetVarStringPP(x);
    2033     4487295 :     VarString  *arg2 = DatumGetVarStringPP(y);
    2034             :     bool        arg1_match;
    2035     4487295 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2036             : 
    2037             :     /* working state */
    2038             :     char       *a1p,
    2039             :                *a2p;
    2040             :     int         len1,
    2041             :                 len2,
    2042             :                 result;
    2043             : 
    2044     4487295 :     a1p = VARDATA_ANY(arg1);
    2045     4487295 :     a2p = VARDATA_ANY(arg2);
    2046             : 
    2047     4487295 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2048     4487295 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2049             : 
    2050             :     /* Fast pre-check for equality, as discussed in varstr_cmp() */
    2051     4487295 :     if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
    2052             :     {
    2053             :         /*
    2054             :          * No change in buf1 or buf2 contents, so avoid changing last_len1 or
    2055             :          * last_len2.  Existing contents of buffers might still be used by
    2056             :          * next call.
    2057             :          *
    2058             :          * It's fine to allow the comparison of BpChar padding bytes here,
    2059             :          * even though that implies that the memcmp() will usually be
    2060             :          * performed for BpChar callers (though multibyte characters could
    2061             :          * still prevent that from occurring).  The memcmp() is still very
    2062             :          * cheap, and BpChar's funny semantics have us remove trailing spaces
    2063             :          * (not limited to padding), so we need make no distinction between
    2064             :          * padding space characters and "real" space characters.
    2065             :          */
    2066     1261217 :         result = 0;
    2067     1261217 :         goto done;
    2068             :     }
    2069             : 
    2070     3226078 :     if (sss->bpchar)
    2071             :     {
    2072             :         /* Get true number of bytes, ignoring trailing spaces */
    2073        1401 :         len1 = bpchartruelen(a1p, len1);
    2074        1401 :         len2 = bpchartruelen(a2p, len2);
    2075             :     }
    2076             : 
    2077     3226078 :     if (len1 >= sss->buflen1)
    2078             :     {
    2079           0 :         pfree(sss->buf1);
    2080           0 :         sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2081           0 :         sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
    2082             :     }
    2083     3226078 :     if (len2 >= sss->buflen2)
    2084             :     {
    2085           0 :         pfree(sss->buf2);
    2086           0 :         sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
    2087           0 :         sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
    2088             :     }
    2089             : 
    2090             :     /*
    2091             :      * We're likely to be asked to compare the same strings repeatedly, and
    2092             :      * memcmp() is so much cheaper than strcoll() that it pays to try to cache
    2093             :      * comparisons, even though in general there is no reason to think that
    2094             :      * that will work out (every string datum may be unique).  Caching does
    2095             :      * not slow things down measurably when it doesn't work out, and can speed
    2096             :      * things up by rather a lot when it does.  In part, this is because the
    2097             :      * memcmp() compares data from cachelines that are needed in L1 cache even
    2098             :      * when the last comparison's result cannot be reused.
    2099             :      */
    2100     3226078 :     arg1_match = true;
    2101     3226078 :     if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
    2102             :     {
    2103     2641645 :         arg1_match = false;
    2104     2641645 :         memcpy(sss->buf1, a1p, len1);
    2105     2641645 :         sss->buf1[len1] = '\0';
    2106     2641645 :         sss->last_len1 = len1;
    2107             :     }
    2108             : 
    2109             :     /*
    2110             :      * If we're comparing the same two strings as last time, we can return the
    2111             :      * same answer without calling strcoll() again.  This is more likely than
    2112             :      * it seems (at least with moderate to low cardinality sets), because
    2113             :      * quicksort compares the same pivot against many values.
    2114             :      */
    2115     3226078 :     if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
    2116             :     {
    2117      405475 :         memcpy(sss->buf2, a2p, len2);
    2118      405475 :         sss->buf2[len2] = '\0';
    2119      405475 :         sss->last_len2 = len2;
    2120             :     }
    2121     2820603 :     else if (arg1_match && !sss->cache_blob)
    2122             :     {
    2123             :         /* Use result cached following last actual strcoll() call */
    2124      515151 :         result = sss->last_returned;
    2125      515151 :         goto done;
    2126             :     }
    2127             : 
    2128     2710927 :     if (sss->locale)
    2129             :     {
    2130           0 :         if (sss->locale->provider == COLLPROVIDER_ICU)
    2131             :         {
    2132             : #ifdef USE_ICU
    2133             : #ifdef HAVE_UCOL_STRCOLLUTF8
    2134             :             if (GetDatabaseEncoding() == PG_UTF8)
    2135             :             {
    2136             :                 UErrorCode  status;
    2137             : 
    2138             :                 status = U_ZERO_ERROR;
    2139             :                 result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
    2140             :                                           a1p, len1,
    2141             :                                           a2p, len2,
    2142             :                                           &status);
    2143             :                 if (U_FAILURE(status))
    2144             :                     ereport(ERROR,
    2145             :                             (errmsg("collation failed: %s", u_errorName(status))));
    2146             :             }
    2147             :             else
    2148             : #endif
    2149             :             {
    2150             :                 int32_t     ulen1,
    2151             :                             ulen2;
    2152             :                 UChar      *uchar1,
    2153             :                            *uchar2;
    2154             : 
    2155             :                 ulen1 = icu_to_uchar(&uchar1, a1p, len1);
    2156             :                 ulen2 = icu_to_uchar(&uchar2, a2p, len2);
    2157             : 
    2158             :                 result = ucol_strcoll(sss->locale->info.icu.ucol,
    2159             :                                       uchar1, ulen1,
    2160             :                                       uchar2, ulen2);
    2161             : 
    2162             :                 pfree(uchar1);
    2163             :                 pfree(uchar2);
    2164             :             }
    2165             : #else                           /* not USE_ICU */
    2166             :             /* shouldn't happen */
    2167           0 :             elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
    2168             : #endif                          /* not USE_ICU */
    2169             :         }
    2170             :         else
    2171             :         {
    2172             : #ifdef HAVE_LOCALE_T
    2173           0 :             result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
    2174             : #else
    2175             :             /* shouldn't happen */
    2176             :             elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
    2177             : #endif
    2178             :         }
    2179             :     }
    2180             :     else
    2181     2710927 :         result = strcoll(sss->buf1, sss->buf2);
    2182             : 
    2183             :     /*
    2184             :      * In some locales strcoll() can claim that nonidentical strings are
    2185             :      * equal. Believing that would be bad news for a number of reasons, so we
    2186             :      * follow Perl's lead and sort "equal" strings according to strcmp().
    2187             :      */
    2188     2710927 :     if (result == 0)
    2189           0 :         result = strcmp(sss->buf1, sss->buf2);
    2190             : 
    2191             :     /* Cache result, perhaps saving an expensive strcoll() call next time */
    2192     2710927 :     sss->cache_blob = false;
    2193     2710927 :     sss->last_returned = result;
    2194             : done:
    2195             :     /* We can't afford to leak memory here. */
    2196     4487295 :     if (PointerGetDatum(arg1) != x)
    2197           0 :         pfree(arg1);
    2198     4487295 :     if (PointerGetDatum(arg2) != y)
    2199           0 :         pfree(arg2);
    2200             : 
    2201     4487295 :     return result;
    2202             : }
    2203             : 
    2204             : /*
    2205             :  * Abbreviated key comparison func
    2206             :  */
    2207             : static int
    2208        4793 : varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
    2209             : {
    2210             :     /*
    2211             :      * When 0 is returned, the core system will call varstrfastcmp_c()
    2212             :      * (bpcharfastcmp_c() in BpChar case) or varstrfastcmp_locale().  Even a
    2213             :      * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
    2214             :      * authoritatively, for the same reason that there is a strcoll()
    2215             :      * tie-breaker call to strcmp() in varstr_cmp().
    2216             :      */
    2217        4793 :     if (x > y)
    2218         832 :         return 1;
    2219        3961 :     else if (x == y)
    2220        3335 :         return 0;
    2221             :     else
    2222         626 :         return -1;
    2223             : }
    2224             : 
    2225             : /*
    2226             :  * Conversion routine for sortsupport.  Converts original to abbreviated key
    2227             :  * representation.  Our encoding strategy is simple -- pack the first 8 bytes
    2228             :  * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
    2229             :  * stored in reverse order), and treat it as an unsigned integer.  When the "C"
    2230             :  * locale is used, or in case of bytea, just memcpy() from original instead.
    2231             :  */
    2232             : static Datum
    2233        1071 : varstr_abbrev_convert(Datum original, SortSupport ssup)
    2234             : {
    2235        1071 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2236        1071 :     VarString  *authoritative = DatumGetVarStringPP(original);
    2237        1071 :     char       *authoritative_data = VARDATA_ANY(authoritative);
    2238             : 
    2239             :     /* working state */
    2240             :     Datum       res;
    2241             :     char       *pres;
    2242             :     int         len;
    2243             :     uint32      hash;
    2244             : 
    2245        1071 :     pres = (char *) &res;
    2246             :     /* memset(), so any non-overwritten bytes are NUL */
    2247        1071 :     memset(pres, 0, sizeof(Datum));
    2248        1071 :     len = VARSIZE_ANY_EXHDR(authoritative);
    2249             : 
    2250             :     /* Get number of bytes, ignoring trailing spaces */
    2251        1071 :     if (sss->bpchar)
    2252           0 :         len = bpchartruelen(authoritative_data, len);
    2253             : 
    2254             :     /*
    2255             :      * If we're using the C collation, use memcpy(), rather than strxfrm(), to
    2256             :      * abbreviate keys.  The full comparator for the C locale is always
    2257             :      * memcmp().  It would be incorrect to allow bytea callers (callers that
    2258             :      * always force the C collation -- bytea isn't a collatable type, but this
    2259             :      * approach is convenient) to use strxfrm().  This is because bytea
    2260             :      * strings may contain NUL bytes.  Besides, this should be faster, too.
    2261             :      *
    2262             :      * More generally, it's okay that bytea callers can have NUL bytes in
    2263             :      * strings because varstrcmp_abbrev() need not make a distinction between
    2264             :      * terminating NUL bytes, and NUL bytes representing actual NULs in the
    2265             :      * authoritative representation.  Hopefully a comparison at or past one
    2266             :      * abbreviated key's terminating NUL byte will resolve the comparison
    2267             :      * without consulting the authoritative representation; specifically, some
    2268             :      * later non-NUL byte in the longer string can resolve the comparison
    2269             :      * against a subsequent terminating NUL in the shorter string.  There will
    2270             :      * usually be what is effectively a "length-wise" resolution there and
    2271             :      * then.
    2272             :      *
    2273             :      * If that doesn't work out -- if all bytes in the longer string
    2274             :      * positioned at or past the offset of the smaller string's (first)
    2275             :      * terminating NUL are actually representative of NUL bytes in the
    2276             :      * authoritative binary string (perhaps with some *terminating* NUL bytes
    2277             :      * towards the end of the longer string iff it happens to still be small)
    2278             :      * -- then an authoritative tie-breaker will happen, and do the right
    2279             :      * thing: explicitly consider string length.
    2280             :      */
    2281        1071 :     if (sss->collate_c)
    2282        1071 :         memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
    2283             :     else
    2284             :     {
    2285             :         Size        bsize;
    2286             : #ifdef USE_ICU
    2287             :         int32_t     ulen = -1;
    2288             :         UChar      *uchar = NULL;
    2289             : #endif
    2290             : 
    2291             :         /*
    2292             :          * We're not using the C collation, so fall back on strxfrm or ICU
    2293             :          * analogs.
    2294             :          */
    2295             : 
    2296             :         /* By convention, we use buffer 1 to store and NUL-terminate */
    2297           0 :         if (len >= sss->buflen1)
    2298             :         {
    2299           0 :             pfree(sss->buf1);
    2300           0 :             sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
    2301           0 :             sss->buf1 = palloc(sss->buflen1);
    2302             :         }
    2303             : 
    2304             :         /* Might be able to reuse strxfrm() blob from last call */
    2305           0 :         if (sss->last_len1 == len && sss->cache_blob &&
    2306           0 :             memcmp(sss->buf1, authoritative_data, len) == 0)
    2307             :         {
    2308           0 :             memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
    2309             :             /* No change affecting cardinality, so no hashing required */
    2310           0 :             goto done;
    2311             :         }
    2312             : 
    2313           0 :         memcpy(sss->buf1, authoritative_data, len);
    2314             : 
    2315             :         /*
    2316             :          * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
    2317             :          * necessary for ICU, but doesn't hurt.
    2318             :          */
    2319           0 :         sss->buf1[len] = '\0';
    2320           0 :         sss->last_len1 = len;
    2321             : 
    2322             : #ifdef USE_ICU
    2323             :         /* When using ICU and not UTF8, convert string to UChar. */
    2324             :         if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
    2325             :             GetDatabaseEncoding() != PG_UTF8)
    2326             :             ulen = icu_to_uchar(&uchar, sss->buf1, len);
    2327             : #endif
    2328             : 
    2329             :         /*
    2330             :          * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
    2331             :          * and try again.  Both of these functions have the result buffer
    2332             :          * content undefined if the result did not fit, so we need to retry
    2333             :          * until everything fits, even though we only need the first few bytes
    2334             :          * in the end.  When using ucol_nextSortKeyPart(), however, we only
    2335             :          * ask for as many bytes as we actually need.
    2336             :          */
    2337             :         for (;;)
    2338             :         {
    2339             : #ifdef USE_ICU
    2340             :             if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
    2341             :             {
    2342             :                 /*
    2343             :                  * When using UTF8, use the iteration interface so we only
    2344             :                  * need to produce as many bytes as we actually need.
    2345             :                  */
    2346             :                 if (GetDatabaseEncoding() == PG_UTF8)
    2347             :                 {
    2348             :                     UCharIterator iter;
    2349             :                     uint32_t    state[2];
    2350             :                     UErrorCode  status;
    2351             : 
    2352             :                     uiter_setUTF8(&iter, sss->buf1, len);
    2353             :                     state[0] = state[1] = 0;    /* won't need that again */
    2354             :                     status = U_ZERO_ERROR;
    2355             :                     bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
    2356             :                                                  &iter,
    2357             :                                                  state,
    2358             :                                                  (uint8_t *) sss->buf2,
    2359             :                                                  Min(sizeof(Datum), sss->buflen2),
    2360             :                                                  &status);
    2361             :                     if (U_FAILURE(status))
    2362             :                         ereport(ERROR,
    2363             :                                 (errmsg("sort key generation failed: %s",
    2364             :                                         u_errorName(status))));
    2365             :                 }
    2366             :                 else
    2367             :                     bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
    2368             :                                             uchar, ulen,
    2369             :                                             (uint8_t *) sss->buf2, sss->buflen2);
    2370             :             }
    2371             :             else
    2372             : #endif
    2373             : #ifdef HAVE_LOCALE_T
    2374           0 :             if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
    2375           0 :                 bsize = strxfrm_l(sss->buf2, sss->buf1,
    2376           0 :                                   sss->buflen2, sss->locale->info.lt);
    2377             :             else
    2378             : #endif
    2379           0 :                 bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
    2380             : 
    2381           0 :             sss->last_len2 = bsize;
    2382           0 :             if (bsize < sss->buflen2)
    2383           0 :                 break;
    2384             : 
    2385             :             /*
    2386             :              * Grow buffer and retry.
    2387             :              */
    2388           0 :             pfree(sss->buf2);
    2389           0 :             sss->buflen2 = Max(bsize + 1,
    2390             :                                Min(sss->buflen2 * 2, MaxAllocSize));
    2391           0 :             sss->buf2 = palloc(sss->buflen2);
    2392           0 :         }
    2393             : 
    2394             :         /*
    2395             :          * Every Datum byte is always compared.  This is safe because the
    2396             :          * strxfrm() blob is itself NUL terminated, leaving no danger of
    2397             :          * misinterpreting any NUL bytes not intended to be interpreted as
    2398             :          * logically representing termination.
    2399             :          *
    2400             :          * (Actually, even if there were NUL bytes in the blob it would be
    2401             :          * okay.  See remarks on bytea case above.)
    2402             :          */
    2403           0 :         memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
    2404             : 
    2405             : #ifdef USE_ICU
    2406             :         if (uchar)
    2407             :             pfree(uchar);
    2408             : #endif
    2409             :     }
    2410             : 
    2411             :     /*
    2412             :      * Maintain approximate cardinality of both abbreviated keys and original,
    2413             :      * authoritative keys using HyperLogLog.  Used as cheap insurance against
    2414             :      * the worst case, where we do many string transformations for no saving
    2415             :      * in full strcoll()-based comparisons.  These statistics are used by
    2416             :      * varstr_abbrev_abort().
    2417             :      *
    2418             :      * First, Hash key proper, or a significant fraction of it.  Mix in length
    2419             :      * in order to compensate for cases where differences are past
    2420             :      * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
    2421             :      */
    2422        1071 :     hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
    2423             :                                    Min(len, PG_CACHE_LINE_SIZE)));
    2424             : 
    2425        1071 :     if (len > PG_CACHE_LINE_SIZE)
    2426           0 :         hash ^= DatumGetUInt32(hash_uint32((uint32) len));
    2427             : 
    2428        1071 :     addHyperLogLog(&sss->full_card, hash);
    2429             : 
    2430             :     /* Hash abbreviated key */
    2431             : #if SIZEOF_DATUM == 8
    2432             :     {
    2433             :         uint32      lohalf,
    2434             :                     hihalf;
    2435             : 
    2436             :         lohalf = (uint32) res;
    2437             :         hihalf = (uint32) (res >> 32);
    2438             :         hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
    2439             :     }
    2440             : #else                           /* SIZEOF_DATUM != 8 */
    2441        1071 :     hash = DatumGetUInt32(hash_uint32((uint32) res));
    2442             : #endif
    2443             : 
    2444        1071 :     addHyperLogLog(&sss->abbr_card, hash);
    2445             : 
    2446             :     /* Cache result, perhaps saving an expensive strxfrm() call next time */
    2447        1071 :     sss->cache_blob = true;
    2448             : done:
    2449             : 
    2450             :     /*
    2451             :      * Byteswap on little-endian machines.
    2452             :      *
    2453             :      * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
    2454             :      * comparator) works correctly on all platforms.  If we didn't do this,
    2455             :      * the comparator would have to call memcmp() with a pair of pointers to
    2456             :      * the first byte of each abbreviated key, which is slower.
    2457             :      */
    2458        1071 :     res = DatumBigEndianToNative(res);
    2459             : 
    2460             :     /* Don't leak memory here */
    2461        1071 :     if (PointerGetDatum(authoritative) != original)
    2462           0 :         pfree(authoritative);
    2463             : 
    2464        1071 :     return res;
    2465             : }
    2466             : 
    2467             : /*
    2468             :  * Callback for estimating effectiveness of abbreviated key optimization, using
    2469             :  * heuristic rules.  Returns value indicating if the abbreviation optimization
    2470             :  * should be aborted, based on its projected effectiveness.
    2471             :  */
    2472             : static bool
    2473           7 : varstr_abbrev_abort(int memtupcount, SortSupport ssup)
    2474             : {
    2475           7 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
    2476             :     double      abbrev_distinct,
    2477             :                 key_distinct;
    2478             : 
    2479           7 :     Assert(ssup->abbreviate);
    2480             : 
    2481             :     /* Have a little patience */
    2482           7 :     if (memtupcount < 100)
    2483           4 :         return false;
    2484             : 
    2485           3 :     abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
    2486           3 :     key_distinct = estimateHyperLogLog(&sss->full_card);
    2487             : 
    2488             :     /*
    2489             :      * Clamp cardinality estimates to at least one distinct value.  While
    2490             :      * NULLs are generally disregarded, if only NULL values were seen so far,
    2491             :      * that might misrepresent costs if we failed to clamp.
    2492             :      */
    2493           3 :     if (abbrev_distinct <= 1.0)
    2494           0 :         abbrev_distinct = 1.0;
    2495             : 
    2496           3 :     if (key_distinct <= 1.0)
    2497           0 :         key_distinct = 1.0;
    2498             : 
    2499             :     /*
    2500             :      * In the worst case all abbreviated keys are identical, while at the same
    2501             :      * time there are differences within full key strings not captured in
    2502             :      * abbreviations.
    2503             :      */
    2504             : #ifdef TRACE_SORT
    2505           3 :     if (trace_sort)
    2506             :     {
    2507           0 :         double      norm_abbrev_card = abbrev_distinct / (double) memtupcount;
    2508             : 
    2509           0 :         elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
    2510             :              "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
    2511             :              memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
    2512             :              sss->prop_card);
    2513             :     }
    2514             : #endif
    2515             : 
    2516             :     /*
    2517             :      * If the number of distinct abbreviated keys approximately matches the
    2518             :      * number of distinct authoritative original keys, that's reason enough to
    2519             :      * proceed.  We can win even with a very low cardinality set if most
    2520             :      * tie-breakers only memcmp().  This is by far the most important
    2521             :      * consideration.
    2522             :      *
    2523             :      * While comparisons that are resolved at the abbreviated key level are
    2524             :      * considerably cheaper than tie-breakers resolved with memcmp(), both of
    2525             :      * those two outcomes are so much cheaper than a full strcoll() once
    2526             :      * sorting is underway that it doesn't seem worth it to weigh abbreviated
    2527             :      * cardinality against the overall size of the set in order to more
    2528             :      * accurately model costs.  Assume that an abbreviated comparison, and an
    2529             :      * abbreviated comparison with a cheap memcmp()-based authoritative
    2530             :      * resolution are equivalent.
    2531             :      */
    2532           3 :     if (abbrev_distinct > key_distinct * sss->prop_card)
    2533             :     {
    2534             :         /*
    2535             :          * When we have exceeded 10,000 tuples, decay required cardinality
    2536             :          * aggressively for next call.
    2537             :          *
    2538             :          * This is useful because the number of comparisons required on
    2539             :          * average increases at a linearithmic rate, and at roughly 10,000
    2540             :          * tuples that factor will start to dominate over the linear costs of
    2541             :          * string transformation (this is a conservative estimate).  The decay
    2542             :          * rate is chosen to be a little less aggressive than halving -- which
    2543             :          * (since we're called at points at which memtupcount has doubled)
    2544             :          * would never see the cost model actually abort past the first call
    2545             :          * following a decay.  This decay rate is mostly a precaution against
    2546             :          * a sudden, violent swing in how well abbreviated cardinality tracks
    2547             :          * full key cardinality.  The decay also serves to prevent a marginal
    2548             :          * case from being aborted too late, when too much has already been
    2549             :          * invested in string transformation.
    2550             :          *
    2551             :          * It's possible for sets of several million distinct strings with
    2552             :          * mere tens of thousands of distinct abbreviated keys to still
    2553             :          * benefit very significantly.  This will generally occur provided
    2554             :          * each abbreviated key is a proxy for a roughly uniform number of the
    2555             :          * set's full keys. If it isn't so, we hope to catch that early and
    2556             :          * abort.  If it isn't caught early, by the time the problem is
    2557             :          * apparent it's probably not worth aborting.
    2558             :          */
    2559           3 :         if (memtupcount > 10000)
    2560           0 :             sss->prop_card *= 0.65;
    2561             : 
    2562           3 :         return false;
    2563             :     }
    2564             : 
    2565             :     /*
    2566             :      * Abort abbreviation strategy.
    2567             :      *
    2568             :      * The worst case, where all abbreviated keys are identical while all
    2569             :      * original strings differ will typically only see a regression of about
    2570             :      * 10% in execution time for small to medium sized lists of strings.
    2571             :      * Whereas on modern CPUs where cache stalls are the dominant cost, we can
    2572             :      * often expect very large improvements, particularly with sets of strings
    2573             :      * of moderately high to high abbreviated cardinality.  There is little to
    2574             :      * lose but much to gain, which our strategy reflects.
    2575             :      */
    2576             : #ifdef TRACE_SORT
    2577           0 :     if (trace_sort)
    2578           0 :         elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
    2579             :              "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
    2580             :              memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
    2581             : #endif
    2582             : 
    2583           0 :     return true;
    2584             : }
    2585             : 
    2586             : Datum
    2587           7 : text_larger(PG_FUNCTION_ARGS)
    2588             : {
    2589           7 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2590           7 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2591             :     text       *result;
    2592             : 
    2593           7 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
    2594             : 
    2595           7 :     PG_RETURN_TEXT_P(result);
    2596             : }
    2597             : 
    2598             : Datum
    2599           6 : text_smaller(PG_FUNCTION_ARGS)
    2600             : {
    2601           6 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2602           6 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2603             :     text       *result;
    2604             : 
    2605           6 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
    2606             : 
    2607           6 :     PG_RETURN_TEXT_P(result);
    2608             : }
    2609             : 
    2610             : 
    2611             : /*
    2612             :  * The following operators support character-by-character comparison
    2613             :  * of text datums, to allow building indexes suitable for LIKE clauses.
    2614             :  * Note that the regular texteq/textne comparison operators, and regular
    2615             :  * support functions 1 and 2 with "C" collation are assumed to be
    2616             :  * compatible with these!
    2617             :  */
    2618             : 
    2619             : static int
    2620       25163 : internal_text_pattern_compare(text *arg1, text *arg2)
    2621             : {
    2622             :     int         result;
    2623             :     int         len1,
    2624             :                 len2;
    2625             : 
    2626       25163 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    2627       25163 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    2628             : 
    2629       25163 :     result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    2630       25163 :     if (result != 0)
    2631       25151 :         return result;
    2632          12 :     else if (len1 < len2)
    2633           0 :         return -1;
    2634          12 :     else if (len1 > len2)
    2635           3 :         return 1;
    2636             :     else
    2637           9 :         return 0;
    2638             : }
    2639             : 
    2640             : 
    2641             : Datum
    2642        6401 : text_pattern_lt(PG_FUNCTION_ARGS)
    2643             : {
    2644        6401 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2645        6401 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2646             :     int         result;
    2647             : 
    2648        6401 :     result = internal_text_pattern_compare(arg1, arg2);
    2649             : 
    2650        6401 :     PG_FREE_IF_COPY(arg1, 0);
    2651        6401 :     PG_FREE_IF_COPY(arg2, 1);
    2652             : 
    2653        6401 :     PG_RETURN_BOOL(result < 0);
    2654             : }
    2655             : 
    2656             : 
    2657             : Datum
    2658        6251 : text_pattern_le(PG_FUNCTION_ARGS)
    2659             : {
    2660        6251 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2661        6251 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2662             :     int         result;
    2663             : 
    2664        6251 :     result = internal_text_pattern_compare(arg1, arg2);
    2665             : 
    2666        6251 :     PG_FREE_IF_COPY(arg1, 0);
    2667        6251 :     PG_FREE_IF_COPY(arg2, 1);
    2668             : 
    2669        6251 :     PG_RETURN_BOOL(result <= 0);
    2670             : }
    2671             : 
    2672             : 
    2673             : Datum
    2674        6251 : text_pattern_ge(PG_FUNCTION_ARGS)
    2675             : {
    2676        6251 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2677        6251 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2678             :     int         result;
    2679             : 
    2680        6251 :     result = internal_text_pattern_compare(arg1, arg2);
    2681             : 
    2682        6251 :     PG_FREE_IF_COPY(arg1, 0);
    2683        6251 :     PG_FREE_IF_COPY(arg2, 1);
    2684             : 
    2685        6251 :     PG_RETURN_BOOL(result >= 0);
    2686             : }
    2687             : 
    2688             : 
    2689             : Datum
    2690        6251 : text_pattern_gt(PG_FUNCTION_ARGS)
    2691             : {
    2692        6251 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2693        6251 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2694             :     int         result;
    2695             : 
    2696        6251 :     result = internal_text_pattern_compare(arg1, arg2);
    2697             : 
    2698        6251 :     PG_FREE_IF_COPY(arg1, 0);
    2699        6251 :     PG_FREE_IF_COPY(arg2, 1);
    2700             : 
    2701        6251 :     PG_RETURN_BOOL(result > 0);
    2702             : }
    2703             : 
    2704             : 
    2705             : Datum
    2706           9 : bttext_pattern_cmp(PG_FUNCTION_ARGS)
    2707             : {
    2708           9 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
    2709           9 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
    2710             :     int         result;
    2711             : 
    2712           9 :     result = internal_text_pattern_compare(arg1, arg2);
    2713             : 
    2714           9 :     PG_FREE_IF_COPY(arg1, 0);
    2715           9 :     PG_FREE_IF_COPY(arg2, 1);
    2716             : 
    2717           9 :     PG_RETURN_INT32(result);
    2718             : }
    2719             : 
    2720             : 
    2721             : Datum
    2722          21 : bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
    2723             : {
    2724          21 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    2725             :     MemoryContext oldcontext;
    2726             : 
    2727          21 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    2728             : 
    2729             :     /* Use generic string SortSupport, forcing "C" collation */
    2730          21 :     varstr_sortsupport(ssup, C_COLLATION_OID, false);
    2731             : 
    2732          21 :     MemoryContextSwitchTo(oldcontext);
    2733             : 
    2734          21 :     PG_RETURN_VOID();
    2735             : }
    2736             : 
    2737             : 
    2738             : /*-------------------------------------------------------------
    2739             :  * byteaoctetlen
    2740             :  *
    2741             :  * get the number of bytes contained in an instance of type 'bytea'
    2742             :  *-------------------------------------------------------------
    2743             :  */
    2744             : Datum
    2745           0 : byteaoctetlen(PG_FUNCTION_ARGS)
    2746             : {
    2747           0 :     Datum       str = PG_GETARG_DATUM(0);
    2748             : 
    2749             :     /* We need not detoast the input at all */
    2750           0 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
    2751             : }
    2752             : 
    2753             : /*
    2754             :  * byteacat -
    2755             :  *    takes two bytea* and returns a bytea* that is the concatenation of
    2756             :  *    the two.
    2757             :  *
    2758             :  * Cloned from textcat and modified as required.
    2759             :  */
    2760             : Datum
    2761           0 : byteacat(PG_FUNCTION_ARGS)
    2762             : {
    2763           0 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    2764           0 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    2765             : 
    2766           0 :     PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
    2767             : }
    2768             : 
    2769             : /*
    2770             :  * bytea_catenate
    2771             :  *  Guts of byteacat(), broken out so it can be used by other functions
    2772             :  *
    2773             :  * Arguments can be in short-header form, but not compressed or out-of-line
    2774             :  */
    2775             : static bytea *
    2776           6 : bytea_catenate(bytea *t1, bytea *t2)
    2777             : {
    2778             :     bytea      *result;
    2779             :     int         len1,
    2780             :                 len2,
    2781             :                 len;
    2782             :     char       *ptr;
    2783             : 
    2784           6 :     len1 = VARSIZE_ANY_EXHDR(t1);
    2785           6 :     len2 = VARSIZE_ANY_EXHDR(t2);
    2786             : 
    2787             :     /* paranoia ... probably should throw error instead? */
    2788           6 :     if (len1 < 0)
    2789           0 :         len1 = 0;
    2790           6 :     if (len2 < 0)
    2791           0 :         len2 = 0;
    2792             : 
    2793           6 :     len = len1 + len2 + VARHDRSZ;
    2794           6 :     result = (bytea *) palloc(len);
    2795             : 
    2796             :     /* Set size of result string... */
    2797           6 :     SET_VARSIZE(result, len);
    2798             : 
    2799             :     /* Fill data field of result string... */
    2800           6 :     ptr = VARDATA(result);
    2801           6 :     if (len1 > 0)
    2802           6 :         memcpy(ptr, VARDATA_ANY(t1), len1);
    2803           6 :     if (len2 > 0)
    2804           3 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
    2805             : 
    2806           6 :     return result;
    2807             : }
    2808             : 
    2809             : #define PG_STR_GET_BYTEA(str_) \
    2810             :     DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
    2811             : 
    2812             : /*
    2813             :  * bytea_substr()
    2814             :  * Return a substring starting at the specified position.
    2815             :  * Cloned from text_substr and modified as required.
    2816             :  *
    2817             :  * Input:
    2818             :  *  - string
    2819             :  *  - starting position (is one-based)
    2820             :  *  - string length (optional)
    2821             :  *
    2822             :  * If the starting position is zero or less, then return from the start of the string
    2823             :  * adjusting the length to be consistent with the "negative start" per SQL.
    2824             :  * If the length is less than zero, an ERROR is thrown. If no third argument
    2825             :  * (length) is provided, the length to the end of the string is assumed.
    2826             :  */
    2827             : Datum
    2828           9 : bytea_substr(PG_FUNCTION_ARGS)
    2829             : {
    2830           9 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    2831             :                                       PG_GETARG_INT32(1),
    2832             :                                       PG_GETARG_INT32(2),
    2833             :                                       false));
    2834             : }
    2835             : 
    2836             : /*
    2837             :  * bytea_substr_no_len -
    2838             :  *    Wrapper to avoid opr_sanity failure due to
    2839             :  *    one function accepting a different number of args.
    2840             :  */
    2841             : Datum
    2842           4 : bytea_substr_no_len(PG_FUNCTION_ARGS)
    2843             : {
    2844           4 :     PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
    2845             :                                       PG_GETARG_INT32(1),
    2846             :                                       -1,
    2847             :                                       true));
    2848             : }
    2849             : 
    2850             : static bytea *
    2851          19 : bytea_substring(Datum str,
    2852             :                 int S,
    2853             :                 int L,
    2854             :                 bool length_not_specified)
    2855             : {
    2856             :     int         S1;             /* adjusted start position */
    2857             :     int         L1;             /* adjusted substring length */
    2858             : 
    2859          19 :     S1 = Max(S, 1);
    2860             : 
    2861          19 :     if (length_not_specified)
    2862             :     {
    2863             :         /*
    2864             :          * Not passed a length - DatumGetByteaPSlice() grabs everything to the
    2865             :          * end of the string if we pass it a negative value for length.
    2866             :          */
    2867           7 :         L1 = -1;
    2868             :     }
    2869             :     else
    2870             :     {
    2871             :         /* end position */
    2872          12 :         int         E = S + L;
    2873             : 
    2874             :         /*
    2875             :          * A negative value for L is the only way for the end position to be
    2876             :          * before the start. SQL99 says to throw an error.
    2877             :          */
    2878          12 :         if (E < S)
    2879           1 :             ereport(ERROR,
    2880             :                     (errcode(ERRCODE_SUBSTRING_ERROR),
    2881             :                      errmsg("negative substring length not allowed")));
    2882             : 
    2883             :         /*
    2884             :          * A zero or negative value for the end position can happen if the
    2885             :          * start was negative or one. SQL99 says to return a zero-length
    2886             :          * string.
    2887             :          */
    2888          11 :         if (E < 1)
    2889           0 :             return PG_STR_GET_BYTEA("");
    2890             : 
    2891          11 :         L1 = E - S1;
    2892             :     }
    2893             : 
    2894             :     /*
    2895             :      * If the start position is past the end of the string, SQL99 says to
    2896             :      * return a zero-length string -- DatumGetByteaPSlice() will do that for
    2897             :      * us. Convert to zero-based starting position
    2898             :      */
    2899          18 :     return DatumGetByteaPSlice(str, S1 - 1, L1);
    2900             : }
    2901             : 
    2902             : /*
    2903             :  * byteaoverlay
    2904             :  *  Replace specified substring of first string with second
    2905             :  *
    2906             :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
    2907             :  * This code is a direct implementation of what the standard says.
    2908             :  */
    2909             : Datum
    2910           1 : byteaoverlay(PG_FUNCTION_ARGS)
    2911             : {
    2912           1 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    2913           1 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    2914           1 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    2915           1 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
    2916             : 
    2917           1 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    2918             : }
    2919             : 
    2920             : Datum
    2921           2 : byteaoverlay_no_len(PG_FUNCTION_ARGS)
    2922             : {
    2923           2 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    2924           2 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    2925           2 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
    2926             :     int         sl;
    2927             : 
    2928           2 :     sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
    2929           2 :     PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
    2930             : }
    2931             : 
    2932             : static bytea *
    2933           3 : bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
    2934             : {
    2935             :     bytea      *result;
    2936             :     bytea      *s1;
    2937             :     bytea      *s2;
    2938             :     int         sp_pl_sl;
    2939             : 
    2940             :     /*
    2941             :      * Check for possible integer-overflow cases.  For negative sp, throw a
    2942             :      * "substring length" error because that's what should be expected
    2943             :      * according to the spec's definition of OVERLAY().
    2944             :      */
    2945           3 :     if (sp <= 0)
    2946           0 :         ereport(ERROR,
    2947             :                 (errcode(ERRCODE_SUBSTRING_ERROR),
    2948             :                  errmsg("negative substring length not allowed")));
    2949           3 :     sp_pl_sl = sp + sl;
    2950           3 :     if (sp_pl_sl <= sl)
    2951           0 :         ereport(ERROR,
    2952             :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    2953             :                  errmsg("integer out of range")));
    2954             : 
    2955           3 :     s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
    2956           3 :     s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
    2957           3 :     result = bytea_catenate(s1, t2);
    2958           3 :     result = bytea_catenate(result, s2);
    2959             : 
    2960           3 :     return result;
    2961             : }
    2962             : 
    2963             : /*
    2964             :  * byteapos -
    2965             :  *    Return the position of the specified substring.
    2966             :  *    Implements the SQL POSITION() function.
    2967             :  * Cloned from textpos and modified as required.
    2968             :  */
    2969             : Datum
    2970           0 : byteapos(PG_FUNCTION_ARGS)
    2971             : {
    2972           0 :     bytea      *t1 = PG_GETARG_BYTEA_PP(0);
    2973           0 :     bytea      *t2 = PG_GETARG_BYTEA_PP(1);
    2974             :     int         pos;
    2975             :     int         px,
    2976             :                 p;
    2977             :     int         len1,
    2978             :                 len2;
    2979             :     char       *p1,
    2980             :                *p2;
    2981             : 
    2982           0 :     len1 = VARSIZE_ANY_EXHDR(t1);
    2983           0 :     len2 = VARSIZE_ANY_EXHDR(t2);
    2984             : 
    2985           0 :     if (len2 <= 0)
    2986           0 :         PG_RETURN_INT32(1);     /* result for empty pattern */
    2987             : 
    2988           0 :     p1 = VARDATA_ANY(t1);
    2989           0 :     p2 = VARDATA_ANY(t2);
    2990             : 
    2991           0 :     pos = 0;
    2992           0 :     px = (len1 - len2);
    2993           0 :     for (p = 0; p <= px; p++)
    2994             :     {
    2995           0 :         if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
    2996             :         {
    2997           0 :             pos = p + 1;
    2998           0 :             break;
    2999             :         };
    3000           0 :         p1++;
    3001             :     };
    3002             : 
    3003           0 :     PG_RETURN_INT32(pos);
    3004             : }
    3005             : 
    3006             : /*-------------------------------------------------------------
    3007             :  * byteaGetByte
    3008             :  *
    3009             :  * this routine treats "bytea" as an array of bytes.
    3010             :  * It returns the Nth byte (a number between 0 and 255).
    3011             :  *-------------------------------------------------------------
    3012             :  */
    3013             : Datum
    3014           0 : byteaGetByte(PG_FUNCTION_ARGS)
    3015             : {
    3016           0 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3017           0 :     int32       n = PG_GETARG_INT32(1);
    3018             :     int         len;
    3019             :     int         byte;
    3020             : 
    3021           0 :     len = VARSIZE_ANY_EXHDR(v);
    3022             : 
    3023           0 :     if (n < 0 || n >= len)
    3024           0 :         ereport(ERROR,
    3025             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3026             :                  errmsg("index %d out of valid range, 0..%d",
    3027             :                         n, len - 1)));
    3028             : 
    3029           0 :     byte = ((unsigned char *) VARDATA_ANY(v))[n];
    3030             : 
    3031           0 :     PG_RETURN_INT32(byte);
    3032             : }
    3033             : 
    3034             : /*-------------------------------------------------------------
    3035             :  * byteaGetBit
    3036             :  *
    3037             :  * This routine treats a "bytea" type like an array of bits.
    3038             :  * It returns the value of the Nth bit (0 or 1).
    3039             :  *
    3040             :  *-------------------------------------------------------------
    3041             :  */
    3042             : Datum
    3043           0 : byteaGetBit(PG_FUNCTION_ARGS)
    3044             : {
    3045           0 :     bytea      *v = PG_GETARG_BYTEA_PP(0);
    3046           0 :     int32       n = PG_GETARG_INT32(1);
    3047             :     int         byteNo,
    3048             :                 bitNo;
    3049             :     int         len;
    3050             :     int         byte;
    3051             : 
    3052           0 :     len = VARSIZE_ANY_EXHDR(v);
    3053             : 
    3054           0 :     if (n < 0 || n >= len * 8)
    3055           0 :         ereport(ERROR,
    3056             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3057             :                  errmsg("index %d out of valid range, 0..%d",
    3058             :                         n, len * 8 - 1)));
    3059             : 
    3060           0 :     byteNo = n / 8;
    3061           0 :     bitNo = n % 8;
    3062             : 
    3063           0 :     byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
    3064             : 
    3065           0 :     if (byte & (1 << bitNo))
    3066           0 :         PG_RETURN_INT32(1);
    3067             :     else
    3068           0 :         PG_RETURN_INT32(0);
    3069             : }
    3070             : 
    3071             : /*-------------------------------------------------------------
    3072             :  * byteaSetByte
    3073             :  *
    3074             :  * Given an instance of type 'bytea' creates a new one with
    3075             :  * the Nth byte set to the given value.
    3076             :  *
    3077             :  *-------------------------------------------------------------
    3078             :  */
    3079             : Datum
    3080           0 : byteaSetByte(PG_FUNCTION_ARGS)
    3081             : {
    3082           0 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3083           0 :     int32       n = PG_GETARG_INT32(1);
    3084           0 :     int32       newByte = PG_GETARG_INT32(2);
    3085             :     int         len;
    3086             : 
    3087           0 :     len = VARSIZE(res) - VARHDRSZ;
    3088             : 
    3089           0 :     if (n < 0 || n >= len)
    3090           0 :         ereport(ERROR,
    3091             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3092             :                  errmsg("index %d out of valid range, 0..%d",
    3093             :                         n, len - 1)));
    3094             : 
    3095             :     /*
    3096             :      * Now set the byte.
    3097             :      */
    3098           0 :     ((unsigned char *) VARDATA(res))[n] = newByte;
    3099             : 
    3100           0 :     PG_RETURN_BYTEA_P(res);
    3101             : }
    3102             : 
    3103             : /*-------------------------------------------------------------
    3104             :  * byteaSetBit
    3105             :  *
    3106             :  * Given an instance of type 'bytea' creates a new one with
    3107             :  * the Nth bit set to the given value.
    3108             :  *
    3109             :  *-------------------------------------------------------------
    3110             :  */
    3111             : Datum
    3112           0 : byteaSetBit(PG_FUNCTION_ARGS)
    3113             : {
    3114           0 :     bytea      *res = PG_GETARG_BYTEA_P_COPY(0);
    3115           0 :     int32       n = PG_GETARG_INT32(1);
    3116           0 :     int32       newBit = PG_GETARG_INT32(2);
    3117             :     int         len;
    3118             :     int         oldByte,
    3119             :                 newByte;
    3120             :     int         byteNo,
    3121             :                 bitNo;
    3122             : 
    3123           0 :     len = VARSIZE(res) - VARHDRSZ;
    3124             : 
    3125           0 :     if (n < 0 || n >= len * 8)
    3126           0 :         ereport(ERROR,
    3127             :                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
    3128             :                  errmsg("index %d out of valid range, 0..%d",
    3129             :                         n, len * 8 - 1)));
    3130             : 
    3131           0 :     byteNo = n / 8;
    3132           0 :     bitNo = n % 8;
    3133             : 
    3134             :     /*
    3135             :      * sanity check!
    3136             :      */
    3137           0 :     if (newBit != 0 && newBit != 1)
    3138           0 :         ereport(ERROR,
    3139             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    3140             :                  errmsg("new bit must be 0 or 1")));
    3141             : 
    3142             :     /*
    3143             :      * Update the byte.
    3144             :      */
    3145           0 :     oldByte = ((unsigned char *) VARDATA(res))[byteNo];
    3146             : 
    3147           0 :     if (newBit == 0)
    3148           0 :         newByte = oldByte & (~(1 << bitNo));
    3149             :     else
    3150           0 :         newByte = oldByte | (1 << bitNo);
    3151             : 
    3152           0 :     ((unsigned char *) VARDATA(res))[byteNo] = newByte;
    3153             : 
    3154           0 :     PG_RETURN_BYTEA_P(res);
    3155             : }
    3156             : 
    3157             : 
    3158             : /* text_name()
    3159             :  * Converts a text type to a Name type.
    3160             :  */
    3161             : Datum
    3162         191 : text_name(PG_FUNCTION_ARGS)
    3163             : {
    3164         191 :     text       *s = PG_GETARG_TEXT_PP(0);
    3165             :     Name        result;
    3166             :     int         len;
    3167             : 
    3168         191 :     len = VARSIZE_ANY_EXHDR(s);
    3169             : 
    3170             :     /* Truncate oversize input */
    3171         191 :     if (len >= NAMEDATALEN)
    3172           1 :         len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
    3173             : 
    3174             :     /* We use palloc0 here to ensure result is zero-padded */
    3175         191 :     result = (Name) palloc0(NAMEDATALEN);
    3176         191 :     memcpy(NameStr(*result), VARDATA_ANY(s), len);
    3177             : 
    3178         191 :     PG_RETURN_NAME(result);
    3179             : }
    3180             : 
    3181             : /* name_text()
    3182             :  * Converts a Name type to a text type.
    3183             :  */
    3184             : Datum
    3185       60410 : name_text(PG_FUNCTION_ARGS)
    3186             : {
    3187       60410 :     Name        s = PG_GETARG_NAME(0);
    3188             : 
    3189       60410 :     PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
    3190             : }
    3191             : 
    3192             : 
    3193             : /*
    3194             :  * textToQualifiedNameList - convert a text object to list of names
    3195             :  *
    3196             :  * This implements the input parsing needed by nextval() and other
    3197             :  * functions that take a text parameter representing a qualified name.
    3198             :  * We split the name at dots, downcase if not double-quoted, and
    3199             :  * truncate names if they're too long.
    3200             :  */
    3201             : List *
    3202         120 : textToQualifiedNameList(text *textval)
    3203             : {
    3204             :     char       *rawname;
    3205         120 :     List       *result = NIL;
    3206             :     List       *namelist;
    3207             :     ListCell   *l;
    3208             : 
    3209             :     /* Convert to C string (handles possible detoasting). */
    3210             :     /* Note we rely on being able to modify rawname below. */
    3211         120 :     rawname = text_to_cstring(textval);
    3212             : 
    3213         120 :     if (!SplitIdentifierString(rawname, '.', &namelist))
    3214           0 :         ereport(ERROR,
    3215             :                 (errcode(ERRCODE_INVALID_NAME),
    3216             :                  errmsg("invalid name syntax")));
    3217             : 
    3218         120 :     if (namelist == NIL)
    3219           0 :         ereport(ERROR,
    3220             :                 (errcode(ERRCODE_INVALID_NAME),
    3221             :                  errmsg("invalid name syntax")));
    3222             : 
    3223         257 :     foreach(l, namelist)
    3224             :     {
    3225         137 :         char       *curname = (char *) lfirst(l);
    3226             : 
    3227         137 :         result = lappend(result, makeString(pstrdup(curname)));
    3228             :     }
    3229             : 
    3230         120 :     pfree(rawname);
    3231         120 :     list_free(namelist);
    3232             : 
    3233         120 :     return result;
    3234             : }
    3235             : 
    3236             : /*
    3237             :  * SplitIdentifierString --- parse a string containing identifiers
    3238             :  *
    3239             :  * This is the guts of textToQualifiedNameList, and is exported for use in
    3240             :  * other situations such as parsing GUC variables.  In the GUC case, it's
    3241             :  * important to avoid memory leaks, so the API is designed to minimize the
    3242             :  * amount of stuff that needs to be allocated and freed.
    3243             :  *
    3244             :  * Inputs:
    3245             :  *  rawstring: the input string; must be overwritable!  On return, it's
    3246             :  *             been modified to contain the separated identifiers.
    3247             :  *  separator: the separator punctuation expected between identifiers
    3248             :  *             (typically '.' or ',').  Whitespace may also appear around
    3249             :  *             identifiers.
    3250             :  * Outputs:
    3251             :  *  namelist: filled with a palloc'd list of pointers to identifiers within
    3252             :  *            rawstring.  Caller should list_free() this even on error return.
    3253             :  *
    3254             :  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
    3255             :  *
    3256             :  * Note that an empty string is considered okay here, though not in
    3257             :  * textToQualifiedNameList.
    3258             :  */
    3259             : bool
    3260        3189 : SplitIdentifierString(char *rawstring, char separator,
    3261             :                       List **namelist)
    3262             : {
    3263        3189 :     char       *nextp = rawstring;
    3264        3189 :     bool        done = false;
    3265             : 
    3266        3189 :     *namelist = NIL;
    3267             : 
    3268        6378 :     while (scanner_isspace(*nextp))
    3269           0 :         nextp++;                /* skip leading whitespace */
    3270             : 
    3271        3189 :     if (*nextp == '\0')
    3272         344 :         return true;            /* allow empty string */
    3273             : 
    3274             :     /* At the top of the loop, we are at start of a new identifier. */
    3275             :     do
    3276             :     {
    3277             :         char       *curname;
    3278             :         char       *endp;
    3279             : 
    3280        4753 :         if (*nextp == '"')
    3281             :         {
    3282             :             /* Quoted name --- collapse quote-quote pairs, no downcasing */
    3283         710 :             curname = nextp + 1;
    3284             :             for (;;)
    3285             :             {
    3286         710 :                 endp = strchr(nextp + 1, '"');
    3287         710 :                 if (endp == NULL)
    3288           0 :                     return false;   /* mismatched quotes */
    3289         710 :                 if (endp[1] != '"')
    3290         710 :                     break;      /* found end of quoted name */
    3291             :                 /* Collapse adjacent quotes into one quote, and look again */
    3292           0 :                 memmove(endp, endp + 1, strlen(endp));
    3293           0 :                 nextp = endp;
    3294           0 :             }
    3295             :             /* endp now points at the terminating quote */
    3296         710 :             nextp = endp + 1;
    3297             :         }
    3298             :         else
    3299             :         {
    3300             :             /* Unquoted name --- extends to separator or whitespace */
    3301             :             char       *downname;
    3302             :             int         len;
    3303             : 
    3304        4043 :             curname = nextp;
    3305       70854 :             while (*nextp && *nextp != separator &&
    3306       31384 :                    !scanner_isspace(*nextp))
    3307       31384 :                 nextp++;
    3308        4043 :             endp = nextp;
    3309        4043 :             if (curname == nextp)
    3310           0 :                 return false;   /* empty unquoted name not allowed */
    3311             : 
    3312             :             /*
    3313             :              * Downcase the identifier, using same code as main lexer does.
    3314             :              *
    3315             :              * XXX because we want to overwrite the input in-place, we cannot
    3316             :              * support a downcasing transformation that increases the string
    3317             :              * length.  This is not a problem given the current implementation
    3318             :              * of downcase_truncate_identifier, but we'll probably have to do
    3319             :              * something about this someday.
    3320             :              */
    3321        4043 :             len = endp - curname;
    3322        4043 :             downname = downcase_truncate_identifier(curname, len, false);
    3323        4043 :             Assert(strlen(downname) <= len);
    3324        4043 :             strncpy(curname, downname, len);    /* strncpy is required here */
    3325        4043 :             pfree(downname);
    3326             :         }
    3327             : 
    3328        9506 :         while (scanner_isspace(*nextp))
    3329           0 :             nextp++;            /* skip trailing whitespace */
    3330             : 
    3331        4753 :         if (*nextp == separator)
    3332             :         {
    3333        1908 :             nextp++;
    3334        5048 :             while (scanner_isspace(*nextp))
    3335        1232 :                 nextp++;        /* skip leading whitespace for next */
    3336             :             /* we expect another name, so done remains false */
    3337             :         }
    3338        2845 :         else if (*nextp == '\0')
    3339        2845 :             done = true;
    3340             :         else
    3341           0 :             return false;       /* invalid syntax */
    3342             : 
    3343             :         /* Now safe to overwrite separator with a null */
    3344        4753 :         *endp = '\0';
    3345             : 
    3346             :         /* Truncate name if it's overlength */
    3347        4753 :         truncate_identifier(curname, strlen(curname), false);
    3348             : 
    3349             :         /*
    3350             :          * Finished isolating current name --- add it to list
    3351             :          */
    3352        4753 :         *namelist = lappend(*namelist, curname);
    3353             : 
    3354             :         /* Loop back if we didn't reach end of string */
    3355        4753 :     } while (!done);
    3356             : 
    3357        2845 :     return true;
    3358             : }
    3359             : 
    3360             : 
    3361             : /*
    3362             :  * SplitDirectoriesString --- parse a string containing file/directory names
    3363             :  *
    3364             :  * This works fine on file names too; the function name is historical.
    3365             :  *
    3366             :  * This is similar to SplitIdentifierString, except that the parsing
    3367             :  * rules are meant to handle pathnames instead of identifiers: there is
    3368             :  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
    3369             :  * and we apply canonicalize_path() to each extracted string.  Because of the
    3370             :  * last, the returned strings are separately palloc'd rather than being
    3371             :  * pointers into rawstring --- but we still scribble on rawstring.
    3372             :  *
    3373             :  * Inputs:
    3374             :  *  rawstring: the input string; must be modifiable!
    3375             :  *  separator: the separator punctuation expected between directories
    3376             :  *             (typically ',' or ';').  Whitespace may also appear around
    3377             :  *             directories.
    3378             :  * Outputs:
    3379             :  *  namelist: filled with a palloc'd list of directory names.
    3380             :  *            Caller should list_free_deep() this even on error return.
    3381             :  *
    3382             :  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
    3383             :  *
    3384             :  * Note that an empty string is considered okay here.
    3385             :  */
    3386             : bool
    3387           1 : SplitDirectoriesString(char *rawstring, char separator,
    3388             :                        List **namelist)
    3389             : {
    3390           1 :     char       *nextp = rawstring;
    3391           1 :     bool        done = false;
    3392             : 
    3393           1 :     *namelist = NIL;
    3394             : 
    3395           2 :     while (scanner_isspace(*nextp))
    3396           0 :         nextp++;                /* skip leading whitespace */
    3397             : 
    3398           1 :     if (*nextp == '\0')
    3399           0 :         return true;            /* allow empty string */
    3400             : 
    3401             :     /* At the top of the loop, we are at start of a new directory. */
    3402             :     do
    3403             :     {
    3404             :         char       *curname;
    3405             :         char       *endp;
    3406             : 
    3407           1 :         if (*nextp == '"')
    3408             :         {
    3409             :             /* Quoted name --- collapse quote-quote pairs */
    3410           0 :             curname = nextp + 1;
    3411             :             for (;;)
    3412             :             {
    3413           0 :                 endp = strchr(nextp + 1, '"');
    3414           0 :                 if (endp == NULL)
    3415           0 :                     return false;   /* mismatched quotes */
    3416           0 :                 if (endp[1] != '"')
    3417           0 :                     break;      /* found end of quoted name */
    3418             :                 /* Collapse adjacent quotes into one quote, and look again */
    3419           0 :                 memmove(endp, endp + 1, strlen(endp));
    3420           0 :                 nextp = endp;
    3421           0 :             }
    3422             :             /* endp now points at the terminating quote */
    3423           0 :             nextp = endp + 1;
    3424             :         }
    3425             :         else
    3426             :         {
    3427             :             /* Unquoted name --- extends to separator or end of string */
    3428           1 :             curname = endp = nextp;
    3429          24 :             while (*nextp && *nextp != separator)
    3430             :             {
    3431             :                 /* trailing whitespace should not be included in name */
    3432          22 :                 if (!scanner_isspace(*nextp))
    3433          22 :                     endp = nextp + 1;
    3434          22 :                 nextp++;
    3435             :             }
    3436           1 :             if (curname == endp)
    3437           0 :                 return false;   /* empty unquoted name not allowed */
    3438             :         }
    3439             : 
    3440           2 :         while (scanner_isspace(*nextp))
    3441           0 :             nextp++;            /* skip trailing whitespace */
    3442             : 
    3443           1 :         if (*nextp == separator)
    3444             :         {
    3445           0 :             nextp++;
    3446           0 :             while (scanner_isspace(*nextp))
    3447           0 :                 nextp++;        /* skip leading whitespace for next */
    3448             :             /* we expect another name, so done remains false */
    3449             :         }
    3450           1 :         else if (*nextp == '\0')
    3451           1 :             done = true;
    3452             :         else
    3453           0 :             return false;       /* invalid syntax */
    3454             : 
    3455             :         /* Now safe to overwrite separator with a null */
    3456           1 :         *endp = '\0';
    3457             : 
    3458             :         /* Truncate path if it's overlength */
    3459           1 :         if (strlen(curname) >= MAXPGPATH)
    3460           0 :             curname[MAXPGPATH - 1] = '\0';
    3461             : 
    3462             :         /*
    3463             :          * Finished isolating current name --- add it to list
    3464             :          */
    3465           1 :         curname = pstrdup(curname);
    3466           1 :         canonicalize_path(curname);
    3467           1 :         *namelist = lappend(*namelist, curname);
    3468             : 
    3469             :         /* Loop back if we didn't reach end of string */
    3470           1 :     } while (!done);
    3471             : 
    3472           1 :     return true;
    3473             : }
    3474             : 
    3475             : 
    3476             : /*****************************************************************************
    3477             :  *  Comparison Functions used for bytea
    3478             :  *
    3479             :  * Note: btree indexes need these routines not to leak memory; therefore,
    3480             :  * be careful to free working copies of toasted datums.  Most places don't
    3481             :  * need to be so careful.
    3482             :  *****************************************************************************/
    3483             : 
    3484             : Datum
    3485         429 : byteaeq(PG_FUNCTION_ARGS)
    3486             : {
    3487         429 :     Datum       arg1 = PG_GETARG_DATUM(0);
    3488         429 :     Datum       arg2 = PG_GETARG_DATUM(1);
    3489             :     bool        result;
    3490             :     Size        len1,
    3491             :                 len2;
    3492             : 
    3493             :     /*
    3494             :      * We can use a fast path for unequal lengths, which might save us from
    3495             :      * having to detoast one or both values.
    3496             :      */
    3497         429 :     len1 = toast_raw_datum_size(arg1);
    3498         429 :     len2 = toast_raw_datum_size(arg2);
    3499         429 :     if (len1 != len2)
    3500           0 :         result = false;
    3501             :     else
    3502             :     {
    3503         429 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    3504         429 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    3505             : 
    3506         858 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    3507         429 :                          len1 - VARHDRSZ) == 0);
    3508             : 
    3509         429 :         PG_FREE_IF_COPY(barg1, 0);
    3510         429 :         PG_FREE_IF_COPY(barg2, 1);
    3511             :     }
    3512             : 
    3513         429 :     PG_RETURN_BOOL(result);
    3514             : }
    3515             : 
    3516             : Datum
    3517         132 : byteane(PG_FUNCTION_ARGS)
    3518             : {
    3519         132 :     Datum       arg1 = PG_GETARG_DATUM(0);
    3520         132 :     Datum       arg2 = PG_GETARG_DATUM(1);
    3521             :     bool        result;
    3522             :     Size        len1,
    3523             :                 len2;
    3524             : 
    3525             :     /*
    3526             :      * We can use a fast path for unequal lengths, which might save us from
    3527             :      * having to detoast one or both values.
    3528             :      */
    3529         132 :     len1 = toast_raw_datum_size(arg1);
    3530         132 :     len2 = toast_raw_datum_size(arg2);
    3531         132 :     if (len1 != len2)
    3532           0 :         result = true;
    3533             :     else
    3534             :     {
    3535         132 :         bytea      *barg1 = DatumGetByteaPP(arg1);
    3536         132 :         bytea      *barg2 = DatumGetByteaPP(arg2);
    3537             : 
    3538         264 :         result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
    3539         132 :                          len1 - VARHDRSZ) != 0);
    3540             : 
    3541         132 :         PG_FREE_IF_COPY(barg1, 0);
    3542         132 :         PG_FREE_IF_COPY(barg2, 1);
    3543             :     }
    3544             : 
    3545         132 :     PG_RETURN_BOOL(result);
    3546             : }
    3547             : 
    3548             : Datum
    3549         510 : bytealt(PG_FUNCTION_ARGS)
    3550             : {
    3551         510 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3552         510 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3553             :     int         len1,
    3554             :                 len2;
    3555             :     int         cmp;
    3556             : 
    3557         510 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3558         510 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3559             : 
    3560         510 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3561             : 
    3562         510 :     PG_FREE_IF_COPY(arg1, 0);
    3563         510 :     PG_FREE_IF_COPY(arg2, 1);
    3564             : 
    3565         510 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
    3566             : }
    3567             : 
    3568             : Datum
    3569         400 : byteale(PG_FUNCTION_ARGS)
    3570             : {
    3571         400 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3572         400 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3573             :     int         len1,
    3574             :                 len2;
    3575             :     int         cmp;
    3576             : 
    3577         400 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3578         400 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3579             : 
    3580         400 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3581             : 
    3582         400 :     PG_FREE_IF_COPY(arg1, 0);
    3583         400 :     PG_FREE_IF_COPY(arg2, 1);
    3584             : 
    3585         400 :     PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
    3586             : }
    3587             : 
    3588             : Datum
    3589         510 : byteagt(PG_FUNCTION_ARGS)
    3590             : {
    3591         510 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3592         510 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3593             :     int         len1,
    3594             :                 len2;
    3595             :     int         cmp;
    3596             : 
    3597         510 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3598         510 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3599             : 
    3600         510 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3601             : 
    3602         510 :     PG_FREE_IF_COPY(arg1, 0);
    3603         510 :     PG_FREE_IF_COPY(arg2, 1);
    3604             : 
    3605         510 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
    3606             : }
    3607             : 
    3608             : Datum
    3609         307 : byteage(PG_FUNCTION_ARGS)
    3610             : {
    3611         307 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3612         307 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3613             :     int         len1,
    3614             :                 len2;
    3615             :     int         cmp;
    3616             : 
    3617         307 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3618         307 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3619             : 
    3620         307 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3621             : 
    3622         307 :     PG_FREE_IF_COPY(arg1, 0);
    3623         307 :     PG_FREE_IF_COPY(arg2, 1);
    3624             : 
    3625         307 :     PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
    3626             : }
    3627             : 
    3628             : Datum
    3629           0 : byteacmp(PG_FUNCTION_ARGS)
    3630             : {
    3631           0 :     bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
    3632           0 :     bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
    3633             :     int         len1,
    3634             :                 len2;
    3635             :     int         cmp;
    3636             : 
    3637           0 :     len1 = VARSIZE_ANY_EXHDR(arg1);
    3638           0 :     len2 = VARSIZE_ANY_EXHDR(arg2);
    3639             : 
    3640           0 :     cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
    3641           0 :     if ((cmp == 0) && (len1 != len2))
    3642           0 :         cmp = (len1 < len2) ? -1 : 1;
    3643             : 
    3644           0 :     PG_FREE_IF_COPY(arg1, 0);
    3645           0 :     PG_FREE_IF_COPY(arg2, 1);
    3646             : 
    3647           0 :     PG_RETURN_INT32(cmp);
    3648             : }
    3649             : 
    3650             : Datum
    3651           1 : bytea_sortsupport(PG_FUNCTION_ARGS)
    3652             : {
    3653           1 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
    3654             :     MemoryContext oldcontext;
    3655             : 
    3656           1 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
    3657             : 
    3658             :     /* Use generic string SortSupport, forcing "C" collation */
    3659           1 :     varstr_sortsupport(ssup, C_COLLATION_OID, false);
    3660             : 
    3661           1 :     MemoryContextSwitchTo(oldcontext);
    3662             : 
    3663           1 :     PG_RETURN_VOID();
    3664             : }
    3665             : 
    3666             : /*
    3667             :  * appendStringInfoText
    3668             :  *
    3669             :  * Append a text to str.
    3670             :  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
    3671             :  */
    3672             : static void
    3673       20789 : appendStringInfoText(StringInfo str, const text *t)
    3674             : {
    3675       20789 :     appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
    3676       20789 : }
    3677             : 
    3678             : /*
    3679             :  * replace_text
    3680             :  * replace all occurrences of 'old_sub_str' in 'orig_str'
    3681             :  * with 'new_sub_str' to form 'new_str'
    3682             :  *
    3683             :  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
    3684             :  * otherwise returns 'new_str'
    3685             :  */
    3686             : Datum
    3687           6 : replace_text(PG_FUNCTION_ARGS)
    3688             : {
    3689           6 :     text       *src_text = PG_GETARG_TEXT_PP(0);
    3690           6 :     text       *from_sub_text = PG_GETARG_TEXT_PP(1);
    3691           6 :     text       *to_sub_text = PG_GETARG_TEXT_PP(2);
    3692             :     int         src_text_len;
    3693             :     int         from_sub_text_len;
    3694             :     TextPositionState state;
    3695             :     text       *ret_text;
    3696             :     int         start_posn;
    3697             :     int         curr_posn;
    3698             :     int         chunk_len;
    3699             :     char       *start_ptr;
    3700             :     StringInfoData str;
    3701             : 
    3702           6 :     text_position_setup(src_text, from_sub_text, &state);
    3703             : 
    3704             :     /*
    3705             :      * Note: we check the converted string length, not the original, because
    3706             :      * they could be different if the input contained invalid encoding.
    3707             :      */
    3708           6 :     src_text_len = state.len1;
    3709           6 :     from_sub_text_len = state.len2;
    3710             : 
    3711             :     /* Return unmodified source string if empty source or pattern */
    3712           6 :     if (src_text_len < 1 || from_sub_text_len < 1)
    3713             :     {
    3714           0 :         text_position_cleanup(&state);
    3715           0 :         PG_RETURN_TEXT_P(src_text);
    3716             :     }
    3717             : 
    3718           6 :     start_posn = 1;
    3719           6 :     curr_posn = text_position_next(1, &state);
    3720             : 
    3721             :     /* When the from_sub_text is not found, there is nothing to do. */
    3722           6 :     if (curr_posn == 0)
    3723             :     {
    3724           2 :         text_position_cleanup(&state);
    3725           2 :         PG_RETURN_TEXT_P(src_text);
    3726             :     }
    3727             : 
    3728             :     /* start_ptr points to the start_posn'th character of src_text */
    3729           4 :     start_ptr = VARDATA_ANY(src_text);
    3730             : 
    3731           4 :     initStringInfo(&str);
    3732             : 
    3733             :     do
    3734             :     {
    3735           6 :         CHECK_FOR_INTERRUPTS();
    3736             : 
    3737             :         /* copy the data skipped over by last text_position_next() */
    3738           6 :         chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
    3739           6 :         appendBinaryStringInfo(&str, start_ptr, chunk_len);
    3740             : 
    3741           6 :         appendStringInfoText(&str, to_sub_text);
    3742             : 
    3743           6 :         start_posn = curr_posn;
    3744           6 :         start_ptr += chunk_len;
    3745           6 :         start_posn += from_sub_text_len;
    3746           6 :         start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
    3747             : 
    3748           6 :         curr_posn = text_position_next(start_posn, &state);
    3749             :     }
    3750           6 :     while (curr_posn > 0);
    3751             : 
    3752             :     /* copy trailing data */
    3753           4 :     chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
    3754           4 :     appendBinaryStringInfo(&str, start_ptr, chunk_len);
    3755             : 
    3756           4 :     text_position_cleanup(&state);
    3757             : 
    3758           4 :     ret_text = cstring_to_text_with_len(str.data, str.len);
    3759           4 :     pfree(str.data);
    3760             : 
    3761           4 :     PG_RETURN_TEXT_P(ret_text);
    3762             : }
    3763             : 
    3764             : /*
    3765             :  * check_replace_text_has_escape_char
    3766             :  *
    3767             :  * check whether replace_text contains escape char.
    3768             :  */
    3769             : static bool
    3770          25 : check_replace_text_has_escape_char(const text *replace_text)
    3771             : {
    3772          25 :     const char *p = VARDATA_ANY(replace_text);
    3773          25 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
    3774             : 
    3775          25 :     if (pg_database_encoding_max_length() == 1)
    3776             :     {
    3777           0 :         for (; p < p_end; p++)
    3778             :         {
    3779           0 :             if (*p == '\\')
    3780           0 :                 return true;
    3781             :         }
    3782             :     }
    3783             :     else
    3784             :     {
    3785         161 :         for (; p < p_end; p += pg_mblen(p))
    3786             :         {
    3787         145 :             if (*p == '\\')
    3788           9 :                 return true;
    3789             :         }
    3790             :     }
    3791             : 
    3792          16 :     return false;
    3793             : }
    3794             : 
    3795             : /*
    3796             :  * appendStringInfoRegexpSubstr
    3797             :  *
    3798             :  * Append replace_text to str, substituting regexp back references for
    3799             :  * \n escapes.  start_ptr is the start of the match in the source string,
    3800             :  * at logical character position data_pos.
    3801             :  */
    3802             : static void
    3803           4 : appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
    3804             :                              regmatch_t *pmatch,
    3805             :                              char *start_ptr, int data_pos)
    3806             : {
    3807           4 :     const char *p = VARDATA_ANY(replace_text);
    3808           4 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
    3809           4 :     int         eml = pg_database_encoding_max_length();
    3810             : 
    3811             :     for (;;)
    3812             :     {
    3813          13 :         const char *chunk_start = p;
    3814             :         int         so;
    3815             :         int         eo;
    3816             : 
    3817             :         /* Find next escape char. */
    3818          13 :         if (eml == 1)
    3819             :         {
    3820           0 :             for (; p < p_end && *p != '\\'; p++)
    3821             :                  /* nothing */ ;
    3822             :         }
    3823             :         else
    3824             :         {
    3825          13 :             for (; p < p_end && *p != '\\'; p += pg_mblen(p))
    3826             :                  /* nothing */ ;
    3827             :         }
    3828             : 
    3829             :         /* Copy the text we just scanned over, if any. */
    3830          13 :         if (p > chunk_start)
    3831           9 :             appendBinaryStringInfo(str, chunk_start, p - chunk_start);
    3832             : 
    3833             :         /* Done if at end of string, else advance over escape char. */
    3834          13 :         if (p >= p_end)
    3835           4 :             break;
    3836           9 :         p++;
    3837             : 
    3838           9 :         if (p >= p_end)
    3839             :         {
    3840             :             /* Escape at very end of input.  Treat same as unexpected char */
    3841           0 :             appendStringInfoChar(str, '\\');
    3842           0 :             break;
    3843             :         }
    3844             : 
    3845           9 :         if (*p >= '1' && *p <= '9')
    3846           9 :         {
    3847             :             /* Use the back reference of regexp. */
    3848           9 :             int         idx = *p - '0';
    3849             : 
    3850           9 :             so = pmatch[idx].rm_so;
    3851           9 :             eo = pmatch[idx].rm_eo;
    3852           9 :             p++;
    3853             :         }
    3854           0 :         else if (*p == '&')
    3855             :         {
    3856             :             /* Use the entire matched string. */
    3857           0 :             so = pmatch[0].rm_so;
    3858           0 :             eo = pmatch[0].rm_eo;
    3859           0 :             p++;
    3860             :         }
    3861           0 :         else if (*p == '\\')
    3862             :         {
    3863             :             /* \\ means transfer one \ to output. */
    3864           0 :             appendStringInfoChar(str, '\\');
    3865           0 :             p++;
    3866           0 :             continue;
    3867             :         }
    3868             :         else
    3869             :         {
    3870             :             /*
    3871             :              * If escape char is not followed by any expected char, just treat
    3872             :              * it as ordinary data to copy.  (XXX would it be better to throw
    3873             :              * an error?)
    3874             :              */
    3875           0 :             appendStringInfoChar(str, '\\');
    3876           0 :             continue;
    3877             :         }
    3878             : 
    3879           9 :         if (so != -1 && eo != -1)
    3880             :         {
    3881             :             /*
    3882             :              * Copy the text that is back reference of regexp.  Note so and eo
    3883             :              * are counted in characters not bytes.
    3884             :              */
    3885             :             char       *chunk_start;
    3886             :             int         chunk_len;
    3887             : 
    3888           9 :             Assert(so >= data_pos);
    3889           9 :             chunk_start = start_ptr;
    3890           9 :             chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
    3891           9 :             chunk_len = charlen_to_bytelen(chunk_start, eo - so);
    3892           9 :             appendBinaryStringInfo(str, chunk_start, chunk_len);
    3893             :         }
    3894           9 :     }
    3895           4 : }
    3896             : 
    3897             : #define REGEXP_REPLACE_BACKREF_CNT      10
    3898             : 
    3899             : /*
    3900             :  * replace_text_regexp
    3901             :  *
    3902             :  * replace text that matches to regexp in src_text to replace_text.
    3903             :  *
    3904             :  * Note: to avoid having to include regex.h in builtins.h, we declare
    3905             :  * the regexp argument as void *, but really it's regex_t *.
    3906             :  */
    3907             : text *
    3908          25 : replace_text_regexp(text *src_text, void *regexp,
    3909             :                     text *replace_text, bool glob)
    3910             : {
    3911             :     text       *ret_text;
    3912          25 :     regex_t    *re = (regex_t *) regexp;
    3913          25 :     int         src_text_len = VARSIZE_ANY_EXHDR(src_text);
    3914             :     StringInfoData buf;
    3915             :     regmatch_t  pmatch[REGEXP_REPLACE_BACKREF_CNT];
    3916             :     pg_wchar   *data;
    3917             :     size_t      data_len;
    3918             :     int         search_start;
    3919             :     int         data_pos;
    3920             :     char       *start_ptr;
    3921             :     bool        have_escape;
    3922             : 
    3923          25 :     initStringInfo(&buf);
    3924             : 
    3925             :     /* Convert data string to wide characters. */
    3926          25 :     data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
    3927          25 :     data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
    3928             : 
    3929             :     /* Check whether replace_text has escape char. */
    3930          25 :     have_escape = check_replace_text_has_escape_char(replace_text);
    3931             : 
    3932             :     /* start_ptr points to the data_pos'th character of src_text */
    3933          25 :     start_ptr = (char *) VARDATA_ANY(src_text);
    3934          25 :     data_pos = 0;
    3935             : 
    3936          25 :     search_start = 0;
    3937          57 :     while (search_start <= data_len)
    3938             :     {
    3939             :         int         regexec_result;
    3940             : 
    3941          31 :         CHECK_FOR_INTERRUPTS();
    3942             : 
    3943          31 :         regexec_result = pg_regexec(re,
    3944             :                                     data,
    3945             :                                     data_len,
    3946             :                                     search_start,
    3947             :                                     NULL,   /* no details */
    3948             :                                     REGEXP_REPLACE_BACKREF_CNT,
    3949             :                                     pmatch,
    3950             :                                     0);
    3951             : 
    3952          31 :         if (regexec_result == REG_NOMATCH)
    3953          19 :             break;
    3954             : 
    3955          12 :         if (regexec_result != REG_OKAY)
    3956             :         {
    3957             :             char        errMsg[100];
    3958             : 
    3959           0 :             CHECK_FOR_INTERRUPTS();
    3960           0 :             pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
    3961           0 :             ereport(ERROR,
    3962             :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
    3963             :                      errmsg("regular expression failed: %s", errMsg)));
    3964             :         }
    3965             : 
    3966             :         /*
    3967             :          * Copy the text to the left of the match position.  Note we are given
    3968             :          * character not byte indexes.
    3969             :          */
    3970          12 :         if (pmatch[0].rm_so - data_pos > 0)
    3971             :         {
    3972             :             int         chunk_len;
    3973             : 
    3974           6 :             chunk_len = charlen_to_bytelen(start_ptr,
    3975           6 :                                            pmatch[0].rm_so - data_pos);
    3976           6 :             appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    3977             : 
    3978             :             /*
    3979             :              * Advance start_ptr over that text, to avoid multiple rescans of
    3980             :              * it if the replace_text contains multiple back-references.
    3981             :              */
    3982           6 :             start_ptr += chunk_len;
    3983           6 :             data_pos = pmatch[0].rm_so;
    3984             :         }
    3985             : 
    3986             :         /*
    3987             :          * Copy the replace_text. Process back references when the
    3988             :          * replace_text has escape characters.
    3989             :          */
    3990          12 :         if (have_escape)
    3991           4 :             appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
    3992             :                                          start_ptr, data_pos);
    3993             :         else
    3994           8 :             appendStringInfoText(&buf, replace_text);
    3995             : 
    3996             :         /* Advance start_ptr and data_pos over the matched text. */
    3997          12 :         start_ptr += charlen_to_bytelen(start_ptr,
    3998          12 :                                         pmatch[0].rm_eo - data_pos);
    3999          12 :         data_pos = pmatch[0].rm_eo;
    4000             : 
    4001             :         /*
    4002             :          * When global option is off, replace the first instance only.
    4003             :          */
    4004          12 :         if (!glob)
    4005           5 :             break;
    4006             : 
    4007             :         /*
    4008             :          * Advance search position.  Normally we start the next search at the
    4009             :          * end of the previous match; but if the match was of zero length, we
    4010             :          * have to advance by one character, or we'd just find the same match
    4011             :          * again.
    4012             :          */
    4013           7 :         search_start = data_pos;
    4014           7 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
    4015           2 :             search_start++;
    4016             :     }
    4017             : 
    4018             :     /*
    4019             :      * Copy the text to the right of the last match.
    4020             :      */
    4021          25 :     if (data_pos < data_len)
    4022             :     {
    4023             :         int         chunk_len;
    4024             : 
    4025          17 :         chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
    4026          17 :         appendBinaryStringInfo(&buf, start_ptr, chunk_len);
    4027             :     }
    4028             : 
    4029          25 :     ret_text = cstring_to_text_with_len(buf.data, buf.len);
    4030          25 :     pfree(buf.data);
    4031          25 :     pfree(data);
    4032             : 
    4033          25 :     return ret_text;
    4034             : }
    4035             : 
    4036             : /*
    4037             :  * split_text
    4038             :  * parse input string
    4039             :  * return ord item (1 based)
    4040             :  * based on provided field separator
    4041             :  */
    4042             : Datum
    4043           5 : split_text(PG_FUNCTION_ARGS)
    4044             : {
    4045           5 :     text       *inputstring = PG_GETARG_TEXT_PP(0);
    4046           5 :     text       *fldsep = PG_GETARG_TEXT_PP(1);
    4047           5 :     int         fldnum = PG_GETARG_INT32(2);
    4048             :     int         inputstring_len;
    4049             :     int         fldsep_len;
    4050             :     TextPositionState state;
    4051             :     int         start_posn;
    4052             :     int         end_posn;
    4053             :     text       *result_text;
    4054             : 
    4055             :     /* field number is 1 based */
    4056           5 :     if (fldnum < 1)
    4057           1 :         ereport(ERROR,
    4058             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    4059             :                  errmsg("field position must be greater than zero")));
    4060             : 
    4061           4 :     text_position_setup(inputstring, fldsep, &state);
    4062             : 
    4063             :     /*
    4064             :      * Note: we check the converted string length, not the original, because
    4065             :      * they could be different if the input contained invalid encoding.
    4066             :      */
    4067           4 :     inputstring_len = state.len1;
    4068           4 :     fldsep_len = state.len2;
    4069             : 
    4070             :     /* return empty string for empty input string */
    4071           4 :     if (inputstring_len < 1)
    4072             :     {
    4073           0 :         text_position_cleanup(&state);
    4074           0 :         PG_RETURN_TEXT_P(cstring_to_text(""));
    4075             :     }
    4076             : 
    4077             :     /* empty field separator */
    4078           4 :     if (fldsep_len < 1)
    4079             :     {
    4080           0 :         text_position_cleanup(&state);
    4081             :         /* if first field, return input string, else empty string */
    4082           0 :         if (fldnum == 1)
    4083           0 :             PG_RETURN_TEXT_P(inputstring);
    4084             :         else
    4085           0 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4086             :     }
    4087             : 
    4088             :     /* identify bounds of first field */
    4089           4 :     start_posn = 1;
    4090           4 :     end_posn = text_position_next(1, &state);
    4091             : 
    4092             :     /* special case if fldsep not found at all */
    4093           4 :     if (end_posn == 0)
    4094             :     {
    4095           0 :         text_position_cleanup(&state);
    4096             :         /* if field 1 requested, return input string, else empty string */
    4097           0 :         if (fldnum == 1)
    4098           0 :             PG_RETURN_TEXT_P(inputstring);
    4099             :         else
    4100           0 :             PG_RETURN_TEXT_P(cstring_to_text(""));
    4101             :     }
    4102             : 
    4103          11 :     while (end_posn > 0 && --fldnum > 0)
    4104             :     {
    4105             :         /* identify bounds of next field */
    4106           3 :         start_posn = end_posn + fldsep_len;
    4107           3 :         end_posn = text_position_next(start_posn, &state);
    4108             :     }
    4109             : 
    4110           4 :     text_position_cleanup(&state);
    4111             : 
    4112           4 :     if (fldnum > 0)
    4113             :     {
    4114             :         /* N'th field separator not found */
    4115             :         /* if last field requested, return it, else empty string */
    4116           2 :         if (fldnum == 1)
    4117           1 :             result_text = text_substring(PointerGetDatum(inputstring),
    4118             :                                          start_posn,
    4119             :                                          -1,
    4120             :                                          true);
    4121             :         else
    4122           1 :             result_text = cstring_to_text("");
    4123             :     }
    4124             :     else
    4125             :     {
    4126             :         /* non-last field requested */
    4127           2 :         result_text = text_substring(PointerGetDatum(inputstring),
    4128             :                                      start_posn,
    4129             :                                      end_posn - start_posn,
    4130             :                                      false);
    4131             :     }
    4132             : 
    4133           4 :     PG_RETURN_TEXT_P(result_text);
    4134             : }
    4135             : 
    4136             : /*
    4137             :  * Convenience function to return true when two text params are equal.
    4138             :  */
    4139             : static bool
    4140          14 : text_isequal(text *txt1, text *txt2)
    4141             : {
    4142          14 :     return DatumGetBool(DirectFunctionCall2(texteq,
    4143             :                                             PointerGetDatum(txt1),
    4144             :                                             PointerGetDatum(txt2)));
    4145             : }
    4146             : 
    4147             : /*
    4148             :  * text_to_array
    4149             :  * parse input string and return text array of elements,
    4150             :  * based on provided field separator
    4151             :  */
    4152             : Datum
    4153          14 : text_to_array(PG_FUNCTION_ARGS)
    4154             : {
    4155          14 :     return text_to_array_internal(fcinfo);
    4156             : }
    4157             : 
    4158             : /*
    4159             :  * text_to_array_null
    4160             :  * parse input string and return text array of elements,
    4161             :  * based on provided field separator and null string
    4162             :  *
    4163             :  * This is a separate entry point only to prevent the regression tests from
    4164             :  * complaining about different argument sets for the same internal function.
    4165             :  */
    4166             : Datum
    4167           4 : text_to_array_null(PG_FUNCTION_ARGS)
    4168             : {
    4169           4 :     return text_to_array_internal(fcinfo);
    4170             : }
    4171             : 
    4172             : /*
    4173             :  * common code for text_to_array and text_to_array_null functions
    4174             :  *
    4175             :  * These are not strict so we have to test for null inputs explicitly.
    4176             :  */
    4177             : static Datum
    4178          18 : text_to_array_internal(PG_FUNCTION_ARGS)
    4179             : {
    4180             :     text       *inputstring;
    4181             :     text       *fldsep;
    4182             :     text       *null_string;
    4183             :     int         inputstring_len;
    4184             :     int         fldsep_len;
    4185             :     char       *start_ptr;
    4186             :     text       *result_text;
    4187             :     bool        is_null;
    4188          18 :     ArrayBuildState *astate = NULL;
    4189             : 
    4190             :     /* when input string is NULL, then result is NULL too */
    4191          18 :     if (PG_ARGISNULL(0))
    4192           1 :         PG_RETURN_NULL();
    4193             : 
    4194          17 :     inputstring = PG_GETARG_TEXT_PP(0);
    4195             : 
    4196             :     /* fldsep can be NULL */
    4197          17 :     if (!PG_ARGISNULL(1))
    4198          16 :         fldsep = PG_GETARG_TEXT_PP(1);
    4199             :     else
    4200           1 :         fldsep = NULL;
    4201             : 
    4202             :     /* null_string can be NULL or omitted */
    4203          17 :     if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
    4204           4 :         null_string = PG_GETARG_TEXT_PP(2);
    4205             :     else
    4206          13 :         null_string = NULL;
    4207             : 
    4208          17 :     if (fldsep != NULL)
    4209             :     {
    4210             :         /*
    4211             :          * Normal case with non-null fldsep.  Use the text_position machinery
    4212             :          * to search for occurrences of fldsep.
    4213             :          */
    4214             :         TextPositionState state;
    4215             :         int         fldnum;
    4216             :         int         start_posn;
    4217             :         int         end_posn;
    4218             :         int         chunk_len;
    4219             : 
    4220          16 :         text_position_setup(inputstring, fldsep, &state);
    4221             : 
    4222             :         /*
    4223             :          * Note: we check the converted string length, not the original,
    4224             :          * because they could be different if the input contained invalid
    4225             :          * encoding.
    4226             :          */
    4227          16 :         inputstring_len = state.len1;
    4228          16 :         fldsep_len = state.len2;
    4229             : 
    4230             :         /* return empty array for empty input string */
    4231          16 :         if (inputstring_len < 1)
    4232             :         {
    4233           1 :             text_position_cleanup(&state);
    4234           7 :             PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
    4235             :         }
    4236             : 
    4237             :         /*
    4238             :          * empty field separator: return the input string as a one-element
    4239             :          * array
    4240             :          */
    4241          15 :         if (fldsep_len < 1)
    4242             :         {
    4243             :             Datum       elems[1];
    4244             :             bool        nulls[1];
    4245             :             int         dims[1];
    4246             :             int         lbs[1];
    4247             : 
    4248           5 :             text_position_cleanup(&state);
    4249             :             /* single element can be a NULL too */
    4250           5 :             is_null = null_string ? text_isequal(inputstring, null_string) : false;
    4251             : 
    4252           5 :             elems[0] = PointerGetDatum(inputstring);
    4253           5 :             nulls[0] = is_null;
    4254           5 :             dims[0] = 1;
    4255           5 :             lbs[0] = 1;
    4256             :             /* XXX: this hardcodes assumptions about the text type */
    4257           5 :             PG_RETURN_ARRAYTYPE_P(construct_md_array(elems, nulls,
    4258             :                                                      1, dims, lbs,
    4259             :                                                      TEXTOID, -1, false, 'i'));
    4260             :         }
    4261             : 
    4262          10 :         start_posn = 1;
    4263             :         /* start_ptr points to the start_posn'th character of inputstring */
    4264          10 :         start_ptr = VARDATA_ANY(inputstring);
    4265             : 
    4266          62 :         for (fldnum = 1;; fldnum++) /* field number is 1 based */
    4267             :         {
    4268          62 :             CHECK_FOR_INTERRUPTS();
    4269             : 
    4270          62 :             end_posn = text_position_next(start_posn, &state);
    4271             : 
    4272          62 :             if (end_posn == 0)
    4273             :             {
    4274             :                 /* fetch last field */
    4275          10 :                 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
    4276             :             }
    4277             :             else
    4278             :             {
    4279             :                 /* fetch non-last field */
    4280          52 :                 chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
    4281             :             }
    4282             : 
    4283             :             /* must build a temp text datum to pass to accumArrayResult */
    4284          62 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4285          62 :             is_null = null_string ? text_isequal(result_text, null_string) : false;
    4286             : 
    4287             :             /* stash away this field */
    4288          62 :             astate = accumArrayResult(astate,
    4289             :                                       PointerGetDatum(result_text),
    4290             :                                       is_null,
    4291             :                                       TEXTOID,
    4292             :                                       CurrentMemoryContext);
    4293             : 
    4294          62 :             pfree(result_text);
    4295             : 
    4296          62 :             if (end_posn == 0)
    4297          10 :                 break;
    4298             : 
    4299          52 :             start_posn = end_posn;
    4300          52 :             start_ptr += chunk_len;
    4301          52 :             start_posn += fldsep_len;
    4302          52 :             start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
    4303          52 :         }
    4304             : 
    4305          10 :         text_position_cleanup(&state);
    4306             :     }
    4307             :     else
    4308             :     {
    4309             :         /*
    4310             :          * When fldsep is NULL, each character in the inputstring becomes an
    4311             :          * element in the result array.  The separator is effectively the
    4312             :          * space between characters.
    4313             :          */
    4314           1 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
    4315             : 
    4316             :         /* return empty array for empty input string */
    4317           1 :         if (inputstring_len < 1)
    4318           0 :             PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
    4319             : 
    4320           1 :         start_ptr = VARDATA_ANY(inputstring);
    4321             : 
    4322           7 :         while (inputstring_len > 0)
    4323             :         {
    4324           5 :             int         chunk_len = pg_mblen(start_ptr);
    4325             : 
    4326           5 :             CHECK_FOR_INTERRUPTS();
    4327             : 
    4328             :             /* must build a temp text datum to pass to accumArrayResult */
    4329           5 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
    4330           5 :             is_null = null_string ? text_isequal(result_text, null_string) : false;
    4331             : 
    4332             :             /* stash away this field */
    4333           5 :             astate = accumArrayResult(astate,
    4334             :                                       PointerGetDatum(result_text),
    4335             :                                       is_null,
    4336             :                                       TEXTOID,
    4337             :                                       CurrentMemoryContext);
    4338             : 
    4339           5 :             pfree(result_text);
    4340             : 
    4341           5 :             start_ptr += chunk_len;
    4342           5 :             inputstring_len -= chunk_len;
    4343             :         }
    4344             :     }
    4345             : 
    4346          11 :     PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
    4347             :                                           CurrentMemoryContext));
    4348             : }
    4349             : 
    4350             : /*
    4351             :  * array_to_text
    4352             :  * concatenate Cstring representation of input array elements
    4353             :  * using provided field separator
    4354             :  */
    4355             : Datum
    4356         428 : array_to_text(PG_FUNCTION_ARGS)
    4357             : {
    4358         428 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
    4359         428 :     char       *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
    4360             : 
    4361         428 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
    4362             : }
    4363             : 
    4364             : /*
    4365             :  * array_to_text_null
    4366             :  * concatenate Cstring representation of input array elements
    4367             :  * using provided field separator and null string
    4368             :  *
    4369             :  * This version is not strict so we have to test for null inputs explicitly.
    4370             :  */
    4371             : Datum
    4372           2 : array_to_text_null(PG_FUNCTION_ARGS)
    4373             : {
    4374             :     ArrayType  *v;
    4375             :     char       *fldsep;
    4376             :     char       *null_string;
    4377             : 
    4378             :     /* returns NULL when first or second parameter is NULL */
    4379           2 :     if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
    4380           0 :         PG_RETURN_NULL();
    4381             : 
    4382           2 :     v = PG_GETARG_ARRAYTYPE_P(0);
    4383           2 :     fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
    4384             : 
    4385             :     /* NULL null string is passed through as a null pointer */
    4386           2 :     if (!PG_ARGISNULL(2))
    4387           1 :         null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
    4388             :     else
    4389           1 :         null_string = NULL;
    4390             : 
    4391           2 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
    4392             : }
    4393             : 
    4394             : /*
    4395             :  * common code for array_to_text and array_to_text_null functions
    4396             :  */
    4397             : static text *
    4398         433 : array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
    4399             :                        const char *fldsep, const char *null_string)
    4400             : {
    4401             :     text       *result;
    4402             :     int         nitems,
    4403             :                *dims,
    4404             :                 ndims;
    4405             :     Oid         element_type;
    4406             :     int         typlen;
    4407             :     bool        typbyval;
    4408             :     char        typalign;
    4409             :     StringInfoData buf;
    4410         433 :     bool        printed = false;
    4411             :     char       *p;
    4412             :     bits8      *bitmap;
    4413             :     int         bitmask;
    4414             :     int         i;
    4415             :     ArrayMetaState *my_extra;
    4416             : 
    4417         433 :     ndims = ARR_NDIM(v);
    4418         433 :     dims = ARR_DIMS(v);
    4419         433 :     nitems = ArrayGetNItems(ndims, dims);
    4420             : 
    4421             :     /* if there are no elements, return an empty string */
    4422         433 :     if (nitems == 0)
    4423         160 :         return cstring_to_text_with_len("", 0);
    4424             : 
    4425         273 :     element_type = ARR_ELEMTYPE(v);
    4426         273 :     initStringInfo(&buf);
    4427             : 
    4428             :     /*
    4429             :      * We arrange to look up info about element type, including its output
    4430             :      * conversion proc, only once per series of calls, assuming the element
    4431             :      * type doesn't change underneath us.
    4432             :      */
    4433         273 :     my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    4434         273 :     if (my_extra == NULL)
    4435             :     {
    4436         144 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    4437             :                                                       sizeof(ArrayMetaState));
    4438         144 :         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
    4439         144 :         my_extra->element_type = ~element_type;
    4440             :     }
    4441             : 
    4442         273 :     if (my_extra->element_type != element_type)
    4443             :     {
    4444             :         /*
    4445             :          * Get info about element type, including its output conversion proc
    4446             :          */
    4447         144 :         get_type_io_data(element_type, IOFunc_output,
    4448             :                          &my_extra->typlen, &my_extra->typbyval,
    4449             :                          &my_extra->typalign, &my_extra->typdelim,
    4450             :                          &my_extra->typioparam, &my_extra->typiofunc);
    4451         144 :         fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
    4452         144 :                       fcinfo->flinfo->fn_mcxt);
    4453         144 :         my_extra->element_type = element_type;
    4454             :     }
    4455         273 :     typlen = my_extra->typlen;
    4456         273 :     typbyval = my_extra->typbyval;
    4457         273 :     typalign = my_extra->typalign;
    4458             : 
    4459         273 :     p = ARR_DATA_PTR(v);
    4460         273 :     bitmap = ARR_NULLBITMAP(v);
    4461         273 :     bitmask = 1;
    4462             : 
    4463        1280 :     for (i = 0; i < nitems; i++)
    4464             :     {
    4465             :         Datum       itemvalue;
    4466             :         char       *value;
    4467             : 
    4468             :         /* Get source element, checking for NULL */
    4469        1007 :         if (bitmap && (*bitmap & bitmask) == 0)
    4470             :         {
    4471             :             /* if null_string is NULL, we just ignore null elements */
    4472           6 :             if (null_string != NULL)
    4473             :             {
    4474           1 :                 if (printed)
    4475           1 :                     appendStringInfo(&buf, "%s%s", fldsep, null_string);
    4476             :                 else
    4477           0 :                     appendStringInfoString(&buf, null_string);
    4478           1 :                 printed = true;
    4479             :             }
    4480             :         }
    4481             :         else
    4482             :         {
    4483        1004 :             itemvalue = fetch_att(p, typbyval, typlen);
    4484             : 
    4485        1004 :             value = OutputFunctionCall(&my_extra->proc, itemvalue);
    4486             : 
    4487        1004 :             if (printed)
    4488         731 :                 appendStringInfo(&buf, "%s%s", fldsep, value);
    4489             :             else
    4490         273 :                 appendStringInfoString(&buf, value);
    4491        1004 :             printed = true;
    4492             : 
    4493        1004 :             p = att_addlength_pointer(p, typlen, p);
    4494        1004 :             p = (char *) att_align_nominal(p, typalign);
    4495             :         }
    4496             : 
    4497             :         /* advance bitmap pointer if any */
    4498        1007 :         if (bitmap)
    4499             :         {
    4500          18 :             bitmask <<= 1;
    4501          18 :             if (bitmask == 0x100)
    4502             :             {
    4503           0 :                 bitmap++;
    4504           0 :                 bitmask = 1;
    4505             :             }
    4506             :         }
    4507             :     }
    4508             : 
    4509         273 :     result = cstring_to_text_with_len(buf.data, buf.len);
    4510         273 :     pfree(buf.data);
    4511             : 
    4512         273 :     return result;
    4513             : }
    4514             : 
    4515             : #define HEXBASE 16
    4516             : /*
    4517             :  * Convert an int32 to a string containing a base 16 (hex) representation of
    4518             :  * the number.
    4519             :  */
    4520             : Datum
    4521         331 : to_hex32(PG_FUNCTION_ARGS)
    4522             : {
    4523         331 :     uint32      value = (uint32) PG_GETARG_INT32(0);
    4524             :     char       *ptr;
    4525         331 :     const char *digits = "0123456789abcdef";
    4526             :     char        buf[32];        /* bigger than needed, but reasonable */
    4527             : 
    4528         331 :     ptr = buf + sizeof(buf) - 1;
    4529         331 :     *ptr = '\0';
    4530             : 
    4531             :     do
    4532             :     {
    4533         618 :         *--ptr = digits[value % HEXBASE];
    4534         618 :         value /= HEXBASE;
    4535         618 :     } while (ptr > buf && value);
    4536             : 
    4537         331 :     PG_RETURN_TEXT_P(cstring_to_text(ptr));
    4538             : }
    4539             : 
    4540             : /*
    4541             :  * Convert an int64 to a string containing a base 16 (hex) representation of
    4542             :  * the number.
    4543             :  */
    4544             : Datum
    4545           1 : to_hex64(PG_FUNCTION_ARGS)
    4546             : {
    4547           1 :     uint64      value = (uint64) PG_GETARG_INT64(0);
    4548             :     char       *ptr;
    4549           1 :     const char *digits = "0123456789abcdef";
    4550             :     char        buf[32];        /* bigger than needed, but reasonable */
    4551             : 
    4552           1 :     ptr = buf + sizeof(buf) - 1;
    4553           1 :     *ptr = '\0';
    4554             : 
    4555             :     do
    4556             :     {
    4557           8 :         *--ptr = digits[value % HEXBASE];
    4558           8 :         value /= HEXBASE;
    4559           8 :     } while (ptr > buf && value);
    4560             : 
    4561           1 :     PG_RETURN_TEXT_P(cstring_to_text(ptr));
    4562             : }
    4563             : 
    4564             : /*
    4565             :  * Create an md5 hash of a text string and return it as hex
    4566             :  *
    4567             :  * md5 produces a 16 byte (128 bit) hash; double it for hex
    4568             :  */
    4569             : #define MD5_HASH_LEN  32
    4570             : 
    4571             : Datum
    4572         188 : md5_text(PG_FUNCTION_ARGS)
    4573             : {
    4574         188 :     text       *in_text = PG_GETARG_TEXT_PP(0);
    4575             :     size_t      len;
    4576             :     char        hexsum[MD5_HASH_LEN + 1];
    4577             : 
    4578             :     /* Calculate the length of the buffer using varlena metadata */
    4579         188 :     len = VARSIZE_ANY_EXHDR(in_text);
    4580             : 
    4581             :     /* get the hash result */
    4582         188 :     if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
    4583           0 :         ereport(ERROR,
    4584             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
    4585             :                  errmsg("out of memory")));
    4586             : 
    4587             :     /* convert to text and return it */
    4588         188 :     PG_RETURN_TEXT_P(cstring_to_text(hexsum));
    4589             : }
    4590             : 
    4591             : /*
    4592             :  * Create an md5 hash of a bytea field and return it as a hex string:
    4593             :  * 16-byte md5 digest is represented in 32 hex characters.
    4594             :  */
    4595             : Datum
    4596           9 : md5_bytea(PG_FUNCTION_ARGS)
    4597             : {
    4598           9 :     bytea      *in = PG_GETARG_BYTEA_PP(0);
    4599             :     size_t      len;
    4600             :     char        hexsum[MD5_HASH_LEN + 1];
    4601             : 
    4602           9 :     len = VARSIZE_ANY_EXHDR(in);
    4603           9 :     if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
    4604           0 :         ereport(ERROR,
    4605             :                 (errcode(ERRCODE_OUT_OF_MEMORY),
    4606             :                  errmsg("out of memory")));
    4607             : 
    4608           9 :     PG_RETURN_TEXT_P(cstring_to_text(hexsum));
    4609             : }
    4610             : 
    4611             : /*
    4612             :  * Return the size of a datum, possibly compressed
    4613             :  *
    4614             :  * Works on any data type
    4615             :  */
    4616             : Datum
    4617          10 : pg_column_size(PG_FUNCTION_ARGS)
    4618             : {
    4619          10 :     Datum       value = PG_GETARG_DATUM(0);
    4620             :     int32       result;
    4621             :     int         typlen;
    4622             : 
    4623             :     /* On first call, get the input type's typlen, and save at *fn_extra */
    4624          10 :     if (fcinfo->flinfo->fn_extra == NULL)
    4625             :     {
    4626             :         /* Lookup the datatype of the supplied argument */
    4627          10 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
    4628             : 
    4629          10 :         typlen = get_typlen(argtypeid);
    4630          10 :         if (typlen == 0)        /* should not happen */
    4631           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
    4632             : 
    4633          10 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
    4634             :                                                       sizeof(int));
    4635          10 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
    4636             :     }
    4637             :     else
    4638           0 :         typlen = *((int *) fcinfo->flinfo->fn_extra);
    4639             : 
    4640          10 :     if (typlen == -1)
    4641             :     {
    4642             :         /* varlena type, possibly toasted */
    4643          10 :         result = toast_datum_size(value);
    4644             :     }
    4645           0 :     else if (typlen == -2)
    4646             :     {
    4647             :         /* cstring */
    4648           0 :         result = strlen(DatumGetCString(value)) + 1;
    4649             :     }
    4650             :     else
    4651             :     {
    4652             :         /* ordinary fixed-width type */
    4653           0 :         result = typlen;
    4654             :     }
    4655             : 
    4656          10 :     PG_RETURN_INT32(result);
    4657             : }
    4658             : 
    4659             : /*
    4660             :  * string_agg - Concatenates values and returns string.
    4661             :  *
    4662             :  * Syntax: string_agg(value text, delimiter text) RETURNS text
    4663             :  *
    4664             :  * Note: Any NULL values are ignored. The first-call delimiter isn't
    4665             :  * actually used at all, and on subsequent calls the delimiter precedes
    4666             :  * the associated value.
    4667             :  */
    4668             : 
    4669             : /* subroutine to initialize state */
    4670             : static StringInfo
    4671         111 : makeStringAggState(FunctionCallInfo fcinfo)
    4672             : {
    4673             :     StringInfo  state;
    4674             :     MemoryContext aggcontext;
    4675             :     MemoryContext oldcontext;
    4676             : 
    4677         111 :     if (!AggCheckCallContext(fcinfo, &aggcontext))
    4678             :     {
    4679             :         /* cannot be called directly because of internal-type argument */
    4680           0 :         elog(ERROR, "string_agg_transfn called in non-aggregate context");
    4681             :     }
    4682             : 
    4683             :     /*
    4684             :      * Create state in aggregate context.  It'll stay there across subsequent
    4685             :      * calls.
    4686             :      */
    4687         111 :     oldcontext = MemoryContextSwitchTo(aggcontext);
    4688         111 :     state = makeStringInfo();
    4689         111 :     MemoryContextSwitchTo(oldcontext);
    4690             : 
    4691         111 :     return state;
    4692             : }
    4693             : 
    4694             : Datum
    4695       10449 : string_agg_transfn(PG_FUNCTION_ARGS)
    4696             : {
    4697             :     StringInfo  state;
    4698             : 
    4699       10449 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    4700             : 
    4701             :     /* Append the value unless null. */
    4702       10449 :     if (!PG_ARGISNULL(1))
    4703             :     {
    4704             :         /* On the first time through, we ignore the delimiter. */
    4705       10441 :         if (state == NULL)
    4706         107 :             state = makeStringAggState(fcinfo);
    4707       10334 :         else if (!PG_ARGISNULL(2))
    4708       10334 :             appendStringInfoText(state, PG_GETARG_TEXT_PP(2));  /* delimiter */
    4709             : 
    4710       10441 :         appendStringInfoText(state, PG_GETARG_TEXT_PP(1));  /* value */
    4711             :     }
    4712             : 
    4713             :     /*
    4714             :      * The transition type for string_agg() is declared to be "internal",
    4715             :      * which is a pass-by-value type the same size as a pointer.
    4716             :      */
    4717       10449 :     PG_RETURN_POINTER(state);
    4718             : }
    4719             : 
    4720             : Datum
    4721         114 : string_agg_finalfn(PG_FUNCTION_ARGS)
    4722             : {
    4723             :     StringInfo  state;
    4724             : 
    4725             :     /* cannot be called directly because of internal-type argument */
    4726         114 :     Assert(AggCheckCallContext(fcinfo, NULL));
    4727             : 
    4728         114 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
    4729             : 
    4730         114 :     if (state != NULL)
    4731         107 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len));
    4732             :     else
    4733           7 :         PG_RETURN_NULL();
    4734             : }
    4735             : 
    4736             : /*
    4737             :  * Implementation of both concat() and concat_ws().
    4738             :  *
    4739             :  * sepstr is the separator string to place between values.
    4740             :  * argidx identifies the first argument to concatenate (counting from zero).
    4741             :  * Returns NULL if result should be NULL, else text value.
    4742             :  */
    4743             : static text *
    4744          11 : concat_internal(const char *sepstr, int argidx,
    4745             :                 FunctionCallInfo fcinfo)
    4746             : {
    4747             :     text       *result;
    4748             :     StringInfoData str;
    4749          11 :     bool        first_arg = true;
    4750             :     int         i;
    4751             : 
    4752             :     /*
    4753             :      * concat(VARIADIC some-array) is essentially equivalent to
    4754             :      * array_to_text(), ie concat the array elements with the given separator.
    4755             :      * So we just pass the case off to that code.
    4756             :      */
    4757          11 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    4758             :     {
    4759             :         ArrayType  *arr;
    4760             : 
    4761             :         /* Should have just the one argument */
    4762           5 :         Assert(argidx == PG_NARGS() - 1);
    4763             : 
    4764             :         /* concat(VARIADIC NULL) is defined as NULL */
    4765           5 :         if (PG_ARGISNULL(argidx))
    4766           2 :             return NULL;
    4767             : 
    4768             :         /*
    4769             :          * Non-null argument had better be an array.  We assume that any call
    4770             :          * context that could let get_fn_expr_variadic return true will have
    4771             :          * checked that a VARIADIC-labeled parameter actually is an array.  So
    4772             :          * it should be okay to just Assert that it's an array rather than
    4773             :          * doing a full-fledged error check.
    4774             :          */
    4775           3 :         Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
    4776             : 
    4777             :         /* OK, safe to fetch the array value */
    4778           3 :         arr = PG_GETARG_ARRAYTYPE_P(argidx);
    4779             : 
    4780             :         /*
    4781             :          * And serialize the array.  We tell array_to_text to ignore null
    4782             :          * elements, which matches the behavior of the loop below.
    4783             :          */
    4784           3 :         return array_to_text_internal(fcinfo, arr, sepstr, NULL);
    4785             :     }
    4786             : 
    4787             :     /* Normal case without explicit VARIADIC marker */
    4788           6 :     initStringInfo(&str);
    4789             : 
    4790          30 :     for (i = argidx; i < PG_NARGS(); i++)
    4791             :     {
    4792          24 :         if (!PG_ARGISNULL(i))
    4793             :         {
    4794          22 :             Datum       value = PG_GETARG_DATUM(i);
    4795             :             Oid         valtype;
    4796             :             Oid         typOutput;
    4797             :             bool        typIsVarlena;
    4798             : 
    4799             :             /* add separator if appropriate */
    4800          22 :             if (first_arg)
    4801           6 :                 first_arg = false;
    4802             :             else
    4803          16 :                 appendStringInfoString(&str, sepstr);
    4804             : 
    4805             :             /* call the appropriate type output function, append the result */
    4806          22 :             valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
    4807          22 :             if (!OidIsValid(valtype))
    4808           0 :                 elog(ERROR, "could not determine data type of concat() input");
    4809          22 :             getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
    4810          44 :             appendStringInfoString(&str,
    4811          22 :                                    OidOutputFunctionCall(typOutput, value));
    4812             :         }
    4813             :     }
    4814             : 
    4815           6 :     result = cstring_to_text_with_len(str.data, str.len);
    4816           6 :     pfree(str.data);
    4817             : 
    4818           6 :     return result;
    4819             : }
    4820             : 
    4821             : /*
    4822             :  * Concatenate all arguments. NULL arguments are ignored.
    4823             :  */
    4824             : Datum
    4825           5 : text_concat(PG_FUNCTION_ARGS)
    4826             : {
    4827             :     text       *result;
    4828             : 
    4829           5 :     result = concat_internal("", 0, fcinfo);
    4830           5 :     if (result == NULL)
    4831           1 :         PG_RETURN_NULL();
    4832           4 :     PG_RETURN_TEXT_P(result);
    4833             : }
    4834             : 
    4835             : /*
    4836             :  * Concatenate all but first argument value with separators. The first
    4837             :  * parameter is used as the separator. NULL arguments are ignored.
    4838             :  */
    4839             : Datum
    4840           7 : text_concat_ws(PG_FUNCTION_ARGS)
    4841             : {
    4842             :     char       *sep;
    4843             :     text       *result;
    4844             : 
    4845             :     /* return NULL when separator is NULL */
    4846           7 :     if (PG_ARGISNULL(0))
    4847           1 :         PG_RETURN_NULL();
    4848           6 :     sep = text_to_cstring(PG_GETARG_TEXT_PP(0));
    4849             : 
    4850           6 :     result = concat_internal(sep, 1, fcinfo);
    4851           6 :     if (result == NULL)
    4852           1 :         PG_RETURN_NULL();
    4853           5 :     PG_RETURN_TEXT_P(result);
    4854             : }
    4855             : 
    4856             : /*
    4857             :  * Return first n characters in the string. When n is negative,
    4858             :  * return all but last |n| characters.
    4859             :  */
    4860             : Datum
    4861          11 : text_left(PG_FUNCTION_ARGS)
    4862             : {
    4863          11 :     text       *str = PG_GETARG_TEXT_PP(0);
    4864          11 :     const char *p = VARDATA_ANY(str);
    4865          11 :     int         len = VARSIZE_ANY_EXHDR(str);
    4866          11 :     int         n = PG_GETARG_INT32(1);
    4867             :     int         rlen;
    4868             : 
    4869          11 :     if (n < 0)
    4870           5 :         n = pg_mbstrlen_with_len(p, len) + n;
    4871          11 :     rlen = pg_mbcharcliplen(p, len, n);
    4872             : 
    4873          11 :     PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
    4874             : }
    4875             : 
    4876             : /*
    4877             :  * Return last n characters in the string. When n is negative,
    4878             :  * return all but first |n| characters.
    4879             :  */
    4880             : Datum
    4881          11 : text_right(PG_FUNCTION_ARGS)
    4882             : {
    4883          11 :     text       *str = PG_GETARG_TEXT_PP(0);
    4884          11 :     const char *p = VARDATA_ANY(str);
    4885          11 :     int         len = VARSIZE_ANY_EXHDR(str);
    4886          11 :     int         n = PG_GETARG_INT32(1);
    4887             :     int         off;
    4888             : 
    4889          11 :     if (n < 0)
    4890           5 :         n = -n;
    4891             :     else
    4892           6 :         n = pg_mbstrlen_with_len(p, len) - n;
    4893          11 :     off = pg_mbcharcliplen(p, len, n);
    4894             : 
    4895          11 :     PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
    4896             : }
    4897             : 
    4898             : /*
    4899             :  * Return reversed string
    4900             :  */
    4901             : Datum
    4902           1 : text_reverse(PG_FUNCTION_ARGS)
    4903             : {
    4904           1 :     text       *str = PG_GETARG_TEXT_PP(0);
    4905           1 :     const char *p = VARDATA_ANY(str);
    4906           1 :     int         len = VARSIZE_ANY_EXHDR(str);
    4907           1 :     const char *endp = p + len;
    4908             :     text       *result;
    4909             :     char       *dst;
    4910             : 
    4911           1 :     result = palloc(len + VARHDRSZ);
    4912           1 :     dst = (char *) VARDATA(result) + len;
    4913           1 :     SET_VARSIZE(result, len + VARHDRSZ);
    4914             : 
    4915           1 :     if (pg_database_encoding_max_length() > 1)
    4916             :     {
    4917             :         /* multibyte version */
    4918           7 :         while (p < endp)
    4919             :         {
    4920             :             int         sz;
    4921             : 
    4922           5 :             sz = pg_mblen(p);
    4923           5 :             dst -= sz;
    4924           5 :             memcpy(dst, p, sz);
    4925           5 :             p += sz;
    4926             :         }
    4927             :     }
    4928             :     else
    4929             :     {
    4930             :         /* single byte version */
    4931           0 :         while (p < endp)
    4932           0 :             *(--dst) = *p++;
    4933             :     }
    4934             : 
    4935           1 :     PG_RETURN_TEXT_P(result);
    4936             : }
    4937             : 
    4938             : 
    4939             : /*
    4940             :  * Support macros for text_format()
    4941             :  */
    4942             : #define TEXT_FORMAT_FLAG_MINUS  0x0001  /* is minus flag present? */
    4943             : 
    4944             : #define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
    4945             :     do { \
    4946             :         if (++(ptr) >= (end_ptr)) \
    4947             :             ereport(ERROR, \
    4948             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
    4949             :                      errmsg("unterminated format() type specifier"), \
    4950             :                      errhint("For a single \"%%\" use \"%%%%\"."))); \
    4951             :     } while (0)
    4952             : 
    4953             : /*
    4954             :  * Returns a formatted string
    4955             :  */
    4956             : Datum
    4957        1772 : text_format(PG_FUNCTION_ARGS)
    4958             : {
    4959             :     text       *fmt;
    4960             :     StringInfoData str;
    4961             :     const char *cp;
    4962             :     const char *start_ptr;
    4963             :     const char *end_ptr;
    4964             :     text       *result;
    4965             :     int         arg;
    4966             :     bool        funcvariadic;
    4967             :     int         nargs;
    4968        1772 :     Datum      *elements = NULL;
    4969        1772 :     bool       *nulls = NULL;
    4970        1772 :     Oid         element_type = InvalidOid;
    4971        1772 :     Oid         prev_type = InvalidOid;
    4972        1772 :     Oid         prev_width_type = InvalidOid;
    4973             :     FmgrInfo    typoutputfinfo;
    4974             :     FmgrInfo    typoutputinfo_width;
    4975             : 
    4976             :     /* When format string is null, immediately return null */
    4977        1772 :     if (PG_ARGISNULL(0))
    4978           1 :         PG_RETURN_NULL();
    4979             : 
    4980             :     /* If argument is marked VARIADIC, expand array into elements */
    4981        1771 :     if (get_fn_expr_variadic(fcinfo->flinfo))
    4982             :     {
    4983             :         ArrayType  *arr;
    4984             :         int16       elmlen;
    4985             :         bool        elmbyval;
    4986             :         char        elmalign;
    4987             :         int         nitems;
    4988             : 
    4989             :         /* Should have just the one argument */
    4990           8 :         Assert(PG_NARGS() == 2);
    4991             : 
    4992             :         /* If argument is NULL, we treat it as zero-length array */
    4993           8 :         if (PG_ARGISNULL(1))
    4994           1 :             nitems = 0;
    4995             :         else
    4996             :         {
    4997             :             /*
    4998             :              * Non-null argument had better be an array.  We assume that any
    4999             :              * call context that could let get_fn_expr_variadic return true
    5000             :              * will have checked that a VARIADIC-labeled parameter actually is
    5001             :              * an array.  So it should be okay to just Assert that it's an
    5002             :              * array rather than doing a full-fledged error check.
    5003             :              */
    5004           7 :             Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1))));
    5005             : 
    5006             :             /* OK, safe to fetch the array value */
    5007           7 :             arr = PG_GETARG_ARRAYTYPE_P(1);
    5008             : 
    5009             :             /* Get info about array element type */
    5010           7 :             element_type = ARR_ELEMTYPE(arr);
    5011           7 :             get_typlenbyvalalign(element_type,
    5012             :                                  &elmlen, &elmbyval, &elmalign);
    5013             : 
    5014             :             /* Extract all array elements */
    5015           7 :             deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
    5016             :                               &elements, &nulls, &nitems);
    5017             :         }
    5018             : 
    5019           8 :         nargs = nitems + 1;
    5020           8 :         funcvariadic = true;
    5021             :     }
    5022             :     else
    5023             :     {
    5024             :         /* Non-variadic case, we'll process the arguments individually */
    5025        1763 :         nargs = PG_NARGS();
    5026        1763 :         funcvariadic = false;
    5027             :     }
    5028             : 
    5029             :     /* Setup for main loop. */
    5030        1771 :     fmt = PG_GETARG_TEXT_PP(0);
    5031        1771 :     start_ptr = VARDATA_ANY(fmt);
    5032        1771 :     end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
    5033        1771 :     initStringInfo(&str);
    5034        1771 :     arg = 1;                    /* next argument position to print */
    5035             : 
    5036             :     /* Scan format string, looking for conversion specifiers. */
    5037       57672 :     for (cp = start_ptr; cp < end_ptr; cp++)
    5038             :     {
    5039             :         int         argpos;
    5040             :         int         widthpos;
    5041             :         int         flags;
    5042             :         int         width;
    5043             :         Datum       value;
    5044             :         bool        isNull;
    5045             :         Oid         typid;
    5046             : 
    5047             :         /*
    5048             :          * If it's not the start of a conversion specifier, just copy it to
    5049             :          * the output buffer.
    5050             :          */
    5051       55911 :         if (*cp != '%')
    5052             :         {
    5053       51833 :             appendStringInfoCharMacro(&str, *cp);
    5054      103669 :             continue;
    5055             :         }
    5056             : 
    5057        4078 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5058             : 
    5059             :         /* Easy case: %% outputs a single % */
    5060        4078 :         if (*cp == '%')
    5061             :         {
    5062           3 :             appendStringInfoCharMacro(&str, *cp);
    5063           3 :             continue;
    5064             :         }
    5065             : 
    5066             :         /* Parse the optional portions of the format specifier */
    5067        4075 :         cp = text_format_parse_format(cp, end_ptr,
    5068             :                                       &argpos, &widthpos,
    5069             :                                       &flags, &width);
    5070             : 
    5071             :         /*
    5072             :          * Next we should see the main conversion specifier.  Whether or not
    5073             :          * an argument position was present, it's known that at least one
    5074             :          * character remains in the string at this point.  Experience suggests
    5075             :          * that it's worth checking that that character is one of the expected
    5076             :          * ones before we try to fetch arguments, so as to produce the least
    5077             :          * confusing response to a mis-formatted specifier.
    5078             :          */
    5079        4071 :         if (strchr("sIL", *cp) == NULL)
    5080           1 :             ereport(ERROR,
    5081             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5082             :                      errmsg("unrecognized format() type specifier \"%c\"",
    5083             :                             *cp),
    5084             :                      errhint("For a single \"%%\" use \"%%%%\".")));
    5085             : 
    5086             :         /* If indirect width was specified, get its value */
    5087        4070 :         if (widthpos >= 0)
    5088             :         {
    5089             :             /* Collect the specified or next argument position */
    5090           7 :             if (widthpos > 0)
    5091           6 :                 arg = widthpos;
    5092           7 :             if (arg >= nargs)
    5093           0 :                 ereport(ERROR,
    5094             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5095             :                          errmsg("too few arguments for format()")));
    5096             : 
    5097             :             /* Get the value and type of the selected argument */
    5098           7 :             if (!funcvariadic)
    5099             :             {
    5100           7 :                 value = PG_GETARG_DATUM(arg);
    5101           7 :                 isNull = PG_ARGISNULL(arg);
    5102           7 :                 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    5103             :             }
    5104             :             else
    5105             :             {
    5106           0 :                 value = elements[arg - 1];
    5107           0 :                 isNull = nulls[arg - 1];
    5108           0 :                 typid = element_type;
    5109             :             }
    5110           7 :             if (!OidIsValid(typid))
    5111           0 :                 elog(ERROR, "could not determine data type of format() input");
    5112             : 
    5113           7 :             arg++;
    5114             : 
    5115             :             /* We can treat NULL width the same as zero */
    5116           7 :             if (isNull)
    5117           1 :                 width = 0;
    5118           6 :             else if (typid == INT4OID)
    5119           6 :                 width = DatumGetInt32(value);
    5120           0 :             else if (typid == INT2OID)
    5121           0 :                 width = DatumGetInt16(value);
    5122             :             else
    5123             :             {
    5124             :                 /* For less-usual datatypes, convert to text then to int */
    5125             :                 char       *str;
    5126             : 
    5127           0 :                 if (typid != prev_width_type)
    5128             :                 {
    5129             :                     Oid         typoutputfunc;
    5130             :                     bool        typIsVarlena;
    5131             : 
    5132           0 :                     getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    5133           0 :                     fmgr_info(typoutputfunc, &typoutputinfo_width);
    5134           0 :                     prev_width_type = typid;
    5135             :                 }
    5136             : 
    5137           0 :                 str = OutputFunctionCall(&typoutputinfo_width, value);
    5138             : 
    5139             :                 /* pg_atoi will complain about bad data or overflow */
    5140           0 :                 width = pg_atoi(str, sizeof(int), '\0');
    5141             : 
    5142           0 :                 pfree(str);
    5143             :             }
    5144             :         }
    5145             : 
    5146             :         /* Collect the specified or next argument position */
    5147        4070 :         if (argpos > 0)
    5148          22 :             arg = argpos;
    5149        4070 :         if (arg >= nargs)
    5150           4 :             ereport(ERROR,
    5151             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5152             :                      errmsg("too few arguments for format()")));
    5153             : 
    5154             :         /* Get the value and type of the selected argument */
    5155        4066 :         if (!funcvariadic)
    5156             :         {
    5157        3854 :             value = PG_GETARG_DATUM(arg);
    5158        3854 :             isNull = PG_ARGISNULL(arg);
    5159        3854 :             typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
    5160             :         }
    5161             :         else
    5162             :         {
    5163         212 :             value = elements[arg - 1];
    5164         212 :             isNull = nulls[arg - 1];
    5165         212 :             typid = element_type;
    5166             :         }
    5167        4066 :         if (!OidIsValid(typid))
    5168           0 :             elog(ERROR, "could not determine data type of format() input");
    5169             : 
    5170        4066 :         arg++;
    5171             : 
    5172             :         /*
    5173             :          * Get the appropriate typOutput function, reusing previous one if
    5174             :          * same type as previous argument.  That's particularly useful in the
    5175             :          * variadic-array case, but often saves work even for ordinary calls.
    5176             :          */
    5177        4066 :         if (typid != prev_type)
    5178             :         {
    5179             :             Oid         typoutputfunc;
    5180             :             bool        typIsVarlena;
    5181             : 
    5182        2018 :             getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
    5183        2018 :             fmgr_info(typoutputfunc, &typoutputfinfo);
    5184        2018 :             prev_type = typid;
    5185             :         }
    5186             : 
    5187             :         /*
    5188             :          * And now we can format the value.
    5189             :          */
    5190        4066 :         switch (*cp)
    5191             :         {
    5192             :             case 's':
    5193             :             case 'I':
    5194             :             case 'L':
    5195        4066 :                 text_format_string_conversion(&str, *cp, &typoutputfinfo,
    5196             :                                               value, isNull,
    5197             :                                               flags, width);
    5198        4065 :                 break;
    5199             :             default:
    5200             :                 /* should not get here, because of previous check */
    5201           0 :                 ereport(ERROR,
    5202             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5203             :                          errmsg("unrecognized format() type specifier \"%c\"",
    5204             :                                 *cp),
    5205             :                          errhint("For a single \"%%\" use \"%%%%\".")));
    5206             :                 break;
    5207             :         }
    5208             :     }
    5209             : 
    5210             :     /* Don't need deconstruct_array results anymore. */
    5211        1761 :     if (elements != NULL)
    5212           7 :         pfree(elements);
    5213        1761 :     if (nulls != NULL)
    5214           7 :         pfree(nulls);
    5215             : 
    5216             :     /* Generate results. */
    5217        1761 :     result = cstring_to_text_with_len(str.data, str.len);
    5218        1761 :     pfree(str.data);
    5219             : 
    5220        1761 :     PG_RETURN_TEXT_P(result);
    5221             : }
    5222             : 
    5223             : /*
    5224             :  * Parse contiguous digits as a decimal number.
    5225             :  *
    5226             :  * Returns true if some digits could be parsed.
    5227             :  * The value is returned into *value, and *ptr is advanced to the next
    5228             :  * character to be parsed.
    5229             :  *
    5230             :  * Note parsing invariant: at least one character is known available before
    5231             :  * string end (end_ptr) at entry, and this is still true at exit.
    5232             :  */
    5233             : static bool
    5234        8144 : text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
    5235             : {
    5236        8144 :     bool        found = false;
    5237        8144 :     const char *cp = *ptr;
    5238        8144 :     int         val = 0;
    5239             : 
    5240       16340 :     while (*cp >= '0' && *cp <= '9')
    5241             :     {
    5242          53 :         int         newval = val * 10 + (*cp - '0');
    5243             : 
    5244          53 :         if (newval / 10 != val) /* overflow? */
    5245           0 :             ereport(ERROR,
    5246             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    5247             :                      errmsg("number is out of range")));
    5248          53 :         val = newval;
    5249          53 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5250          52 :         found = true;
    5251             :     }
    5252             : 
    5253        8143 :     *ptr = cp;
    5254        8143 :     *value = val;
    5255             : 
    5256        8143 :     return found;
    5257             : }
    5258             : 
    5259             : /*
    5260             :  * Parse a format specifier (generally following the SUS printf spec).
    5261             :  *
    5262             :  * We have already advanced over the initial '%', and we are looking for
    5263             :  * [argpos][flags][width]type (but the type character is not consumed here).
    5264             :  *
    5265             :  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
    5266             :  * Output parameters:
    5267             :  *  argpos: argument position for value to be printed.  -1 means unspecified.
    5268             :  *  widthpos: argument position for width.  Zero means the argument position
    5269             :  *          was unspecified (ie, take the next arg) and -1 means no width
    5270             :  *          argument (width was omitted or specified as a constant).
    5271             :  *  flags: bitmask of flags.
    5272             :  *  width: directly-specified width value.  Zero means the width was omitted
    5273             :  *          (note it's not necessary to distinguish this case from an explicit
    5274             :  *          zero width value).
    5275             :  *
    5276             :  * The function result is the next character position to be parsed, ie, the
    5277             :  * location where the type character is/should be.
    5278             :  *
    5279             :  * Note parsing invariant: at least one character is known available before
    5280             :  * string end (end_ptr) at entry, and this is still true at exit.
    5281             :  */
    5282             : static const char *
    5283        4075 : text_format_parse_format(const char *start_ptr, const char *end_ptr,
    5284             :                          int *argpos, int *widthpos,
    5285             :                          int *flags, int *width)
    5286             : {
    5287        4075 :     const char *cp = start_ptr;
    5288             :     int         n;
    5289             : 
    5290             :     /* set defaults for output parameters */
    5291        4075 :     *argpos = -1;
    5292        4075 :     *widthpos = -1;
    5293        4075 :     *flags = 0;
    5294        4075 :     *width = 0;
    5295             : 
    5296             :     /* try to identify first number */
    5297        4075 :     if (text_format_parse_digits(&cp, end_ptr, &n))
    5298             :     {
    5299          29 :         if (*cp != '$')
    5300             :         {
    5301             :             /* Must be just a width and a type, so we're done */
    5302           4 :             *width = n;
    5303           4 :             return cp;
    5304             :         }
    5305             :         /* The number was argument position */
    5306          25 :         *argpos = n;
    5307             :         /* Explicit 0 for argument index is immediately refused */
    5308          25 :         if (n == 0)
    5309           1 :             ereport(ERROR,
    5310             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5311             :                      errmsg("format specifies argument 0, but arguments are numbered from 1")));
    5312          24 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5313             :     }
    5314             : 
    5315             :     /* Handle flags (only minus is supported now) */
    5316        8143 :     while (*cp == '-')
    5317             :     {
    5318           5 :         *flags |= TEXT_FORMAT_FLAG_MINUS;
    5319           5 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5320             :     }
    5321             : 
    5322        4069 :     if (*cp == '*')
    5323             :     {
    5324             :         /* Handle indirect width */
    5325           8 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
    5326           8 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    5327             :         {
    5328             :             /* number in this position must be closed by $ */
    5329           7 :             if (*cp != '$')
    5330           0 :                 ereport(ERROR,
    5331             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5332             :                          errmsg("width argument position must be ended by \"$\"")));
    5333             :             /* The number was width argument position */
    5334           7 :             *widthpos = n;
    5335             :             /* Explicit 0 for argument index is immediately refused */
    5336           7 :             if (n == 0)
    5337           1 :                 ereport(ERROR,
    5338             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    5339             :                          errmsg("format specifies argument 0, but arguments are numbered from 1")));
    5340           6 :             ADVANCE_PARSE_POINTER(cp, end_ptr);
    5341             :         }
    5342             :         else
    5343           1 :             *widthpos = 0;      /* width's argument position is unspecified */
    5344             :     }
    5345             :     else
    5346             :     {
    5347             :         /* Check for direct width specification */
    5348        4061 :         if (text_format_parse_digits(&cp, end_ptr, &n))
    5349           5 :             *width = n;
    5350             :     }
    5351             : 
    5352             :     /* cp should now be pointing at type character */
    5353        4067 :     return cp;
    5354             : }
    5355             : 
    5356             : /*
    5357             :  * Format a %s, %I, or %L conversion
    5358             :  */
    5359             : static void
    5360        4066 : text_format_string_conversion(StringInfo buf, char conversion,
    5361             :                               FmgrInfo *typOutputInfo,
    5362             :                               Datum value, bool isNull,
    5363             :                               int flags, int width)
    5364             : {
    5365             :     char       *str;
    5366             : 
    5367             :     /* Handle NULL arguments before trying to stringify the value. */
    5368        4066 :     if (isNull)
    5369             :     {
    5370          11 :         if (conversion == 's')
    5371           3 :             text_format_append_string(buf, "", flags, width);
    5372           8 :         else if (conversion == 'L')
    5373           7 :             text_format_append_string(buf, "NULL", flags, width);
    5374           1 :         else if (conversion == 'I')
    5375           1 :             ereport(ERROR,
    5376             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    5377             :                      errmsg("null values cannot be formatted as an SQL identifier")));
    5378        4075 :         return;
    5379             :     }
    5380             : 
    5381             :     /* Stringify. */
    5382        4055 :     str = OutputFunctionCall(typOutputInfo, value);
    5383             : 
    5384             :     /* Escape. */
    5385        4055 :     if (conversion == 'I')
    5386             :     {
    5387             :         /* quote_identifier may or may not allocate a new string. */
    5388         276 :         text_format_append_string(buf, quote_identifier(str), flags, width);
    5389             :     }
    5390        3779 :     else if (conversion == 'L')
    5391             :     {
    5392         253 :         char       *qstr = quote_literal_cstr(str);
    5393             : 
    5394         253 :         text_format_append_string(buf, qstr, flags, width);
    5395             :         /* quote_literal_cstr() always allocates a new string */
    5396         253 :         pfree(qstr);
    5397             :     }
    5398             :     else
    5399        3526 :         text_format_append_string(buf, str, flags, width);
    5400             : 
    5401             :     /* Cleanup. */
    5402        4055 :     pfree(str);
    5403             : }
    5404             : 
    5405             : /*
    5406             :  * Append str to buf, padding as directed by flags/width
    5407             :  */
    5408             : static void
    5409        4065 : text_format_append_string(StringInfo buf, const char *str,
    5410             :                           int flags, int width)
    5411             : {
    5412        4065 :     bool        align_to_left = false;
    5413             :     int         len;
    5414             : 
    5415             :     /* fast path for typical easy case */
    5416        4065 :     if (width == 0)
    5417             :     {
    5418        4051 :         appendStringInfoString(buf, str);
    5419        8116 :         return;
    5420             :     }
    5421             : 
    5422          14 :     if (width < 0)
    5423             :     {
    5424             :         /* Negative width: implicit '-' flag, then take absolute value */
    5425           1 :         align_to_left = true;
    5426             :         /* -INT_MIN is undefined */
    5427           1 :         if (width <= INT_MIN)
    5428           0 :             ereport(ERROR,
    5429             :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
    5430             :                      errmsg("number is out of range")));
    5431           1 :         width = -width;
    5432             :     }
    5433          13 :     else if (flags & TEXT_FORMAT_FLAG_MINUS)
    5434           4 :         align_to_left = true;
    5435             : 
    5436          14 :     len = pg_mbstrlen(str);
    5437          14 :     if (align_to_left)
    5438             :     {
    5439             :         /* left justify */
    5440           5 :         appendStringInfoString(buf, str);
    5441           5 :         if (len < width)
    5442           5 :             appendStringInfoSpaces(buf, width - len);
    5443             :     }
    5444             :     else
    5445             :     {
    5446             :         /* right justify */
    5447           9 :         if (len < width)
    5448           9 :             appendStringInfoSpaces(buf, width - len);
    5449           9 :         appendStringInfoString(buf, str);
    5450             :     }
    5451             : }
    5452             : 
    5453             : /*
    5454             :  * text_format_nv - nonvariadic wrapper for text_format function.
    5455             :  *
    5456             :  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
    5457             :  * which checks that all built-in functions that share the implementing C
    5458             :  * function take the same number of arguments.
    5459             :  */
    5460             : Datum
    5461           5 : text_format_nv(PG_FUNCTION_ARGS)
    5462             : {
    5463           5 :     return text_format(fcinfo);
    5464             : }
    5465             : 
    5466             : /*
    5467             :  * Helper function for Levenshtein distance functions. Faster than memcmp(),
    5468             :  * for this use case.
    5469             :  */
    5470             : static inline bool
    5471           0 : rest_of_char_same(const char *s1, const char *s2, int len)
    5472             : {
    5473           0 :     while (len > 0)
    5474             :     {
    5475           0 :         len--;
    5476           0 :         if (s1[len] != s2[len])
    5477           0 :             return false;
    5478             :     }
    5479           0 :     return true;
    5480             : }
    5481             : 
    5482             : /* Expand each Levenshtein distance variant */
    5483             : #include "levenshtein.c"
    5484             : #define LEVENSHTEIN_LESS_EQUAL
    5485             : #include "levenshtein.c"

Generated by: LCOV version 1.11