LCOV - code coverage report
Current view: top level - src/backend/regex - regc_pg_locale.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 92 260 35.4 %
Date: 2017-09-29 13:40:31 Functions: 9 14 64.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * regc_pg_locale.c
       4             :  *    ctype functions adapted to work on pg_wchar (a/k/a chr),
       5             :  *    and functions to cache the results of wholesale ctype probing.
       6             :  *
       7             :  * This file is #included by regcomp.c; it's not meant to compile standalone.
       8             :  *
       9             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      10             :  * Portions Copyright (c) 1994, Regents of the University of California
      11             :  *
      12             :  * IDENTIFICATION
      13             :  *    src/backend/regex/regc_pg_locale.c
      14             :  *
      15             :  *-------------------------------------------------------------------------
      16             :  */
      17             : 
      18             : #include "catalog/pg_collation.h"
      19             : #include "utils/pg_locale.h"
      20             : 
      21             : /*
      22             :  * To provide as much functionality as possible on a variety of platforms,
      23             :  * without going so far as to implement everything from scratch, we use
      24             :  * several implementation strategies depending on the situation:
      25             :  *
      26             :  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
      27             :  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
      28             :  * collations don't give a fig about multibyte characters.
      29             :  *
      30             :  * 2. In the "default" collation (which is supposed to obey LC_CTYPE):
      31             :  *
      32             :  * 2a. When working in UTF8 encoding, we use the <wctype.h> functions if
      33             :  * available.  This assumes that every platform uses Unicode codepoints
      34             :  * directly as the wchar_t representation of Unicode.  On some platforms
      35             :  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
      36             :  *
      37             :  * 2b. In all other encodings, or on machines that lack <wctype.h>, we use
      38             :  * the <ctype.h> functions for pg_wchar values up to 255, and punt for values
      39             :  * above that.  This is only 100% correct in single-byte encodings such as
      40             :  * LATINn.  However, non-Unicode multibyte encodings are mostly Far Eastern
      41             :  * character sets for which the properties being tested here aren't very
      42             :  * relevant for higher code values anyway.  The difficulty with using the
      43             :  * <wctype.h> functions with non-Unicode multibyte encodings is that we can
      44             :  * have no certainty that the platform's wchar_t representation matches
      45             :  * what we do in pg_wchar conversions.
      46             :  *
      47             :  * 3. Other collations are only supported on platforms that HAVE_LOCALE_T.
      48             :  * Here, we use the locale_t-extended forms of the <wctype.h> and <ctype.h>
      49             :  * functions, under exactly the same cases as #2.
      50             :  *
      51             :  * There is one notable difference between cases 2 and 3: in the "default"
      52             :  * collation we force ASCII letters to follow ASCII upcase/downcase rules,
      53             :  * while in a non-default collation we just let the library functions do what
      54             :  * they will.  The case where this matters is treatment of I/i in Turkish,
      55             :  * and the behavior is meant to match the upper()/lower() SQL functions.
      56             :  *
      57             :  * We store the active collation setting in static variables.  In principle
      58             :  * it could be passed down to here via the regex library's "struct vars" data
      59             :  * structure; but that would require somewhat invasive changes in the regex
      60             :  * library, and right now there's no real benefit to be gained from that.
      61             :  *
      62             :  * NB: the coding here assumes pg_wchar is an unsigned type.
      63             :  */
      64             : 
      /*
       * PG_Locale_Strategy: which family of character-classification routines
       * the pg_wc_* functions dispatch to.  The value is chosen by
       * pg_set_regex_collation() and kept in pg_regex_strategy.
       */
      65             : typedef enum
      66             : {
      67             :     PG_REGEX_LOCALE_C,          /* C locale (encoding independent) */
      68             :     PG_REGEX_LOCALE_WIDE,       /* Use <wctype.h> functions */
      69             :     PG_REGEX_LOCALE_1BYTE,      /* Use <ctype.h> functions */
      70             :     PG_REGEX_LOCALE_WIDE_L,     /* Use locale_t <wctype.h> functions */
      71             :     PG_REGEX_LOCALE_1BYTE_L,    /* Use locale_t <ctype.h> functions */
      72             :     PG_REGEX_LOCALE_ICU         /* Use ICU uchar.h functions */
      73             : } PG_Locale_Strategy;
      74             : 
      /*
       * Active collation state, assigned by pg_set_regex_collation().
       *
       * pg_regex_strategy selects the code path in each pg_wc_* function.
       * pg_regex_locale is zero for the C and default collations; otherwise it
       * is the locale object whose info.lt member is handed to the *_l library
       * functions.  pg_regex_collation records the collation OID last set
       * (C_COLLATION_OID whenever the C strategy was chosen).
       */
      75             : static PG_Locale_Strategy pg_regex_strategy;
      76             : static pg_locale_t pg_regex_locale;
      77             : static Oid  pg_regex_collation;
      78             : 
      79             : /*
      80             :  * Hard-wired character properties for C locale
      81             :  */
      /*
       * Property bits stored in the entries of pg_char_properties[].  The
       * individual bits together occupy exactly the eight bits 0x01 .. 0x80.
       */
      82             : #define PG_ISDIGIT  0x01
      83             : #define PG_ISALPHA  0x02
      /*
       * PG_ISALNUM is a mask, not a bit of its own: "props & PG_ISALNUM" is
       * nonzero when either the DIGIT or the ALPHA bit is set.
       */
      84             : #define PG_ISALNUM  (PG_ISDIGIT | PG_ISALPHA)
      85             : #define PG_ISUPPER  0x04
      86             : #define PG_ISLOWER  0x08
      87             : #define PG_ISGRAPH  0x10
      88             : #define PG_ISPRINT  0x20
      89             : #define PG_ISPUNCT  0x40
      90             : #define PG_ISSPACE  0x80
      91             : 
      /*
       * pg_char_properties: property bits for each of the 128 ASCII codes,
       * indexed directly by character code.  Each entry is an OR of the PG_IS*
       * bits (zero for characters with none of the properties).  The table has
       * no entries beyond 127, so callers must verify c <= 127 before indexing,
       * as the PG_REGEX_LOCALE_C branches of the pg_wc_* functions do.
       */
      92             : static const unsigned char pg_char_properties[128] = {
      93             :      /* NUL */ 0,
      94             :      /* ^A */ 0,
      95             :      /* ^B */ 0,
      96             :      /* ^C */ 0,
      97             :      /* ^D */ 0,
      98             :      /* ^E */ 0,
      99             :      /* ^F */ 0,
     100             :      /* ^G */ 0,
     101             :      /* ^H */ 0,
     102             :      /* ^I */ PG_ISSPACE,
     103             :      /* ^J */ PG_ISSPACE,
     104             :      /* ^K */ PG_ISSPACE,
     105             :      /* ^L */ PG_ISSPACE,
     106             :      /* ^M */ PG_ISSPACE,
     107             :      /* ^N */ 0,
     108             :      /* ^O */ 0,
     109             :      /* ^P */ 0,
     110             :      /* ^Q */ 0,
     111             :      /* ^R */ 0,
     112             :      /* ^S */ 0,
     113             :      /* ^T */ 0,
     114             :      /* ^U */ 0,
     115             :      /* ^V */ 0,
     116             :      /* ^W */ 0,
     117             :      /* ^X */ 0,
     118             :      /* ^Y */ 0,
     119             :      /* ^Z */ 0,
     120             :      /* ^[ */ 0,
     121             :      /* ^\ */ 0,
     122             :      /* ^] */ 0,
     123             :      /* ^^ */ 0,
     124             :      /* ^_ */ 0,
     /* code 32, the space character: printable whitespace, but not graphic */
     125             :      /* */ PG_ISPRINT | PG_ISSPACE,
     126             :      /* !  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     127             :      /* "  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     128             :      /* #  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     129             :      /* $  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     130             :      /* %  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     131             :      /* &  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     132             :      /* '  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     133             :      /* (  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     134             :      /* )  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     135             :      /* *  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     136             :      /* +  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     137             :      /* ,  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     138             :      /* -  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     139             :      /* .  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     140             :      /* /  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     141             :      /* 0  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     142             :      /* 1  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     143             :      /* 2  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     144             :      /* 3  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     145             :      /* 4  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     146             :      /* 5  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     147             :      /* 6  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     148             :      /* 7  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     149             :      /* 8  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     150             :      /* 9  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     151             :      /* :  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     152             :      /* ;  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     153             :      /* <  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     154             :      /* =  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     155             :      /* >  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     156             :      /* ?  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     157             :      /* @  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     158             :      /* A  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     159             :      /* B  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     160             :      /* C  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     161             :      /* D  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     162             :      /* E  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     163             :      /* F  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     164             :      /* G  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     165             :      /* H  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     166             :      /* I  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     167             :      /* J  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     168             :      /* K  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     169             :      /* L  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     170             :      /* M  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     171             :      /* N  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     172             :      /* O  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     173             :      /* P  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     174             :      /* Q  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     175             :      /* R  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     176             :      /* S  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     177             :      /* T  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     178             :      /* U  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     179             :      /* V  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     180             :      /* W  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     181             :      /* X  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     182             :      /* Y  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     183             :      /* Z  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     184             :      /* [  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     185             :      /* \  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     186             :      /* ]  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     187             :      /* ^  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     188             :      /* _  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     189             :      /* `  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     190             :      /* a  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     191             :      /* b  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     192             :      /* c  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     193             :      /* d  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     194             :      /* e  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     195             :      /* f  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     196             :      /* g  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     197             :      /* h  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     198             :      /* i  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     199             :      /* j  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     200             :      /* k  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     201             :      /* l  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     202             :      /* m  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     203             :      /* n  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     204             :      /* o  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     205             :      /* p  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     206             :      /* q  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     207             :      /* r  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     208             :      /* s  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     209             :      /* t  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     210             :      /* u  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     211             :      /* v  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     212             :      /* w  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     213             :      /* x  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     214             :      /* y  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     215             :      /* z  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     216             :      /* {  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     217             :      /* |  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     218             :      /* }  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     219             :      /* ~  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     220             :      /* DEL */ 0
     221             : };
     222             : 
     223             : 
     224             : /*
     225             :  * pg_set_regex_collation: set collation for these functions to obey
     226             :  *
     227             :  * This is called when beginning compilation or execution of a regexp.
     228             :  * Since there's no need for reentrancy of regexp operations, it's okay
     229             :  * to store the results in static variables.
     230             :  */
     231             : void
     232       33811 : pg_set_regex_collation(Oid collation)
     233             : {
     234       33811 :     if (lc_ctype_is_c(collation))
     235             :     {
     236             :         /* C/POSIX collations use this path regardless of database encoding */
     237           2 :         pg_regex_strategy = PG_REGEX_LOCALE_C;
     238           2 :         pg_regex_locale = 0;
                               /* note: C_COLLATION_OID is recorded here, not the OID passed in */
     239           2 :         pg_regex_collation = C_COLLATION_OID;
     240             :     }
     241             :     else
     242             :     {
                               /*
                                * Step 1: settle pg_regex_locale.  It stays zero for the default
                                * collation, is looked up for any other valid OID, and an invalid
                                * OID is reported as an error.
                                */
     243       33809 :         if (collation == DEFAULT_COLLATION_OID)
     244       33809 :             pg_regex_locale = 0;
     245           0 :         else if (OidIsValid(collation))
     246             :         {
     247             :             /*
     248             :              * NB: pg_newlocale_from_collation will fail if not HAVE_LOCALE_T;
     249             :              * the case of pg_regex_locale != 0 but not HAVE_LOCALE_T does not
     250             :              * have to be considered below.
     251             :              */
     252           0 :             pg_regex_locale = pg_newlocale_from_collation(collation);
     253             :         }
     254             :         else
     255             :         {
     256             :             /*
     257             :              * This typically means that the parser could not resolve a
     258             :              * conflict of implicit collations, so report it that way.
     259             :              */
     260           0 :             ereport(ERROR,
     261             :                     (errcode(ERRCODE_INDETERMINATE_COLLATION),
     262             :                      errmsg("could not determine which collation to use for regular expression"),
     263             :                      errhint("Use the COLLATE clause to set the collation explicitly.")));
     264             :         }
     265             : 
                               /*
                                * Step 2: pick the strategy.  The #ifdef sections below form one
                                * if / else-if / else chain: an ICU-provided locale gets the ICU
                                * strategy (only when built with USE_ICU); otherwise a UTF8
                                * database gets the wide-character strategies (only when built
                                * with USE_WIDE_UPPER_LOWER); everything else gets the
                                * single-byte strategies.  Within the last two tiers the _L
                                * variant is chosen exactly when pg_regex_locale is nonzero.
                                */
     266             : #ifdef USE_ICU
     267             :         if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
     268             :             pg_regex_strategy = PG_REGEX_LOCALE_ICU;
     269             :         else
     270             : #endif
     271             : #ifdef USE_WIDE_UPPER_LOWER
     272       33809 :         if (GetDatabaseEncoding() == PG_UTF8)
     273             :         {
     274       33809 :             if (pg_regex_locale)
     275           0 :                 pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
     276             :             else
     277       33809 :                 pg_regex_strategy = PG_REGEX_LOCALE_WIDE;
     278             :         }
     279             :         else
     280             : #endif                          /* USE_WIDE_UPPER_LOWER */
     281             :         {
     282           0 :             if (pg_regex_locale)
     283           0 :                 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
     284             :             else
     285           0 :                 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE;
     286             :         }
     287             : 
     288       33809 :         pg_regex_collation = collation;
     289             :     }
     290       33811 : }
     291             : 
     /*
      * pg_wc_isdigit: test whether "c" is a digit under the strategy currently
      * held in pg_regex_strategy.  Returns nonzero if so, zero if not.
      *
      *  C        - table lookup in pg_char_properties[]; codes above 127 never
      *             match.
      *  WIDE     - iswdigit(), provided USE_WIDE_UPPER_LOWER is defined and
      *             either wchar_t is at least 4 bytes or c <= 0xFFFF; otherwise
      *             control falls through to the 1BYTE case.
      *  1BYTE    - isdigit(); codes above UCHAR_MAX never match.
      *  WIDE_L,
      *  1BYTE_L  - the same two tiers using iswdigit_l()/isdigit_l() with
      *             pg_regex_locale->info.lt; compiled only if HAVE_LOCALE_T.
      *  ICU      - u_isdigit(); compiled only if USE_ICU.
      *
      * A case whose code is compiled out breaks to the final "return 0".
      */
     292             : static int
     293        6148 : pg_wc_isdigit(pg_wchar c)
     294             : {
     295        6148 :     switch (pg_regex_strategy)
     296             :     {
     297             :         case PG_REGEX_LOCALE_C:
     298           0 :             return (c <= (pg_wchar) 127 &&
     299           0 :                     (pg_char_properties[c] & PG_ISDIGIT));
     300             :         case PG_REGEX_LOCALE_WIDE:
     301             : #ifdef USE_WIDE_UPPER_LOWER
     302             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     303        6148 :                 return iswdigit((wint_t) c);
     304             : #endif
     305             :             /* FALL THRU */
     306             :         case PG_REGEX_LOCALE_1BYTE:
     307           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     308           0 :                     isdigit((unsigned char) c));
     309             :         case PG_REGEX_LOCALE_WIDE_L:
     310             : #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
     311             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     312           0 :                 return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
     313             : #endif
     314             :             /* FALL THRU */
     315             :         case PG_REGEX_LOCALE_1BYTE_L:
     316             : #ifdef HAVE_LOCALE_T
     317           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     318           0 :                     isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
     319             : #endif
     320             :             break;
     321             :         case PG_REGEX_LOCALE_ICU:
     322             : #ifdef USE_ICU
     323             :             return u_isdigit(c);
     324             : #endif
     325           0 :             break;
     326             :     }
     327           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     328             : }
     329             : 
     /*
      * pg_wc_isalpha: test whether "c" is alphabetic under the strategy
      * currently held in pg_regex_strategy.  Returns nonzero if so, zero if not.
      *
      *  C        - PG_ISALPHA bit of pg_char_properties[c]; codes above 127
      *             never match.
      *  WIDE     - iswalpha() when USE_WIDE_UPPER_LOWER is defined and either
      *             wchar_t is at least 4 bytes or c <= 0xFFFF; otherwise falls
      *             through to the 1BYTE case.
      *  1BYTE    - isalpha(), limited to codes <= UCHAR_MAX.
      *  WIDE_L,
      *  1BYTE_L  - as above but via iswalpha_l()/isalpha_l() and
      *             pg_regex_locale->info.lt (requires HAVE_LOCALE_T).
      *  ICU      - u_isalpha() (requires USE_ICU).
      */
     330             : static int
     331           8 : pg_wc_isalpha(pg_wchar c)
     332             : {
     333           8 :     switch (pg_regex_strategy)
     334             :     {
     335             :         case PG_REGEX_LOCALE_C:
     336           0 :             return (c <= (pg_wchar) 127 &&
     337           0 :                     (pg_char_properties[c] & PG_ISALPHA));
     338             :         case PG_REGEX_LOCALE_WIDE:
     339             : #ifdef USE_WIDE_UPPER_LOWER
     340             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     341           8 :                 return iswalpha((wint_t) c);
     342             : #endif
     343             :             /* FALL THRU */
     344             :         case PG_REGEX_LOCALE_1BYTE:
     345           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     346           0 :                     isalpha((unsigned char) c));
     347             :         case PG_REGEX_LOCALE_WIDE_L:
     348             : #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
     349             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     350           0 :                 return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
     351             : #endif
     352             :             /* FALL THRU */
     353             :         case PG_REGEX_LOCALE_1BYTE_L:
     354             : #ifdef HAVE_LOCALE_T
     355           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     356           0 :                     isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
     357             : #endif
     358             :             break;
     359             :         case PG_REGEX_LOCALE_ICU:
     360             : #ifdef USE_ICU
     361             :             return u_isalpha(c);
     362             : #endif
     363           0 :             break;
     364             :     }
     365           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     366             : }
     367             : 
     /*
      * pg_wc_isalnum: test whether "c" is a letter or digit under the strategy
      * currently held in pg_regex_strategy.  Returns nonzero if so, zero if not.
      *
      *  C        - pg_char_properties[c] masked with PG_ISALNUM, which covers
      *             both the DIGIT and ALPHA bits, so either one suffices; codes
      *             above 127 never match.
      *  WIDE     - iswalnum() when USE_WIDE_UPPER_LOWER is defined and either
      *             wchar_t is at least 4 bytes or c <= 0xFFFF; otherwise falls
      *             through to the 1BYTE case.
      *  1BYTE    - isalnum(), limited to codes <= UCHAR_MAX.
      *  WIDE_L,
      *  1BYTE_L  - as above but via iswalnum_l()/isalnum_l() and
      *             pg_regex_locale->info.lt (requires HAVE_LOCALE_T).
      *  ICU      - u_isalnum() (requires USE_ICU).
      */
     368             : static int
     369        2082 : pg_wc_isalnum(pg_wchar c)
     370             : {
     371        2082 :     switch (pg_regex_strategy)
     372             :     {
     373             :         case PG_REGEX_LOCALE_C:
     374           2 :             return (c <= (pg_wchar) 127 &&
     375           1 :                     (pg_char_properties[c] & PG_ISALNUM));
     376             :         case PG_REGEX_LOCALE_WIDE:
     377             : #ifdef USE_WIDE_UPPER_LOWER
     378             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     379        2081 :                 return iswalnum((wint_t) c);
     380             : #endif
     381             :             /* FALL THRU */
     382             :         case PG_REGEX_LOCALE_1BYTE:
     383           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     384           0 :                     isalnum((unsigned char) c));
     385             :         case PG_REGEX_LOCALE_WIDE_L:
     386             : #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
     387             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     388           0 :                 return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
     389             : #endif
     390             :             /* FALL THRU */
     391             :         case PG_REGEX_LOCALE_1BYTE_L:
     392             : #ifdef HAVE_LOCALE_T
     393           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     394           0 :                     isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
     395             : #endif
     396             :             break;
     397             :         case PG_REGEX_LOCALE_ICU:
     398             : #ifdef USE_ICU
     399             :             return u_isalnum(c);
     400             : #endif
     401           0 :             break;
     402             :     }
     403           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     404             : }
     405             : 
     /*
      * pg_wc_isupper: test whether "c" is an upper-case letter under the
      * strategy currently held in pg_regex_strategy.  Returns nonzero if so,
      * zero if not.
      *
      *  C        - PG_ISUPPER bit of pg_char_properties[c]; codes above 127
      *             never match.
      *  WIDE     - iswupper() when USE_WIDE_UPPER_LOWER is defined and either
      *             wchar_t is at least 4 bytes or c <= 0xFFFF; otherwise falls
      *             through to the 1BYTE case.
      *  1BYTE    - isupper(), limited to codes <= UCHAR_MAX.
      *  WIDE_L,
      *  1BYTE_L  - as above but via iswupper_l()/isupper_l() and
      *             pg_regex_locale->info.lt (requires HAVE_LOCALE_T).
      *  ICU      - u_isupper() (requires USE_ICU).
      */
     406             : static int
     407           0 : pg_wc_isupper(pg_wchar c)
     408             : {
     409           0 :     switch (pg_regex_strategy)
     410             :     {
     411             :         case PG_REGEX_LOCALE_C:
     412           0 :             return (c <= (pg_wchar) 127 &&
     413           0 :                     (pg_char_properties[c] & PG_ISUPPER));
     414             :         case PG_REGEX_LOCALE_WIDE:
     415             : #ifdef USE_WIDE_UPPER_LOWER
     416             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     417           0 :                 return iswupper((wint_t) c);
     418             : #endif
     419             :             /* FALL THRU */
     420             :         case PG_REGEX_LOCALE_1BYTE:
     421           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     422           0 :                     isupper((unsigned char) c));
     423             :         case PG_REGEX_LOCALE_WIDE_L:
     424             : #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
     425             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     426           0 :                 return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
     427             : #endif
     428             :             /* FALL THRU */
     429             :         case PG_REGEX_LOCALE_1BYTE_L:
     430             : #ifdef HAVE_LOCALE_T
     431           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     432           0 :                     isupper_l((unsigned char) c, pg_regex_locale->info.lt));
     433             : #endif
     434             :             break;
     435             :         case PG_REGEX_LOCALE_ICU:
     436             : #ifdef USE_ICU
     437             :             return u_isupper(c);
     438             : #endif
     439           0 :             break;
     440             :     }
     441           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     442             : }
     443             : 
     /*
      * pg_wc_islower: test whether "c" is a lower-case letter under the
      * strategy currently held in pg_regex_strategy.  Returns nonzero if so,
      * zero if not.
      *
      *  C        - PG_ISLOWER bit of pg_char_properties[c]; codes above 127
      *             never match.
      *  WIDE     - iswlower() when USE_WIDE_UPPER_LOWER is defined and either
      *             wchar_t is at least 4 bytes or c <= 0xFFFF; otherwise falls
      *             through to the 1BYTE case.
      *  1BYTE    - islower(), limited to codes <= UCHAR_MAX.
      *  WIDE_L,
      *  1BYTE_L  - as above but via iswlower_l()/islower_l() and
      *             pg_regex_locale->info.lt (requires HAVE_LOCALE_T).
      *  ICU      - u_islower() (requires USE_ICU).
      */
     444             : static int
     445           0 : pg_wc_islower(pg_wchar c)
     446             : {
     447           0 :     switch (pg_regex_strategy)
     448             :     {
     449             :         case PG_REGEX_LOCALE_C:
     450           0 :             return (c <= (pg_wchar) 127 &&
     451           0 :                     (pg_char_properties[c] & PG_ISLOWER));
     452             :         case PG_REGEX_LOCALE_WIDE:
     453             : #ifdef USE_WIDE_UPPER_LOWER
     454             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     455           0 :                 return iswlower((wint_t) c);
     456             : #endif
     457             :             /* FALL THRU */
     458             :         case PG_REGEX_LOCALE_1BYTE:
     459           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     460           0 :                     islower((unsigned char) c));
     461             :         case PG_REGEX_LOCALE_WIDE_L:
     462             : #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
     463             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     464           0 :                 return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
     465             : #endif
     466             :             /* FALL THRU */
     467             :         case PG_REGEX_LOCALE_1BYTE_L:
     468             : #ifdef HAVE_LOCALE_T
     469           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     470           0 :                     islower_l((unsigned char) c, pg_regex_locale->info.lt));
     471             : #endif
     472             :             break;
     473             :         case PG_REGEX_LOCALE_ICU:
     474             : #ifdef USE_ICU
     475             :             return u_islower(c);
     476             : #endif
     477           0 :             break;
     478             :     }
     479           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     480             : }
     481             : 
     /*
      * pg_wc_isgraph: test whether "c" is a graphic character under the
      * strategy currently held in pg_regex_strategy.  Returns nonzero if so,
      * zero if not.
      *
      *  C        - PG_ISGRAPH bit of pg_char_properties[c]; codes above 127
      *             never match.
      *  WIDE     - iswgraph() when USE_WIDE_UPPER_LOWER is defined and either
      *             wchar_t is at least 4 bytes or c <= 0xFFFF; otherwise falls
      *             through to the 1BYTE case.
      *  1BYTE    - isgraph(), limited to codes <= UCHAR_MAX.
      *  WIDE_L,
      *  1BYTE_L  - as above but via iswgraph_l()/isgraph_l() and
      *             pg_regex_locale->info.lt (requires HAVE_LOCALE_T).
      *  ICU      - u_isgraph() (requires USE_ICU).
      */
     482             : static int
     483           0 : pg_wc_isgraph(pg_wchar c)
     484             : {
     485           0 :     switch (pg_regex_strategy)
     486             :     {
     487             :         case PG_REGEX_LOCALE_C:
     488           0 :             return (c <= (pg_wchar) 127 &&
     489           0 :                     (pg_char_properties[c] & PG_ISGRAPH));
     490             :         case PG_REGEX_LOCALE_WIDE:
     491             : #ifdef USE_WIDE_UPPER_LOWER
     492             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     493           0 :                 return iswgraph((wint_t) c);
     494             : #endif
     495             :             /* FALL THRU */
     496             :         case PG_REGEX_LOCALE_1BYTE:
     497           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     498           0 :                     isgraph((unsigned char) c));
     499             :         case PG_REGEX_LOCALE_WIDE_L:
     500             : #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
     501             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     502           0 :                 return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
     503             : #endif
     504             :             /* FALL THRU */
     505             :         case PG_REGEX_LOCALE_1BYTE_L:
     506             : #ifdef HAVE_LOCALE_T
     507           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     508           0 :                     isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
     509             : #endif
     510             :             break;
     511             :         case PG_REGEX_LOCALE_ICU:
     512             : #ifdef USE_ICU
     513             :             return u_isgraph(c);
     514             : #endif
     515           0 :             break;
     516             :     }
     517           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     518             : }
     519             : 
     520             : static int
     521           0 : pg_wc_isprint(pg_wchar c)
     522             : {
     523           0 :     switch (pg_regex_strategy)
     524             :     {
     525             :         case PG_REGEX_LOCALE_C:
     526           0 :             return (c <= (pg_wchar) 127 &&
     527           0 :                     (pg_char_properties[c] & PG_ISPRINT));
     528             :         case PG_REGEX_LOCALE_WIDE:
     529             : #ifdef USE_WIDE_UPPER_LOWER
     530             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     531           0 :                 return iswprint((wint_t) c);
     532             : #endif
     533             :             /* FALL THRU */
     534             :         case PG_REGEX_LOCALE_1BYTE:
     535           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     536           0 :                     isprint((unsigned char) c));
     537             :         case PG_REGEX_LOCALE_WIDE_L:
     538             : #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
     539             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     540           0 :                 return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
     541             : #endif
     542             :             /* FALL THRU */
     543             :         case PG_REGEX_LOCALE_1BYTE_L:
     544             : #ifdef HAVE_LOCALE_T
     545           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     546           0 :                     isprint_l((unsigned char) c, pg_regex_locale->info.lt));
     547             : #endif
     548             :             break;
     549             :         case PG_REGEX_LOCALE_ICU:
     550             : #ifdef USE_ICU
     551             :             return u_isprint(c);
     552             : #endif
     553           0 :             break;
     554             :     }
     555           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     556             : }
     557             : 
     558             : static int
     559           0 : pg_wc_ispunct(pg_wchar c)
     560             : {
     561           0 :     switch (pg_regex_strategy)
     562             :     {
     563             :         case PG_REGEX_LOCALE_C:
     564           0 :             return (c <= (pg_wchar) 127 &&
     565           0 :                     (pg_char_properties[c] & PG_ISPUNCT));
     566             :         case PG_REGEX_LOCALE_WIDE:
     567             : #ifdef USE_WIDE_UPPER_LOWER
     568             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     569           0 :                 return iswpunct((wint_t) c);
     570             : #endif
     571             :             /* FALL THRU */
     572             :         case PG_REGEX_LOCALE_1BYTE:
     573           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     574           0 :                     ispunct((unsigned char) c));
     575             :         case PG_REGEX_LOCALE_WIDE_L:
     576             : #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
     577             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     578           0 :                 return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
     579             : #endif
     580             :             /* FALL THRU */
     581             :         case PG_REGEX_LOCALE_1BYTE_L:
     582             : #ifdef HAVE_LOCALE_T
     583           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     584           0 :                     ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
     585             : #endif
     586             :             break;
     587             :         case PG_REGEX_LOCALE_ICU:
     588             : #ifdef USE_ICU
     589             :             return u_ispunct(c);
     590             : #endif
     591           0 :             break;
     592             :     }
     593           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     594             : }
     595             : 
     596             : static int
     597        4096 : pg_wc_isspace(pg_wchar c)
     598             : {
     599        4096 :     switch (pg_regex_strategy)
     600             :     {
     601             :         case PG_REGEX_LOCALE_C:
     602           0 :             return (c <= (pg_wchar) 127 &&
     603           0 :                     (pg_char_properties[c] & PG_ISSPACE));
     604             :         case PG_REGEX_LOCALE_WIDE:
     605             : #ifdef USE_WIDE_UPPER_LOWER
     606             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     607        4096 :                 return iswspace((wint_t) c);
     608             : #endif
     609             :             /* FALL THRU */
     610             :         case PG_REGEX_LOCALE_1BYTE:
     611           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     612           0 :                     isspace((unsigned char) c));
     613             :         case PG_REGEX_LOCALE_WIDE_L:
     614             : #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
     615             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     616           0 :                 return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
     617             : #endif
     618             :             /* FALL THRU */
     619             :         case PG_REGEX_LOCALE_1BYTE_L:
     620             : #ifdef HAVE_LOCALE_T
     621           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     622           0 :                     isspace_l((unsigned char) c, pg_regex_locale->info.lt));
     623             : #endif
     624             :             break;
     625             :         case PG_REGEX_LOCALE_ICU:
     626             : #ifdef USE_ICU
     627             :             return u_isspace(c);
     628             : #endif
     629           0 :             break;
     630             :     }
     631           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     632             : }
     633             : 
     634             : static pg_wchar
     635          16 : pg_wc_toupper(pg_wchar c)
     636             : {
     637          16 :     switch (pg_regex_strategy)
     638             :     {
     639             :         case PG_REGEX_LOCALE_C:
     640           0 :             if (c <= (pg_wchar) 127)
     641           0 :                 return pg_ascii_toupper((unsigned char) c);
     642           0 :             return c;
     643             :         case PG_REGEX_LOCALE_WIDE:
     644             :             /* force C behavior for ASCII characters, per comments above */
     645          16 :             if (c <= (pg_wchar) 127)
     646          16 :                 return pg_ascii_toupper((unsigned char) c);
     647             : #ifdef USE_WIDE_UPPER_LOWER
     648             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     649           0 :                 return towupper((wint_t) c);
     650             : #endif
     651             :             /* FALL THRU */
     652             :         case PG_REGEX_LOCALE_1BYTE:
     653             :             /* force C behavior for ASCII characters, per comments above */
     654           0 :             if (c <= (pg_wchar) 127)
     655           0 :                 return pg_ascii_toupper((unsigned char) c);
     656           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     657           0 :                 return toupper((unsigned char) c);
     658           0 :             return c;
     659             :         case PG_REGEX_LOCALE_WIDE_L:
     660             : #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
     661             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     662           0 :                 return towupper_l((wint_t) c, pg_regex_locale->info.lt);
     663             : #endif
     664             :             /* FALL THRU */
     665             :         case PG_REGEX_LOCALE_1BYTE_L:
     666             : #ifdef HAVE_LOCALE_T
     667           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     668           0 :                 return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
     669             : #endif
     670           0 :             return c;
     671             :         case PG_REGEX_LOCALE_ICU:
     672             : #ifdef USE_ICU
     673             :             return u_toupper(c);
     674             : #endif
     675           0 :             break;
     676             :     }
     677           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     678             : }
     679             : 
     680             : static pg_wchar
     681          16 : pg_wc_tolower(pg_wchar c)
     682             : {
     683          16 :     switch (pg_regex_strategy)
     684             :     {
     685             :         case PG_REGEX_LOCALE_C:
     686           0 :             if (c <= (pg_wchar) 127)
     687           0 :                 return pg_ascii_tolower((unsigned char) c);
     688           0 :             return c;
     689             :         case PG_REGEX_LOCALE_WIDE:
     690             :             /* force C behavior for ASCII characters, per comments above */
     691          16 :             if (c <= (pg_wchar) 127)
     692          16 :                 return pg_ascii_tolower((unsigned char) c);
     693             : #ifdef USE_WIDE_UPPER_LOWER
     694             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     695           0 :                 return towlower((wint_t) c);
     696             : #endif
     697             :             /* FALL THRU */
     698             :         case PG_REGEX_LOCALE_1BYTE:
     699             :             /* force C behavior for ASCII characters, per comments above */
     700           0 :             if (c <= (pg_wchar) 127)
     701           0 :                 return pg_ascii_tolower((unsigned char) c);
     702           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     703           0 :                 return tolower((unsigned char) c);
     704           0 :             return c;
     705             :         case PG_REGEX_LOCALE_WIDE_L:
     706             : #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
     707             :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     708           0 :                 return towlower_l((wint_t) c, pg_regex_locale->info.lt);
     709             : #endif
     710             :             /* FALL THRU */
     711             :         case PG_REGEX_LOCALE_1BYTE_L:
     712             : #ifdef HAVE_LOCALE_T
     713           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     714           0 :                 return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
     715             : #endif
     716           0 :             return c;
     717             :         case PG_REGEX_LOCALE_ICU:
     718             : #ifdef USE_ICU
     719             :             return u_tolower(c);
     720             : #endif
     721           0 :             break;
     722             :     }
     723           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     724             : }
     725             : 
     726             : 
     727             : /*
     728             :  * These functions cache the results of probing libc's ctype behavior for
     729             :  * all character codes of interest in a given encoding/collation.  The
     730             :  * result is provided as a "struct cvec", but notice that the representation
     731             :  * is a touch different from a cvec created by regc_cvec.c: we allocate the
     732             :  * chrs[] and ranges[] arrays separately from the struct so that we can
     733             :  * realloc them larger at need.  This is okay since the cvecs made here
     734             :  * should never be freed by freecvec().
     735             :  *
     736             :  * We use malloc not palloc since we mustn't lose control on out-of-memory;
     737             :  * the main regex code expects us to return a failure indication instead.
     738             :  */
     739             : 
     740             : typedef int (*pg_wc_probefunc) (pg_wchar c);
     741             : 
     742             : typedef struct pg_ctype_cache
     743             : {
     744             :     pg_wc_probefunc probefunc;  /* pg_wc_isalpha or a sibling */
     745             :     Oid         collation;      /* collation this entry is for */
     746             :     struct cvec cv;             /* cache entry contents */
     747             :     struct pg_ctype_cache *next;    /* chain link */
     748             : } pg_ctype_cache;
     749             : 
     750             : static pg_ctype_cache *pg_ctype_cache_list = NULL;
     751             : 
     752             : /*
     753             :  * Add a chr or range to pcc->cv; return false if run out of memory
     754             :  */
     755             : static bool
     756          51 : store_match(pg_ctype_cache *pcc, pg_wchar chr1, int nchrs)
     757             : {
     758             :     chr        *newchrs;
     759             : 
     760          51 :     if (nchrs > 1)
     761             :     {
     762          36 :         if (pcc->cv.nranges >= pcc->cv.rangespace)
     763             :         {
     764           0 :             pcc->cv.rangespace *= 2;
     765           0 :             newchrs = (chr *) realloc(pcc->cv.ranges,
     766           0 :                                       pcc->cv.rangespace * sizeof(chr) * 2);
     767           0 :             if (newchrs == NULL)
     768           0 :                 return false;
     769           0 :             pcc->cv.ranges = newchrs;
     770             :         }
     771          36 :         pcc->cv.ranges[pcc->cv.nranges * 2] = chr1;
     772          36 :         pcc->cv.ranges[pcc->cv.nranges * 2 + 1] = chr1 + nchrs - 1;
     773          36 :         pcc->cv.nranges++;
     774             :     }
     775             :     else
     776             :     {
     777          15 :         assert(nchrs == 1);
     778          15 :         if (pcc->cv.nchrs >= pcc->cv.chrspace)
     779             :         {
     780           0 :             pcc->cv.chrspace *= 2;
     781           0 :             newchrs = (chr *) realloc(pcc->cv.chrs,
     782           0 :                                       pcc->cv.chrspace * sizeof(chr));
     783           0 :             if (newchrs == NULL)
     784           0 :                 return false;
     785           0 :             pcc->cv.chrs = newchrs;
     786             :         }
     787          15 :         pcc->cv.chrs[pcc->cv.nchrs++] = chr1;
     788             :     }
     789          51 :     return true;
     790             : }
     791             : 
     792             : /*
     793             :  * Given a probe function (e.g., pg_wc_isalpha) get a struct cvec for all
     794             :  * chrs satisfying the probe function.  The active collation is the one
     795             :  * previously set by pg_set_regex_collation.  Return NULL if out of memory.
     796             :  *
     797             :  * Note that the result must not be freed or modified by caller.
     798             :  */
     799             : static struct cvec *
     800          18 : pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
     801             : {
     802             :     pg_ctype_cache *pcc;
     803             :     pg_wchar    max_chr;
     804             :     pg_wchar    cur_chr;
     805             :     int         nmatches;
     806             :     chr        *newchrs;
     807             : 
     808             :     /*
     809             :      * Do we already have the answer cached?
     810             :      */
     811          19 :     for (pcc = pg_ctype_cache_list; pcc != NULL; pcc = pcc->next)
     812             :     {
     813          25 :         if (pcc->probefunc == probefunc &&
     814          12 :             pcc->collation == pg_regex_collation)
     815          12 :             return &pcc->cv;
     816             :     }
     817             : 
     818             :     /*
     819             :      * Nope, so initialize some workspace ...
     820             :      */
     821           6 :     pcc = (pg_ctype_cache *) malloc(sizeof(pg_ctype_cache));
     822           6 :     if (pcc == NULL)
     823           0 :         return NULL;
     824           6 :     pcc->probefunc = probefunc;
     825           6 :     pcc->collation = pg_regex_collation;
     826           6 :     pcc->cv.nchrs = 0;
     827           6 :     pcc->cv.chrspace = 128;
     828           6 :     pcc->cv.chrs = (chr *) malloc(pcc->cv.chrspace * sizeof(chr));
     829           6 :     pcc->cv.nranges = 0;
     830           6 :     pcc->cv.rangespace = 64;
     831           6 :     pcc->cv.ranges = (chr *) malloc(pcc->cv.rangespace * sizeof(chr) * 2);
     832           6 :     if (pcc->cv.chrs == NULL || pcc->cv.ranges == NULL)
     833             :         goto out_of_memory;
     834           6 :     pcc->cv.cclasscode = cclasscode;
     835             : 
     836             :     /*
     837             :      * Decide how many character codes we ought to look through.  In general
     838             :      * we don't go past MAX_SIMPLE_CHR; chr codes above that are handled at
     839             :      * runtime using the "high colormap" mechanism.  However, in C locale
     840             :      * there's no need to go further than 127, and if we only have a 1-byte
     841             :      * <ctype.h> API there's no need to go further than that can handle.
     842             :      *
     843             :      * If it's not MAX_SIMPLE_CHR that's constraining the search, mark the
     844             :      * output cvec as not having any locale-dependent behavior, since there
     845             :      * will be no need to do any run-time locale checks.  (The #if's here
     846             :      * would always be true for production values of MAX_SIMPLE_CHR, but it's
     847             :      * useful to allow it to be small for testing purposes.)
     848             :      */
     849           6 :     switch (pg_regex_strategy)
     850             :     {
     851             :         case PG_REGEX_LOCALE_C:
     852             : #if MAX_SIMPLE_CHR >= 127
     853           0 :             max_chr = (pg_wchar) 127;
     854           0 :             pcc->cv.cclasscode = -1;
     855             : #else
     856             :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     857             : #endif
     858           0 :             break;
     859             :         case PG_REGEX_LOCALE_WIDE:
     860             :         case PG_REGEX_LOCALE_WIDE_L:
     861           6 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     862           6 :             break;
     863             :         case PG_REGEX_LOCALE_1BYTE:
     864             :         case PG_REGEX_LOCALE_1BYTE_L:
     865             : #if MAX_SIMPLE_CHR >= UCHAR_MAX
     866           0 :             max_chr = (pg_wchar) UCHAR_MAX;
     867           0 :             pcc->cv.cclasscode = -1;
     868             : #else
     869             :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     870             : #endif
     871           0 :             break;
     872             :         case PG_REGEX_LOCALE_ICU:
     873           0 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     874           0 :             break;
     875             :         default:
     876           0 :             max_chr = 0;        /* can't get here, but keep compiler quiet */
     877           0 :             break;
     878             :     }
     879             : 
     880             :     /*
     881             :      * And scan 'em ...
     882             :      */
     883           6 :     nmatches = 0;               /* number of consecutive matches */
     884             : 
     885       12294 :     for (cur_chr = 0; cur_chr <= max_chr; cur_chr++)
     886             :     {
     887       12288 :         if ((*probefunc) (cur_chr))
     888        1495 :             nmatches++;
     889       10793 :         else if (nmatches > 0)
     890             :         {
     891          51 :             if (!store_match(pcc, cur_chr - nmatches, nmatches))
     892           0 :                 goto out_of_memory;
     893          51 :             nmatches = 0;
     894             :         }
     895             :     }
     896             : 
     897           6 :     if (nmatches > 0)
     898           0 :         if (!store_match(pcc, cur_chr - nmatches, nmatches))
     899           0 :             goto out_of_memory;
     900             : 
     901             :     /*
     902             :      * We might have allocated more memory than needed, if so free it
     903             :      */
     904           6 :     if (pcc->cv.nchrs == 0)
     905             :     {
     906           3 :         free(pcc->cv.chrs);
     907           3 :         pcc->cv.chrs = NULL;
     908           3 :         pcc->cv.chrspace = 0;
     909             :     }
     910           3 :     else if (pcc->cv.nchrs < pcc->cv.chrspace)
     911             :     {
     912           3 :         newchrs = (chr *) realloc(pcc->cv.chrs,
     913           3 :                                   pcc->cv.nchrs * sizeof(chr));
     914           3 :         if (newchrs == NULL)
     915           0 :             goto out_of_memory;
     916           3 :         pcc->cv.chrs = newchrs;
     917           3 :         pcc->cv.chrspace = pcc->cv.nchrs;
     918             :     }
     919           6 :     if (pcc->cv.nranges == 0)
     920             :     {
     921           0 :         free(pcc->cv.ranges);
     922           0 :         pcc->cv.ranges = NULL;
     923           0 :         pcc->cv.rangespace = 0;
     924             :     }
     925           6 :     else if (pcc->cv.nranges < pcc->cv.rangespace)
     926             :     {
     927           6 :         newchrs = (chr *) realloc(pcc->cv.ranges,
     928           6 :                                   pcc->cv.nranges * sizeof(chr) * 2);
     929           6 :         if (newchrs == NULL)
     930           0 :             goto out_of_memory;
     931           6 :         pcc->cv.ranges = newchrs;
     932           6 :         pcc->cv.rangespace = pcc->cv.nranges;
     933             :     }
     934             : 
     935             :     /*
     936             :      * Success, link it into cache chain
     937             :      */
     938           6 :     pcc->next = pg_ctype_cache_list;
     939           6 :     pg_ctype_cache_list = pcc;
     940             : 
     941           6 :     return &pcc->cv;
     942             : 
     943             :     /*
     944             :      * Failure, clean up
     945             :      */
     946             : out_of_memory:
     947           0 :     if (pcc->cv.chrs)
     948           0 :         free(pcc->cv.chrs);
     949           0 :     if (pcc->cv.ranges)
     950           0 :         free(pcc->cv.ranges);
     951           0 :     free(pcc);
     952             : 
     953           0 :     return NULL;
     954             : }

Generated by: LCOV version 1.11