LCOV - code coverage report
Current view: top level - src/backend/tsearch - ts_locale.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 74 97 76.3 %
Date: 2017-09-29 15:12:54 Functions: 10 11 90.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * ts_locale.c
       4             :  *      locale compatibility layer for tsearch
       5             :  *
       6             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       7             :  *
       8             :  *
       9             :  * IDENTIFICATION
      10             :  *    src/backend/tsearch/ts_locale.c
      11             :  *
      12             :  *-------------------------------------------------------------------------
      13             :  */
      14             : #include "postgres.h"
      15             : 
      16             : #include "catalog/pg_collation.h"
      17             : #include "storage/fd.h"
      18             : #include "tsearch/ts_locale.h"
      19             : #include "tsearch/ts_public.h"
      20             : 
      21             : static void tsearch_readline_callback(void *arg);
      22             : 
      23             : 
      24             : #ifdef USE_WIDE_UPPER_LOWER
      25             : 
      26             : int
      27         643 : t_isdigit(const char *ptr)
      28             : {
      29         643 :     int         clen = pg_mblen(ptr);
      30             :     wchar_t     character[2];
      31         643 :     Oid         collation = DEFAULT_COLLATION_OID;  /* TODO */
      32         643 :     pg_locale_t mylocale = 0;   /* TODO */
      33             : 
      34         643 :     if (clen == 1 || lc_ctype_is_c(collation))
      35         643 :         return isdigit(TOUCHAR(ptr));
      36             : 
      37           0 :     char2wchar(character, 2, ptr, clen, mylocale);
      38             : 
      39           0 :     return iswdigit((wint_t) character[0]);
      40             : }
      41             : 
      42             : int
      43      138909 : t_isspace(const char *ptr)
      44             : {
      45      138909 :     int         clen = pg_mblen(ptr);
      46             :     wchar_t     character[2];
      47      138909 :     Oid         collation = DEFAULT_COLLATION_OID;  /* TODO */
      48      138909 :     pg_locale_t mylocale = 0;   /* TODO */
      49             : 
      50      138909 :     if (clen == 1 || lc_ctype_is_c(collation))
      51      138909 :         return isspace(TOUCHAR(ptr));
      52             : 
      53           0 :     char2wchar(character, 2, ptr, clen, mylocale);
      54             : 
      55           0 :     return iswspace((wint_t) character[0]);
      56             : }
      57             : 
      58             : int
      59         984 : t_isalpha(const char *ptr)
      60             : {
      61         984 :     int         clen = pg_mblen(ptr);
      62             :     wchar_t     character[2];
      63         984 :     Oid         collation = DEFAULT_COLLATION_OID;  /* TODO */
      64         984 :     pg_locale_t mylocale = 0;   /* TODO */
      65             : 
      66         984 :     if (clen == 1 || lc_ctype_is_c(collation))
      67         984 :         return isalpha(TOUCHAR(ptr));
      68             : 
      69           0 :     char2wchar(character, 2, ptr, clen, mylocale);
      70             : 
      71           0 :     return iswalpha((wint_t) character[0]);
      72             : }
      73             : 
      74             : int
      75         357 : t_isprint(const char *ptr)
      76             : {
      77         357 :     int         clen = pg_mblen(ptr);
      78             :     wchar_t     character[2];
      79         357 :     Oid         collation = DEFAULT_COLLATION_OID;  /* TODO */
      80         357 :     pg_locale_t mylocale = 0;   /* TODO */
      81             : 
      82         357 :     if (clen == 1 || lc_ctype_is_c(collation))
      83         357 :         return isprint(TOUCHAR(ptr));
      84             : 
      85           0 :     char2wchar(character, 2, ptr, clen, mylocale);
      86             : 
      87           0 :     return iswprint((wint_t) character[0]);
      88             : }
      89             : #endif                          /* USE_WIDE_UPPER_LOWER */
      90             : 
      91             : 
      92             : /*
      93             :  * Set up to read a file using tsearch_readline().  This facility is
      94             :  * better than just reading the file directly because it provides error
      95             :  * context pointing to the specific line where a problem is detected.
      96             :  *
      97             :  * Expected usage is:
      98             :  *
      99             :  *      tsearch_readline_state trst;
     100             :  *
     101             :  *      if (!tsearch_readline_begin(&trst, filename))
     102             :  *          ereport(ERROR,
     103             :  *                  (errcode(ERRCODE_CONFIG_FILE_ERROR),
     104             :  *                   errmsg("could not open stop-word file \"%s\": %m",
     105             :  *                          filename)));
     106             :  *      while ((line = tsearch_readline(&trst)) != NULL)
     107             :  *          process line;
     108             :  *      tsearch_readline_end(&trst);
     109             :  *
     110             :  * Note that the caller supplies the ereport() for file open failure;
     111             :  * this is so that a custom message can be provided.  The filename string
     112             :  * passed to tsearch_readline_begin() must remain valid through
     113             :  * tsearch_readline_end().
     114             :  */
     115             : bool
     116          61 : tsearch_readline_begin(tsearch_readline_state *stp,
     117             :                        const char *filename)
     118             : {
     119          61 :     if ((stp->fp = AllocateFile(filename, "r")) == NULL)
     120           0 :         return false;
     121          61 :     stp->filename = filename;
     122          61 :     stp->lineno = 0;
     123          61 :     stp->curline = NULL;
     124             :     /* Setup error traceback support for ereport() */
     125          61 :     stp->cb.callback = tsearch_readline_callback;
     126          61 :     stp->cb.arg = (void *) stp;
     127          61 :     stp->cb.previous = error_context_stack;
     128          61 :     error_context_stack = &stp->cb;
     129          61 :     return true;
     130             : }
     131             : 
     132             : /*
     133             :  * Read the next line from a tsearch data file (expected to be in UTF-8), and
     134             :  * convert it to database encoding if needed. The returned string is palloc'd.
     135             :  * NULL return means EOF.
     136             :  */
     137             : char *
     138        2334 : tsearch_readline(tsearch_readline_state *stp)
     139             : {
     140             :     char       *result;
     141             : 
     142        2334 :     stp->lineno++;
     143        2334 :     stp->curline = NULL;
     144        2334 :     result = t_readline(stp->fp);
     145        2334 :     stp->curline = result;
     146        2334 :     return result;
     147             : }
     148             : 
     149             : /*
     150             :  * Close down after reading a file with tsearch_readline()
     151             :  */
     152             : void
     153          61 : tsearch_readline_end(tsearch_readline_state *stp)
     154             : {
     155          61 :     FreeFile(stp->fp);
     156             :     /* Pop the error context stack */
     157          61 :     error_context_stack = stp->cb.previous;
     158          61 : }
     159             : 
     160             : /*
     161             :  * Error context callback for errors occurring while reading a tsearch
     162             :  * configuration file.
     163             :  */
     164             : static void
     165           0 : tsearch_readline_callback(void *arg)
     166             : {
     167           0 :     tsearch_readline_state *stp = (tsearch_readline_state *) arg;
     168             : 
     169             :     /*
     170             :      * We can't include the text of the config line for errors that occur
     171             :      * during t_readline() itself.  This is only partly a consequence of our
     172             :      * arms-length use of that routine: the major cause of such errors is
     173             :      * encoding violations, and we daren't try to print error messages
     174             :      * containing badly-encoded data.
     175             :      */
     176           0 :     if (stp->curline)
     177           0 :         errcontext("line %d of configuration file \"%s\": \"%s\"",
     178             :                    stp->lineno,
     179             :                    stp->filename,
     180             :                    stp->curline);
     181             :     else
     182           0 :         errcontext("line %d of configuration file \"%s\"",
     183             :                    stp->lineno,
     184             :                    stp->filename);
     185           0 : }
     186             : 
     187             : 
     188             : /*
     189             :  * Read the next line from a tsearch data file (expected to be in UTF-8), and
     190             :  * convert it to database encoding if needed. The returned string is palloc'd.
     191             :  * NULL return means EOF.
     192             :  *
     193             :  * Note: direct use of this function is now deprecated.  Go through
     194             :  * tsearch_readline() to provide better error reporting.
     195             :  */
     196             : char *
     197        2334 : t_readline(FILE *fp)
     198             : {
     199             :     int         len;
     200             :     char       *recoded;
     201             :     char        buf[4096];      /* lines must not be longer than this */
     202             : 
     203        2334 :     if (fgets(buf, sizeof(buf), fp) == NULL)
     204          52 :         return NULL;
     205             : 
     206        2282 :     len = strlen(buf);
     207             : 
     208             :     /* Make sure the input is valid UTF-8 */
     209        2282 :     (void) pg_verify_mbstr(PG_UTF8, buf, len, false);
     210             : 
     211             :     /* And convert */
     212        2282 :     recoded = pg_any_to_server(buf, len, PG_UTF8);
     213        2282 :     if (recoded == buf)
     214             :     {
     215             :         /*
     216             :          * conversion didn't pstrdup, so we must. We can use the length of the
     217             :          * original string, because no conversion was done.
     218             :          */
     219        2282 :         recoded = pnstrdup(recoded, len);
     220             :     }
     221             : 
     222        2282 :     return recoded;
     223             : }
     224             : 
     225             : /*
     226             :  * lowerstr --- fold null-terminated string to lower case
     227             :  *
     228             :  * Returned string is palloc'd
     229             :  */
     230             : char *
     231        2134 : lowerstr(const char *str)
     232             : {
     233        2134 :     return lowerstr_with_len(str, strlen(str));
     234             : }
     235             : 
     236             : /*
     237             :  * lowerstr_with_len --- fold string to lower case
     238             :  *
     239             :  * Input string need not be null-terminated.
     240             :  *
     241             :  * Returned string is palloc'd
     242             :  */
     243             : char *
     244        3884 : lowerstr_with_len(const char *str, int len)
     245             : {
     246             :     char       *out;
     247             : 
     248             : #ifdef USE_WIDE_UPPER_LOWER
     249        3884 :     Oid         collation = DEFAULT_COLLATION_OID;  /* TODO */
     250        3884 :     pg_locale_t mylocale = 0;   /* TODO */
     251             : #endif
     252             : 
     253        3884 :     if (len == 0)
     254           0 :         return pstrdup("");
     255             : 
     256             : #ifdef USE_WIDE_UPPER_LOWER
     257             : 
     258             :     /*
     259             :      * Use wide char code only when max encoding length > 1 and ctype != C.
     260             :      * Some operating systems fail with multi-byte encodings and a C locale.
     261             :      * Also, for a C locale there is no need to process as multibyte. From
     262             :      * backend/utils/adt/oracle_compat.c Teodor
     263             :      */
     264        3884 :     if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collation))
     265        3884 :     {
     266             :         wchar_t    *wstr,
     267             :                    *wptr;
     268             :         int         wlen;
     269             : 
     270             :         /*
     271             :          * alloc number of wchar_t for worst case, len contains number of
     272             :          * bytes >= number of characters and alloc 1 wchar_t for 0, because
     273             :          * wchar2char wants zero-terminated string
     274             :          */
     275        3884 :         wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
     276             : 
     277        3884 :         wlen = char2wchar(wstr, len + 1, str, len, mylocale);
     278        3884 :         Assert(wlen <= len);
     279             : 
     280       25688 :         while (*wptr)
     281             :         {
     282       17920 :             *wptr = towlower((wint_t) *wptr);
     283       17920 :             wptr++;
     284             :         }
     285             : 
     286             :         /*
     287             :          * Alloc result string for worst case + '\0'
     288             :          */
     289        3884 :         len = pg_database_encoding_max_length() * wlen + 1;
     290        3884 :         out = (char *) palloc(len);
     291             : 
     292        3884 :         wlen = wchar2char(out, wstr, len, mylocale);
     293             : 
     294        3884 :         pfree(wstr);
     295             : 
     296        3884 :         if (wlen < 0)
     297           0 :             ereport(ERROR,
     298             :                     (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
     299             :                      errmsg("conversion from wchar_t to server encoding failed: %m")));
     300        3884 :         Assert(wlen < len);
     301             :     }
     302             :     else
     303             : #endif                          /* USE_WIDE_UPPER_LOWER */
     304             :     {
     305           0 :         const char *ptr = str;
     306             :         char       *outptr;
     307             : 
     308           0 :         outptr = out = (char *) palloc(sizeof(char) * (len + 1));
     309           0 :         while ((ptr - str) < len && *ptr)
     310             :         {
     311           0 :             *outptr++ = tolower(TOUCHAR(ptr));
     312           0 :             ptr++;
     313             :         }
     314           0 :         *outptr = '\0';
     315             :     }
     316             : 
     317        3884 :     return out;
     318             : }

Generated by: LCOV version 1.11