LCOV - PostgreSQL - src/backend/parser/scansup.c

LCOV - code coverage report

Current view:	top level - src/backend/parser - scansup.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL	Lines:	56	69	81.2 %
Date:	2017-09-29 13:40:31	Functions:	5	5	100.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * scansup.c
       4             :  *    support routines for the lex/flex scanner, used by both the normal
       5             :  * backend as well as the bootstrap backend
       6             :  *
       7             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  *
      11             :  * IDENTIFICATION
      12             :  *    src/backend/parser/scansup.c
      13             :  *
      14             :  *-------------------------------------------------------------------------
      15             :  */
      16             : #include "postgres.h"
      17             : 
      18             : #include <ctype.h>
      19             : 
      20             : #include "parser/scansup.h"
      21             : #include "mb/pg_wchar.h"
      22             : 
      23             : 
      24             : /* ----------------
      25             :  *      scanstr
      26             :  *
      27             :  * if the string passed in has escaped codes, map the escape codes to actual
      28             :  * chars
      29             :  *
      30             :  * the string returned is palloc'd and should eventually be pfree'd by the
      31             :  * caller!
      32             :  * ----------------
      33             :  */
      34             : 
      35             : char *
      36       97116 : scanstr(const char *s)
      37             : {
      38             :     char       *newStr;
      39             :     int         len,
      40             :                 i,
      41             :                 j;
      42             : 
      43       97116 :     if (s == NULL || s[0] == '\0')
      44         268 :         return pstrdup("");
      45             : 
      46       96848 :     len = strlen(s);
      47             : 
      48       96848 :     newStr = palloc(len + 1);   /* string cannot get longer */
      49             : 
      50      369648 :     for (i = 0, j = 0; i < len; i++)
      51             :     {
      52      272800 :         if (s[i] == '\'')
      53             :         {
      54             :             /*
      55             :              * Note: if scanner is working right, unescaped quotes can only
      56             :              * appear in pairs, so there should be another character.
      57             :              */
      58          12 :             i++;
      59             :             /* The bootstrap parser is not as smart, so check here. */
      60          12 :             Assert(s[i] == '\'');
      61          12 :             newStr[j] = s[i];
      62             :         }
      63      272788 :         else if (s[i] == '\\')
      64             :         {
      65         167 :             i++;
      66         167 :             switch (s[i])
      67             :             {
      68             :                 case 'b':
      69           0 :                     newStr[j] = '\b';
      70           0 :                     break;
      71             :                 case 'f':
      72           0 :                     newStr[j] = '\f';
      73           0 :                     break;
      74             :                 case 'n':
      75           0 :                     newStr[j] = '\n';
      76           0 :                     break;
      77             :                 case 'r':
      78           0 :                     newStr[j] = '\r';
      79           0 :                     break;
      80             :                 case 't':
      81           0 :                     newStr[j] = '\t';
      82           0 :                     break;
      83             :                 case '0':
      84             :                 case '1':
      85             :                 case '2':
      86             :                 case '3':
      87             :                 case '4':
      88             :                 case '5':
      89             :                 case '6':
      90             :                 case '7':
      91             :                     {
      92             :                         int         k;
      93         167 :                         long        octVal = 0;
      94             : 
      95         835 :                         for (k = 0;
      96        1169 :                              s[i + k] >= '0' && s[i + k] <= '7' && k < 3;
      97         501 :                              k++)
      98         501 :                             octVal = (octVal << 3) + (s[i + k] - '0');
      99         167 :                         i += k - 1;
     100         167 :                         newStr[j] = ((char) octVal);
     101             :                     }
     102         167 :                     break;
     103             :                 default:
     104           0 :                     newStr[j] = s[i];
     105           0 :                     break;
     106             :             }                   /* switch */
     107             :         }                       /* s[i] == '\\' */
     108             :         else
     109      272621 :             newStr[j] = s[i];
     110      272800 :         j++;
     111             :     }
     112       96848 :     newStr[j] = '\0';
     113       96848 :     return newStr;
     114             : }
     115             : 
     116             : 
     117             : /*
     118             :  * downcase_truncate_identifier() --- do appropriate downcasing and
     119             :  * truncation of an unquoted identifier.  Optionally warn of truncation.
     120             :  *
     121             :  * Returns a palloc'd string containing the adjusted identifier.
     122             :  *
     123             :  * Note: in some usages the passed string is not null-terminated.
     124             :  *
     125             :  * Note: the API of this function is designed to allow for downcasing
     126             :  * transformations that increase the string length, but we don't yet
     127             :  * support that.  If you want to implement it, you'll need to fix
     128             :  * SplitIdentifierString() in utils/adt/varlena.c.
     129             :  */
     130             : char *
     131      189024 : downcase_truncate_identifier(const char *ident, int len, bool warn)
     132             : {
     133      189024 :     return downcase_identifier(ident, len, warn, true);
     134             : }
     135             : 
     136             : /*
     137             :  * a workhorse for downcase_truncate_identifier
     138             :  */
     139             : char *
     140      189041 : downcase_identifier(const char *ident, int len, bool warn, bool truncate)
     141             : {
     142             :     char       *result;
     143             :     int         i;
     144             :     bool        enc_is_single_byte;
     145             : 
     146      189041 :     result = palloc(len + 1);
     147      189041 :     enc_is_single_byte = pg_database_encoding_max_length() == 1;
     148             : 
     149             :     /*
     150             :      * SQL99 specifies Unicode-aware case normalization, which we don't yet
     151             :      * have the infrastructure for.  Instead we use tolower() to provide a
     152             :      * locale-aware translation.  However, there are some locales where this
     153             :      * is not right either (eg, Turkish may do strange things with 'i' and
     154             :      * 'I').  Our current compromise is to use tolower() for characters with
     155             :      * the high bit set, as long as they aren't part of a multi-byte
     156             :      * character, and use an ASCII-only downcasing for 7-bit characters.
     157             :      */
     158     1457173 :     for (i = 0; i < len; i++)
     159             :     {
     160     1268132 :         unsigned char ch = (unsigned char) ident[i];
     161             : 
     162     1268132 :         if (ch >= 'A' && ch <= 'Z')
     163       43784 :             ch += 'a' - 'A';
     164     1224348 :         else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
     165           0 :             ch = tolower(ch);
     166     1268132 :         result[i] = (char) ch;
     167             :     }
     168      189041 :     result[i] = '\0';
     169             : 
     170      189041 :     if (i >= NAMEDATALEN && truncate)
     171           2 :         truncate_identifier(result, i, warn);
     172             : 
     173      189041 :     return result;
     174             : }
     175             : 
     176             : 
     177             : /*
     178             :  * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
     179             :  *
     180             :  * The given string is modified in-place, if necessary.  A warning is
     181             :  * issued if requested.
     182             :  *
     183             :  * We require the caller to pass in the string length since this saves a
     184             :  * strlen() call in some common usages.
     185             :  */
     186             : void
     187        6646 : truncate_identifier(char *ident, int len, bool warn)
     188             : {
     189        6646 :     if (len >= NAMEDATALEN)
     190             :     {
     191           2 :         len = pg_mbcliplen(ident, len, NAMEDATALEN - 1);
     192           2 :         if (warn)
     193             :         {
     194             :             /*
     195             :              * We avoid using %.*s here because it can misbehave if the data
     196             :              * is not valid in what libc thinks is the prevailing encoding.
     197             :              */
     198             :             char        buf[NAMEDATALEN];
     199             : 
     200           2 :             memcpy(buf, ident, len);
     201           2 :             buf[len] = '\0';
     202           2 :             ereport(NOTICE,
     203             :                     (errcode(ERRCODE_NAME_TOO_LONG),
     204             :                      errmsg("identifier \"%s\" will be truncated to \"%s\"",
     205             :                             ident, buf)));
     206             :         }
     207           2 :         ident[len] = '\0';
     208             :     }
     209        6646 : }
     210             : 
     211             : /*
     212             :  * scanner_isspace() --- return TRUE if flex scanner considers char whitespace
     213             :  *
     214             :  * This should be used instead of the potentially locale-dependent isspace()
     215             :  * function when it's important to match the lexer's behavior.
     216             :  *
     217             :  * In principle we might need similar functions for isalnum etc, but for the
     218             :  * moment only isspace seems needed.
     219             :  */
     220             : bool
     221       46198 : scanner_isspace(char ch)
     222             : {
     223             :     /* This must match scan.l's list of {space} characters */
     224       46198 :     if (ch == ' ' ||
     225       44010 :         ch == '\t' ||
     226       43973 :         ch == '\n' ||
     227       43972 :         ch == '\r' ||
     228             :         ch == '\f')
     229        2226 :         return true;
     230       43972 :     return false;
     231             : }

Generated by: LCOV version 1.11