LCOV - code coverage report
Current view: top level - src/backend/nodes - read.c (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 99 133 74.4 %
Date: 2017-09-29 15:12:54 Functions: 5 5 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * read.c
       4             :  *    routines to convert a string (legal ascii representation of node) back
       5             :  *    to nodes
       6             :  *
       7             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  *
      11             :  * IDENTIFICATION
      12             :  *    src/backend/nodes/read.c
      13             :  *
      14             :  * HISTORY
      15             :  *    AUTHOR            DATE            MAJOR EVENT
      16             :  *    Andrew Yu         Nov 2, 1994     file creation
      17             :  *
      18             :  *-------------------------------------------------------------------------
      19             :  */
      20             : #include "postgres.h"
      21             : 
      22             : #include <ctype.h>
      23             : 
      24             : #include "nodes/pg_list.h"
      25             : #include "nodes/readfuncs.h"
      26             : #include "nodes/value.h"
      27             : 
      28             : 
      29             : /* Static state for pg_strtok: current read position in the string being parsed (set by stringToNode, advanced by pg_strtok) */
      30             : static char *pg_strtok_ptr = NULL;
      31             : 
      32             : 
      33             : /*
      34             :  * stringToNode -
      35             :  *    returns a Node with a given legal ASCII representation
      36             :  */
      37             : void *
      38        5202 : stringToNode(char *str)
      39             : {
      40             :     char       *save_strtok;
      41             :     void       *retval;
      42             : 
      43             :     /*
      44             :      * We save and restore the pre-existing state of pg_strtok. This makes the
      45             :      * world safe for re-entrant invocation of stringToNode, without incurring
      46             :      * a lot of notational overhead by having to pass the next-character
      47             :      * pointer around through all the readfuncs.c code.
      48             :      */
      49        5202 :     save_strtok = pg_strtok_ptr;
      50             : 
      51        5202 :     pg_strtok_ptr = str;        /* point pg_strtok at the string to read */
      52             : 
      53        5202 :     retval = nodeRead(NULL, 0); /* do the reading */
      54             : 
      55        5202 :     pg_strtok_ptr = save_strtok;
      56             : 
      57        5202 :     return retval;
      58             : }
      59             : 
      60             : /*****************************************************************************
      61             :  *
      62             :  * the lisp token parser
      63             :  *
      64             :  *****************************************************************************/
      65             : 
      66             : /*
      67             :  * pg_strtok --- retrieve next "token" from a string.
      68             :  *
      69             :  * Works kinda like strtok, except it never modifies the source string.
      70             :  * (Instead of storing nulls into the string, the length of the token
      71             :  * is returned to the caller.)
      72             :  * Also, the rules about what is a token are hard-wired rather than being
      73             :  * configured by passing a set of terminating characters.
      74             :  *
      75             :  * The string is assumed to have been initialized already by stringToNode.
      76             :  *
      77             :  * The rules for tokens are:
      78             :  *  * Whitespace (space, tab, newline) always separates tokens.
      79             :  *  * The characters '(', ')', '{', '}' form individual tokens even
      80             :  *    without any whitespace around them.
      81             :  *  * Otherwise, a token is all the characters up to the next whitespace
      82             :  *    or occurrence of one of the four special characters.
      83             :  *  * A backslash '\' can be used to quote whitespace or one of the four
      84             :  *    special characters, so that it is treated as a plain token character.
      85             :  *    Backslashes themselves must also be backslashed for consistency.
      86             :  *    Any other character can be, but need not be, backslashed as well.
      87             :  *  * If the resulting token is '<>' (with no backslash), it is returned
      88             :  *    as a non-NULL pointer to the token but with length == 0.  Note that
      89             :  *    there is no other way to get a zero-length token.
      90             :  *
      91             :  * Returns a pointer to the start of the next token, and the length of the
      92             :  * token (including any embedded backslashes!) in *length.  If there are
      93             :  * no more tokens, NULL and 0 are returned.
      94             :  *
      95             :  * NOTE: this routine doesn't remove backslashes; the caller must do so
      96             :  * if necessary (see "debackslash").
      97             :  *
      98             :  * NOTE: prior to release 7.0, this routine also had a special case to treat
      99             :  * a token starting with '"' as extending to the next '"'.  This code was
     100             :  * broken, however, since it would fail to cope with a string containing an
     101             :  * embedded '"'.  I have therefore removed this special case, and instead
     102             :  * introduced rules for using backslashes to quote characters.  Higher-level
     103             :  * code should add backslashes to a string constant to ensure it is treated
     104             :  * as a single token.
     105             :  */
     106             : char *
     107     1536476 : pg_strtok(int *length)
     108             : {
     109             :     char       *local_str;      /* working pointer to string */
     110             :     char       *ret_str;        /* start of token to return */
     111             : 
     112     1536476 :     local_str = pg_strtok_ptr;
     113             : 
     114     4378758 :     while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
     115     1305806 :         local_str++;
     116             : 
     117     1536476 :     if (*local_str == '\0')
     118             :     {
     119           0 :         *length = 0;
     120           0 :         pg_strtok_ptr = local_str;
     121           0 :         return NULL;            /* no more tokens */
     122             :     }
     123             : 
     124             :     /*
     125             :      * Now pointing at start of next token.
     126             :      */
     127     1536476 :     ret_str = local_str;
     128             : 
     129     2992722 :     if (*local_str == '(' || *local_str == ')' ||
     130     2839873 :         *local_str == '{' || *local_str == '}')
     131             :     {
     132             :         /* special 1-character token */
     133      225468 :         local_str++;
     134             :     }
     135             :     else
     136             :     {
     137             :         /* Normal token, possibly containing backslashes */
     138    19620020 :         while (*local_str != '\0' &&
     139    25021899 :                *local_str != ' ' && *local_str != '\n' &&
     140    15867864 :                *local_str != '\t' &&
     141    23777953 :                *local_str != '(' && *local_str != ')' &&
     142    15820178 :                *local_str != '{' && *local_str != '}')
     143             :         {
     144     7843969 :             if (*local_str == '\\' && local_str[1] != '\0')
     145         270 :                 local_str += 2;
     146             :             else
     147     7843699 :                 local_str++;
     148             :         }
     149             :     }
     150             : 
     151     1536476 :     *length = local_str - ret_str;
     152             : 
     153             :     /* Recognize special case for "empty" token */
     154     1536476 :     if (*length == 2 && ret_str[0] == '<' && ret_str[1] == '>')
     155       50637 :         *length = 0;
     156             : 
     157     1536476 :     pg_strtok_ptr = local_str;
     158             : 
     159     1536476 :     return ret_str;
     160             : }
     161             : 
     /*
      * debackslash -
      *    return a freshly palloc'd, NUL-terminated copy of the first 'length'
      *    bytes of 'token', with protective backslashes stripped.
      *
      * A backslash is dropped and the byte after it copied literally.  A
      * backslash that is the final byte of the token has nothing to protect
      * and is copied as-is.
      */
     char *
     debackslash(char *token, int length)
     {
         char       *result = palloc(length + 1);
         char       *out = result;
         char       *in = token;
         int         remaining;

         for (remaining = length; remaining > 0; remaining--)
         {
             /* skip the backslash itself, provided a byte follows it */
             if (*in == '\\' && remaining > 1)
             {
                 in++;
                 remaining--;
             }
             *out++ = *in++;
         }
         *out = '\0';

         return result;
     }
     183             : 
     184             : #define RIGHT_PAREN (1000000 + 1)
     185             : #define LEFT_PAREN  (1000000 + 2)
     186             : #define LEFT_BRACE  (1000000 + 3)
     187             : #define OTHER_TOKEN (1000000 + 4)
     188             : 
     189             : /*
     190             :  * nodeTokenType -
     191             :  *    returns the type of the node token contained in token.
     192             :  *    It returns one of the following valid NodeTags:
     193             :  *      T_Integer, T_Float, T_String, T_BitString
     194             :  *    and some of its own:
     195             :  *      RIGHT_PAREN, LEFT_PAREN, LEFT_BRACE, OTHER_TOKEN
     196             :  *
     197             :  *    Assumption: the ascii representation is legal
     198             :  */
     199             : static NodeTag
     200      186689 : nodeTokenType(char *token, int length)
     201             : {
     202             :     NodeTag     retval;
     203             :     char       *numptr;
     204             :     int         numlen;
     205             : 
     206             :     /*
     207             :      * Check if the token is a number
     208             :      */
     209      186689 :     numptr = token;
     210      186689 :     numlen = length;
     211      186689 :     if (*numptr == '+' || *numptr == '-')
     212           0 :         numptr++, numlen--;
     213      186689 :     if ((numlen > 0 && isdigit((unsigned char) *numptr)) ||
     214       41653 :         (numlen > 1 && *numptr == '.' && isdigit((unsigned char) numptr[1])))
     215             :     {
     216             :         /*
     217             :          * Yes.  Figure out whether it is integral or float; this requires
     218             :          * both a syntax check and a range check. strtol() can do both for us.
     219             :          * We know the token will end at a character that strtol will stop at,
     220             :          * so we do not need to modify the string.
     221             :          */
     222             :         long        val;
     223             :         char       *endptr;
     224             : 
     225           0 :         errno = 0;
     226           0 :         val = strtol(token, &endptr, 10);
     227             :         (void) val;             /* avoid compiler warning if unused */
     228           0 :         if (endptr != token + length || errno == ERANGE
     229             : #ifdef HAVE_LONG_INT_64
     230             :         /* if long > 32 bits, check for overflow of int4 */
     231             :             || val != (long) ((int32) val)
     232             : #endif
     233             :             )
     234           0 :             return T_Float;
     235           0 :         return T_Integer;
     236             :     }
     237             : 
     238             :     /*
     239             :      * these three cases do not need length checks, since pg_strtok() will
     240             :      * always treat them as single-byte tokens
     241             :      */
     242      186689 :     else if (*token == '(')
     243       22817 :         retval = LEFT_PAREN;
     244      163872 :     else if (*token == ')')
     245           0 :         retval = RIGHT_PAREN;
     246      163872 :     else if (*token == '{')
     247       72619 :         retval = LEFT_BRACE;
     248       91253 :     else if (*token == '"' && length > 1 && token[length - 1] == '"')
     249       41653 :         retval = T_String;
     250       49600 :     else if (*token == 'b')
     251           0 :         retval = T_BitString;
     252             :     else
     253       49600 :         retval = OTHER_TOKEN;
     254      186689 :     return retval;
     255             : }
     256             : 
     257             : /*
     258             :  * nodeRead -
     259             :  *    Slightly higher-level reader.
     260             :  *
     261             :  * This routine applies some semantic knowledge on top of the purely
     262             :  * lexical tokenizer pg_strtok().   It can read
     263             :  *  * Value token nodes (integers, floats, or strings);
     264             :  *  * General nodes (via parseNodeString() from readfuncs.c);
     265             :  *  * Lists of the above;
     266             :  *  * Lists of integers or OIDs.
     267             :  * The return value is declared void *, not Node *, to avoid having to
     268             :  * cast it explicitly in callers that assign to fields of different types.
     269             :  *
     270             :  * External callers should always pass NULL/0 for the arguments.  Internally
     271             :  * a non-NULL token may be passed when the upper recursion level has already
     272             :  * scanned the first token of a node's representation.
     273             :  *
     274             :  * We assume pg_strtok is already initialized with a string to read (hence
     275             :  * this should only be invoked from within a stringToNode operation).
     276             :  */
     277             : void *
     278      186689 : nodeRead(char *token, int tok_len)
     279             : {
     280             :     Node       *result;
     281             :     NodeTag     type;
     282             : 
     283      186689 :     if (token == NULL)          /* need to read a token? */
     284             :     {
     285       99219 :         token = pg_strtok(&tok_len);
     286             : 
     287       99219 :         if (token == NULL)      /* end of input */
     288           0 :             return NULL;
     289             :     }
     290             : 
     291      186689 :     type = nodeTokenType(token, tok_len);
     292             : 
     293      186689 :     switch ((int) type)
     294             :     {
     295             :         case LEFT_BRACE:
     296       72619 :             result = parseNodeString();
     297       72619 :             token = pg_strtok(&tok_len);
     298       72619 :             if (token == NULL || token[0] != '}')
     299           0 :                 elog(ERROR, "did not find '}' at end of input node");
     300       72619 :             break;
     301             :         case LEFT_PAREN:
     302             :             {
     303       22817 :                 List       *l = NIL;
     304             : 
     305             :                 /*----------
     306             :                  * Could be an integer list:    (i int int ...)
     307             :                  * or an OID list:              (o int int ...)
     308             :                  * or a list of nodes/values:   (node node ...)
     309             :                  *----------
     310             :                  */
     311       22817 :                 token = pg_strtok(&tok_len);
     312       22817 :                 if (token == NULL)
     313           0 :                     elog(ERROR, "unterminated List structure");
     314       23054 :                 if (tok_len == 1 && token[0] == 'i')
     315             :                 {
     316             :                     /* List of integers */
     317             :                     for (;;)
     318             :                     {
     319             :                         int         val;
     320             :                         char       *endptr;
     321             : 
     322        1013 :                         token = pg_strtok(&tok_len);
     323        1013 :                         if (token == NULL)
     324           0 :                             elog(ERROR, "unterminated List structure");
     325        1013 :                         if (token[0] == ')')
     326         237 :                             break;
     327         776 :                         val = (int) strtol(token, &endptr, 10);
     328         776 :                         if (endptr != token + tok_len)
     329           0 :                             elog(ERROR, "unrecognized integer: \"%.*s\"",
     330             :                                  tok_len, token);
     331         776 :                         l = lappend_int(l, val);
     332         776 :                     }
     333             :                 }
     334       23144 :                 else if (tok_len == 1 && token[0] == 'o')
     335             :                 {
     336             :                     /* List of OIDs */
     337             :                     for (;;)
     338             :                     {
     339             :                         Oid         val;
     340             :                         char       *endptr;
     341             : 
     342        2249 :                         token = pg_strtok(&tok_len);
     343        2249 :                         if (token == NULL)
     344           0 :                             elog(ERROR, "unterminated List structure");
     345        2249 :                         if (token[0] == ')')
     346         564 :                             break;
     347        1685 :                         val = (Oid) strtoul(token, &endptr, 10);
     348        1685 :                         if (endptr != token + tok_len)
     349           0 :                             elog(ERROR, "unrecognized OID: \"%.*s\"",
     350             :                                  tok_len, token);
     351        1685 :                         l = lappend_oid(l, val);
     352        1685 :                     }
     353             :                 }
     354             :                 else
     355             :                 {
     356             :                     /* List of other node types */
     357             :                     for (;;)
     358             :                     {
     359             :                         /* We have already scanned next token... */
     360      109486 :                         if (token[0] == ')')
     361       22016 :                             break;
     362       87470 :                         l = lappend(l, nodeRead(token, tok_len));
     363       87470 :                         token = pg_strtok(&tok_len);
     364       87470 :                         if (token == NULL)
     365           0 :                             elog(ERROR, "unterminated List structure");
     366       87470 :                     }
     367             :                 }
     368       22817 :                 result = (Node *) l;
     369       22817 :                 break;
     370             :             }
     371             :         case RIGHT_PAREN:
     372           0 :             elog(ERROR, "unexpected right parenthesis");
     373             :             result = NULL;      /* keep compiler happy */
     374             :             break;
     375             :         case OTHER_TOKEN:
     376       49600 :             if (tok_len == 0)
     377             :             {
     378             :                 /* must be "<>" --- represents a null pointer */
     379       49600 :                 result = NULL;
     380             :             }
     381             :             else
     382             :             {
     383           0 :                 elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token);
     384             :                 result = NULL;  /* keep compiler happy */
     385             :             }
     386       49600 :             break;
     387             :         case T_Integer:
     388             : 
     389             :             /*
     390             :              * we know that the token terminates on a char atol will stop at
     391             :              */
     392           0 :             result = (Node *) makeInteger(atol(token));
     393           0 :             break;
     394             :         case T_Float:
     395             :             {
     396           0 :                 char       *fval = (char *) palloc(tok_len + 1);
     397             : 
     398           0 :                 memcpy(fval, token, tok_len);
     399           0 :                 fval[tok_len] = '\0';
     400           0 :                 result = (Node *) makeFloat(fval);
     401             :             }
     402           0 :             break;
     403             :         case T_String:
     404             :             /* need to remove leading and trailing quotes, and backslashes */
     405       41653 :             result = (Node *) makeString(debackslash(token + 1, tok_len - 2));
     406       41653 :             break;
     407             :         case T_BitString:
     408             :             {
     409           0 :                 char       *val = palloc(tok_len);
     410             : 
     411             :                 /* skip leading 'b' */
     412           0 :                 memcpy(val, token + 1, tok_len - 1);
     413           0 :                 val[tok_len - 1] = '\0';
     414           0 :                 result = (Node *) makeBitString(val);
     415           0 :                 break;
     416             :             }
     417             :         default:
     418           0 :             elog(ERROR, "unrecognized node type: %d", (int) type);
     419             :             result = NULL;      /* keep compiler happy */
     420             :             break;
     421             :     }
     422             : 
     423      186689 :     return (void *) result;
     424             : }

Generated by: LCOV version 1.11