LCOV - PostgreSQL - src/pl/plpgsql/src/pl_scanner.c

LCOV - code coverage report

Current view:	top level - src/pl/plpgsql/src - pl_scanner.c (source / functions)		Hit	Total	Coverage
Test:	PostgreSQL	Lines:	140	156	89.7 %
Date:	2017-09-29 15:12:54	Functions:	15	15	100.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * pl_scanner.c
       4             :  *    lexical scanning for PL/pgSQL
       5             :  *
       6             :  *
       7             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  *
      11             :  * IDENTIFICATION
      12             :  *    src/pl/plpgsql/src/pl_scanner.c
      13             :  *
      14             :  *-------------------------------------------------------------------------
      15             :  */
      16             : #include "postgres.h"
      17             : 
      18             : #include "mb/pg_wchar.h"
      19             : #include "parser/scanner.h"
      20             : 
      21             : #include "plpgsql.h"
      22             : #include "pl_gram.h"          /* must be after parser/scanner.h */
      23             : 
      24             : 
      25             : #define PG_KEYWORD(a,b,c) {a,b,c},
      26             : 
      27             : 
      28             : /* Klugy flag to tell scanner how to look up identifiers */
      29             : IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
      30             : 
      31             : /*
      32             :  * A word about keywords:
      33             :  *
      34             :  * We keep reserved and unreserved keywords in separate arrays.  The
      35             :  * reserved keywords are passed to the core scanner, so they will be
      36             :  * recognized before (and instead of) any variable name.  Unreserved words
      37             :  * are checked for separately, usually after determining that the identifier
      38             :  * isn't a known variable name.  If plpgsql_IdentifierLookup is DECLARE then
      39             :  * no variable names will be recognized, so the unreserved words always work.
      40             :  * (Note in particular that this helps us avoid reserving keywords that are
      41             :  * only needed in DECLARE sections.)
      42             :  *
      43             :  * In certain contexts it is desirable to prefer recognizing an unreserved
      44             :  * keyword over recognizing a variable name.  In particular, at the start
      45             :  * of a statement we should prefer unreserved keywords unless the statement
      46             :  * looks like an assignment (i.e., first token is followed by ':=' or '[').
      47             :  * This rule allows most statement-introducing keywords to be kept unreserved.
      48             :  * (We still have to reserve initial keywords that might follow a block
      49             :  * label, unfortunately, since the method used to determine if we are at
      50             :  * start of statement doesn't recognize such cases.  We'd also have to
      51             :  * reserve any keyword that could legitimately be followed by ':=' or '['.)
      52             :  * Some additional cases are handled in pl_gram.y using tok_is_keyword().
      53             :  *
      54             :  * We try to avoid reserving more keywords than we have to; but there's
      55             :  * little point in not reserving a word if it's reserved in the core grammar.
      56             :  * Currently, the following words are reserved here but not in the core:
      57             :  * BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE
      58             :  */
      59             : 
      60             : /*
      61             :  * Lists of keyword (name, token-value, category) entries.
      62             :  *
      63             :  * !!WARNING!!: These lists must be sorted by ASCII name, because binary
      64             :  *       search is used to locate entries.
      65             :  *
      66             :  * Be careful not to put the same word in both lists.  Also be sure that
      67             :  * pl_gram.y's unreserved_keyword production agrees with the second list.
      68             :  */
      69             : 
      70             : static const ScanKeyword reserved_keywords[] = {
      71             :     PG_KEYWORD("all", K_ALL, RESERVED_KEYWORD)
      72             :     PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
      73             :     PG_KEYWORD("by", K_BY, RESERVED_KEYWORD)
      74             :     PG_KEYWORD("case", K_CASE, RESERVED_KEYWORD)
      75             :     PG_KEYWORD("declare", K_DECLARE, RESERVED_KEYWORD)
      76             :     PG_KEYWORD("else", K_ELSE, RESERVED_KEYWORD)
      77             :     PG_KEYWORD("end", K_END, RESERVED_KEYWORD)
      78             :     PG_KEYWORD("execute", K_EXECUTE, RESERVED_KEYWORD)
      79             :     PG_KEYWORD("for", K_FOR, RESERVED_KEYWORD)
      80             :     PG_KEYWORD("foreach", K_FOREACH, RESERVED_KEYWORD)
      81             :     PG_KEYWORD("from", K_FROM, RESERVED_KEYWORD)
      82             :     PG_KEYWORD("if", K_IF, RESERVED_KEYWORD)
      83             :     PG_KEYWORD("in", K_IN, RESERVED_KEYWORD)
      84             :     PG_KEYWORD("into", K_INTO, RESERVED_KEYWORD)
      85             :     PG_KEYWORD("loop", K_LOOP, RESERVED_KEYWORD)
      86             :     PG_KEYWORD("not", K_NOT, RESERVED_KEYWORD)
      87             :     PG_KEYWORD("null", K_NULL, RESERVED_KEYWORD)
      88             :     PG_KEYWORD("or", K_OR, RESERVED_KEYWORD)
      89             :     PG_KEYWORD("strict", K_STRICT, RESERVED_KEYWORD)
      90             :     PG_KEYWORD("then", K_THEN, RESERVED_KEYWORD)
      91             :     PG_KEYWORD("to", K_TO, RESERVED_KEYWORD)
      92             :     PG_KEYWORD("using", K_USING, RESERVED_KEYWORD)
      93             :     PG_KEYWORD("when", K_WHEN, RESERVED_KEYWORD)
      94             :     PG_KEYWORD("while", K_WHILE, RESERVED_KEYWORD)
      95             : };
      96             : 
      97             : static const int num_reserved_keywords = lengthof(reserved_keywords);
      98             : 
      99             : static const ScanKeyword unreserved_keywords[] = {
     100             :     PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
     101             :     PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
     102             :     PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD)
     103             :     PG_KEYWORD("assert", K_ASSERT, UNRESERVED_KEYWORD)
     104             :     PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
     105             :     PG_KEYWORD("close", K_CLOSE, UNRESERVED_KEYWORD)
     106             :     PG_KEYWORD("collate", K_COLLATE, UNRESERVED_KEYWORD)
     107             :     PG_KEYWORD("column", K_COLUMN, UNRESERVED_KEYWORD)
     108             :     PG_KEYWORD("column_name", K_COLUMN_NAME, UNRESERVED_KEYWORD)
     109             :     PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
     110             :     PG_KEYWORD("constraint", K_CONSTRAINT, UNRESERVED_KEYWORD)
     111             :     PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME, UNRESERVED_KEYWORD)
     112             :     PG_KEYWORD("continue", K_CONTINUE, UNRESERVED_KEYWORD)
     113             :     PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD)
     114             :     PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
     115             :     PG_KEYWORD("datatype", K_DATATYPE, UNRESERVED_KEYWORD)
     116             :     PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
     117             :     PG_KEYWORD("default", K_DEFAULT, UNRESERVED_KEYWORD)
     118             :     PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
     119             :     PG_KEYWORD("diagnostics", K_DIAGNOSTICS, UNRESERVED_KEYWORD)
     120             :     PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
     121             :     PG_KEYWORD("elseif", K_ELSIF, UNRESERVED_KEYWORD)
     122             :     PG_KEYWORD("elsif", K_ELSIF, UNRESERVED_KEYWORD)
     123             :     PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
     124             :     PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD)
     125             :     PG_KEYWORD("exception", K_EXCEPTION, UNRESERVED_KEYWORD)
     126             :     PG_KEYWORD("exit", K_EXIT, UNRESERVED_KEYWORD)
     127             :     PG_KEYWORD("fetch", K_FETCH, UNRESERVED_KEYWORD)
     128             :     PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
     129             :     PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
     130             :     PG_KEYWORD("get", K_GET, UNRESERVED_KEYWORD)
     131             :     PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
     132             :     PG_KEYWORD("import", K_IMPORT, UNRESERVED_KEYWORD)
     133             :     PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
     134             :     PG_KEYWORD("insert", K_INSERT, UNRESERVED_KEYWORD)
     135             :     PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
     136             :     PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
     137             :     PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
     138             :     PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
     139             :     PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD)
     140             :     PG_KEYWORD("move", K_MOVE, UNRESERVED_KEYWORD)
     141             :     PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
     142             :     PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
     143             :     PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
     144             :     PG_KEYWORD("open", K_OPEN, UNRESERVED_KEYWORD)
     145             :     PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
     146             :     PG_KEYWORD("perform", K_PERFORM, UNRESERVED_KEYWORD)
     147             :     PG_KEYWORD("pg_context", K_PG_CONTEXT, UNRESERVED_KEYWORD)
     148             :     PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME, UNRESERVED_KEYWORD)
     149             :     PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD)
     150             :     PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD)
     151             :     PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD)
     152             :     PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS, UNRESERVED_KEYWORD)
     153             :     PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
     154             :     PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
     155             :     PG_KEYWORD("raise", K_RAISE, UNRESERVED_KEYWORD)
     156             :     PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
     157             :     PG_KEYWORD("result_oid", K_RESULT_OID, UNRESERVED_KEYWORD)
     158             :     PG_KEYWORD("return", K_RETURN, UNRESERVED_KEYWORD)
     159             :     PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD)
     160             :     PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
     161             :     PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
     162             :     PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD)
     163             :     PG_KEYWORD("schema", K_SCHEMA, UNRESERVED_KEYWORD)
     164             :     PG_KEYWORD("schema_name", K_SCHEMA_NAME, UNRESERVED_KEYWORD)
     165             :     PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD)
     166             :     PG_KEYWORD("slice", K_SLICE, UNRESERVED_KEYWORD)
     167             :     PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD)
     168             :     PG_KEYWORD("stacked", K_STACKED, UNRESERVED_KEYWORD)
     169             :     PG_KEYWORD("table", K_TABLE, UNRESERVED_KEYWORD)
     170             :     PG_KEYWORD("table_name", K_TABLE_NAME, UNRESERVED_KEYWORD)
     171             :     PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD)
     172             :     PG_KEYWORD("use_column", K_USE_COLUMN, UNRESERVED_KEYWORD)
     173             :     PG_KEYWORD("use_variable", K_USE_VARIABLE, UNRESERVED_KEYWORD)
     174             :     PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT, UNRESERVED_KEYWORD)
     175             :     PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD)
     176             : };
     177             : 
     178             : static const int num_unreserved_keywords = lengthof(unreserved_keywords);
     179             : 
     180             : /*
     181             :  * This macro must recognize all tokens that can immediately precede a
     182             :  * PL/pgSQL executable statement (that is, proc_sect or proc_stmt in the
     183             :  * grammar).  Fortunately, there are not very many, so hard-coding in this
     184             :  * fashion seems sufficient.
     185             :  */
     186             : #define AT_STMT_START(prev_token) \
     187             :     ((prev_token) == ';' || \
     188             :      (prev_token) == K_BEGIN || \
     189             :      (prev_token) == K_THEN || \
     190             :      (prev_token) == K_ELSE || \
     191             :      (prev_token) == K_LOOP)
     192             : 
     193             : 
     194             : /* Auxiliary data about a token (other than the token type) */
     195             : typedef struct
     196             : {
     197             :     YYSTYPE     lval;           /* semantic information */
     198             :     YYLTYPE     lloc;           /* offset in scanbuf */
     199             :     int         leng;           /* length in bytes */
     200             : } TokenAuxData;
     201             : 
     202             : /*
     203             :  * Scanner working state.  At some point we might wish to fold all this
     204             :  * into a YY_EXTRA struct.  For the moment, there is no need for plpgsql's
     205             :  * lexer to be re-entrant, and the notational burden of passing a yyscanner
     206             :  * pointer around is great enough to not want to do it without need.
     207             :  */
     208             : 
     209             : /* The stuff the core lexer needs */
     210             : static core_yyscan_t yyscanner = NULL;
     211             : static core_yy_extra_type core_yy;
     212             : 
     213             : /* The original input string */
     214             : static const char *scanorig;
     215             : 
     216             : /* Current token's length (corresponds to plpgsql_yylval and plpgsql_yylloc) */
     217             : static int  plpgsql_yyleng;
     218             : 
     219             : /* Current token's code (corresponds to plpgsql_yylval and plpgsql_yylloc) */
     220             : static int  plpgsql_yytoken;
     221             : 
     222             : /* Token pushback stack */
     223             : #define MAX_PUSHBACKS 4
     224             : 
     225             : static int  num_pushbacks;
     226             : static int  pushback_token[MAX_PUSHBACKS];
     227             : static TokenAuxData pushback_auxdata[MAX_PUSHBACKS];
     228             : 
     229             : /* State for plpgsql_location_to_lineno() */
     230             : static const char *cur_line_start;
     231             : static const char *cur_line_end;
     232             : static int  cur_line_num;
     233             : 
     234             : /* Internal functions */
     235             : static int  internal_yylex(TokenAuxData *auxdata);
     236             : static void push_back_token(int token, TokenAuxData *auxdata);
     237             : static void location_lineno_init(void);
     238             : 
     239             : 
     240             : /*
     241             :  * This is the yylex routine called from the PL/pgSQL grammar.
     242             :  * It is a wrapper around the core lexer, with the ability to recognize
     243             :  * PL/pgSQL variables and return them as special T_DATUM tokens.  If a
     244             :  * word or compound word does not match any variable name, or if matching
     245             :  * is turned off by plpgsql_IdentifierLookup, it is returned as
     246             :  * T_WORD or T_CWORD respectively, or as an unreserved keyword if it
     247             :  * matches one of those.
     248             :  */
     249             : int
     250       25848 : plpgsql_yylex(void)
     251             : {
     252             :     int         tok1;
     253             :     TokenAuxData aux1;
     254             :     const ScanKeyword *kw;
     255             : 
     256       25848 :     tok1 = internal_yylex(&aux1);
     257       25848 :     if (tok1 == IDENT || tok1 == PARAM)
     258             :     {
     259             :         int         tok2;
     260             :         TokenAuxData aux2;
     261             : 
     262        8726 :         tok2 = internal_yylex(&aux2);
     263        8726 :         if (tok2 == '.')
     264             :         {
     265             :             int         tok3;
     266             :             TokenAuxData aux3;
     267             : 
     268         923 :             tok3 = internal_yylex(&aux3);
     269         923 :             if (tok3 == IDENT)
     270             :             {
     271             :                 int         tok4;
     272             :                 TokenAuxData aux4;
     273             : 
     274         901 :                 tok4 = internal_yylex(&aux4);
     275         901 :                 if (tok4 == '.')
     276             :                 {
     277             :                     int         tok5;
     278             :                     TokenAuxData aux5;
     279             : 
     280           2 :                     tok5 = internal_yylex(&aux5);
     281           2 :                     if (tok5 == IDENT)
     282             :                     {
     283           2 :                         if (plpgsql_parse_tripword(aux1.lval.str,
     284             :                                                    aux3.lval.str,
     285             :                                                    aux5.lval.str,
     286             :                                                    &aux1.lval.wdatum,
     287             :                                                    &aux1.lval.cword))
     288           2 :                             tok1 = T_DATUM;
     289             :                         else
     290           0 :                             tok1 = T_CWORD;
     291             :                     }
     292             :                     else
     293             :                     {
     294             :                         /* not A.B.C, so just process A.B */
     295           0 :                         push_back_token(tok5, &aux5);
     296           0 :                         push_back_token(tok4, &aux4);
     297           0 :                         if (plpgsql_parse_dblword(aux1.lval.str,
     298             :                                                   aux3.lval.str,
     299             :                                                   &aux1.lval.wdatum,
     300             :                                                   &aux1.lval.cword))
     301           0 :                             tok1 = T_DATUM;
     302             :                         else
     303           0 :                             tok1 = T_CWORD;
     304             :                     }
     305             :                 }
     306             :                 else
     307             :                 {
     308             :                     /* not A.B.C, so just process A.B */
     309         899 :                     push_back_token(tok4, &aux4);
     310         899 :                     if (plpgsql_parse_dblword(aux1.lval.str,
     311             :                                               aux3.lval.str,
     312             :                                               &aux1.lval.wdatum,
     313             :                                               &aux1.lval.cword))
     314         831 :                         tok1 = T_DATUM;
     315             :                     else
     316          68 :                         tok1 = T_CWORD;
     317             :                 }
     318             :             }
     319             :             else
     320             :             {
     321             :                 /* not A.B, so just process A */
     322          22 :                 push_back_token(tok3, &aux3);
     323          22 :                 push_back_token(tok2, &aux2);
     324          22 :                 if (plpgsql_parse_word(aux1.lval.str,
     325          22 :                                        core_yy.scanbuf + aux1.lloc,
     326             :                                        &aux1.lval.wdatum,
     327             :                                        &aux1.lval.word))
     328           0 :                     tok1 = T_DATUM;
     329          44 :                 else if (!aux1.lval.word.quoted &&
     330          22 :                          (kw = ScanKeywordLookup(aux1.lval.word.ident,
     331             :                                                  unreserved_keywords,
     332             :                                                  num_unreserved_keywords)))
     333             :                 {
     334           0 :                     aux1.lval.keyword = kw->name;
     335           0 :                     tok1 = kw->value;
     336             :                 }
     337             :                 else
     338          22 :                     tok1 = T_WORD;
     339             :             }
     340             :         }
     341             :         else
     342             :         {
     343             :             /* not A.B, so just process A */
     344        7803 :             push_back_token(tok2, &aux2);
     345             : 
     346             :             /*
     347             :              * If we are at start of statement, prefer unreserved keywords
     348             :              * over variable names, unless the next token is assignment or
     349             :              * '[', in which case prefer variable names.  (Note we need not
     350             :              * consider '.' as the next token; that case was handled above,
     351             :              * and we always prefer variable names in that case.)  If we are
     352             :              * not at start of statement, always prefer variable names over
     353             :              * unreserved keywords.
     354             :              */
     355        7803 :             if (AT_STMT_START(plpgsql_yytoken) &&
     356        2059 :                 !(tok2 == '=' || tok2 == COLON_EQUALS || tok2 == '['))
     357             :             {
     358             :                 /* try for unreserved keyword, then for variable name */
     359        5370 :                 if (core_yy.scanbuf[aux1.lloc] != '"' &&
     360        1790 :                     (kw = ScanKeywordLookup(aux1.lval.str,
     361             :                                             unreserved_keywords,
     362             :                                             num_unreserved_keywords)))
     363             :                 {
     364        1396 :                     aux1.lval.keyword = kw->name;
     365        1396 :                     tok1 = kw->value;
     366             :                 }
     367         394 :                 else if (plpgsql_parse_word(aux1.lval.str,
     368         394 :                                             core_yy.scanbuf + aux1.lloc,
     369             :                                             &aux1.lval.wdatum,
     370             :                                             &aux1.lval.word))
     371           0 :                     tok1 = T_DATUM;
     372             :                 else
     373         394 :                     tok1 = T_WORD;
     374             :             }
     375             :             else
     376             :             {
     377             :                 /* try for variable name, then for unreserved keyword */
     378        6013 :                 if (plpgsql_parse_word(aux1.lval.str,
     379        6013 :                                        core_yy.scanbuf + aux1.lloc,
     380             :                                        &aux1.lval.wdatum,
     381             :                                        &aux1.lval.word))
     382         911 :                     tok1 = T_DATUM;
     383       10204 :                 else if (!aux1.lval.word.quoted &&
     384        5102 :                          (kw = ScanKeywordLookup(aux1.lval.word.ident,
     385             :                                                  unreserved_keywords,
     386             :                                                  num_unreserved_keywords)))
     387             :                 {
     388         730 :                     aux1.lval.keyword = kw->name;
     389         730 :                     tok1 = kw->value;
     390             :                 }
     391             :                 else
     392        4372 :                     tok1 = T_WORD;
     393             :             }
     394             :         }
     395             :     }
     396             :     else
     397             :     {
     398             :         /*
     399             :          * Not a potential plpgsql variable name, just return the data.
     400             :          *
     401             :          * Note that we also come through here if the grammar pushed back a
     402             :          * T_DATUM, T_CWORD, T_WORD, or unreserved-keyword token returned by a
     403             :          * previous lookup cycle; thus, pushbacks do not incur extra lookup
     404             :          * work, since we'll never do the above code twice for the same token.
     405             :          * This property also makes it safe to rely on the old value of
     406             :          * plpgsql_yytoken in the is-this-start-of-statement test above.
     407             :          */
     408             :     }
     409             : 
     410       25848 :     plpgsql_yylval = aux1.lval;
     411       25848 :     plpgsql_yylloc = aux1.lloc;
     412       25848 :     plpgsql_yyleng = aux1.leng;
     413       25848 :     plpgsql_yytoken = tok1;
     414       25848 :     return tok1;
     415             : }
     416             : 
     417             : /*
     418             :  * Internal yylex function.  This wraps the core lexer and adds one feature:
     419             :  * a token pushback stack.  We also make a couple of trivial single-token
     420             :  * translations from what the core lexer does to what we want, in particular
     421             :  * interfacing from the core_YYSTYPE to YYSTYPE union.
     422             :  */
     423             : static int
     424       36847 : internal_yylex(TokenAuxData *auxdata)
     425             : {
     426             :     int         token;
     427             :     const char *yytext;
     428             : 
     429       36847 :     if (num_pushbacks > 0)
     430             :     {
     431       11034 :         num_pushbacks--;
     432       11034 :         token = pushback_token[num_pushbacks];
     433       11034 :         *auxdata = pushback_auxdata[num_pushbacks];
     434             :     }
     435             :     else
     436             :     {
     437       25813 :         token = core_yylex(&auxdata->lval.core_yystype,
     438             :                            &auxdata->lloc,
     439             :                            yyscanner);
     440             : 
     441             :         /* remember the length of yytext before it gets changed */
     442       25813 :         yytext = core_yy.scanbuf + auxdata->lloc;
     443       25813 :         auxdata->leng = strlen(yytext);
     444             : 
     445             :         /* Check for << >> and #, which the core considers operators */
     446       25813 :         if (token == Op)
     447             :         {
     448         187 :             if (strcmp(auxdata->lval.str, "<<") == 0)
     449          15 :                 token = LESS_LESS;
     450         172 :             else if (strcmp(auxdata->lval.str, ">>") == 0)
     451          15 :                 token = GREATER_GREATER;
     452         157 :             else if (strcmp(auxdata->lval.str, "#") == 0)
     453           4 :                 token = '#';
     454             :         }
     455             : 
     456             :         /* The core returns PARAM as ival, but we treat it like IDENT */
     457       25626 :         else if (token == PARAM)
     458             :         {
     459         173 :             auxdata->lval.str = pstrdup(yytext);
     460             :         }
     461             :     }
     462             : 
     463       36847 :     return token;
     464             : }
     465             : 
     466             : /*
     467             :  * Push back a token to be re-read by next internal_yylex() call.
     468             :  */
     469             : static void
     470       11039 : push_back_token(int token, TokenAuxData *auxdata)
     471             : {
     472       11039 :     if (num_pushbacks >= MAX_PUSHBACKS)
     473           0 :         elog(ERROR, "too many tokens pushed back");
     474       11039 :     pushback_token[num_pushbacks] = token;
     475       11039 :     pushback_auxdata[num_pushbacks] = *auxdata;
     476       11039 :     num_pushbacks++;
     477       11039 : }
     478             : 
     479             : /*
     480             :  * Push back a single token to be re-read by next plpgsql_yylex() call.
     481             :  *
     482             :  * NOTE: this does not cause yylval or yylloc to "back up".  Also, it
     483             :  * is not a good idea to push back a token code other than what you read.
     484             :  */
     485             : void
     486        1846 : plpgsql_push_back_token(int token)
     487             : {
     488             :     TokenAuxData auxdata;
     489             : 
     490        1846 :     auxdata.lval = plpgsql_yylval;
     491        1846 :     auxdata.lloc = plpgsql_yylloc;
     492        1846 :     auxdata.leng = plpgsql_yyleng;
     493        1846 :     push_back_token(token, &auxdata);
     494        1846 : }
     495             : 
     496             : /*
     497             :  * Tell whether a token is an unreserved keyword: returns true if some
     498             :  * unreserved_keywords[] entry has this token code as its value, else false.
     499             :  * (If it is, its lowercased form was returned as the token value, so we
     500             :  * do not need to offer that data here.)
     501             :  */
     502             : bool
     503           4 : plpgsql_token_is_unreserved_keyword(int token)
     504             : {
     505             :     int         i;
     506             : 
     507         308 :     for (i = 0; i < num_unreserved_keywords; i++)    /* linear scan over every entry */
     508             :     {
     509         304 :         if (unreserved_keywords[i].value == token)
     510           0 :             return true;
     511             :     }
     512           4 :     return false;    /* no entry carries this token code */
     513             : }
     514             : 
     515             : /*
     516             :  * Append the function text starting at byte offset startlocation and extending to
     517             :  * (not including) byte offset endlocation onto the existing contents of "buf".
     518             :  */
     519             : void
     520        3002 : plpgsql_append_source_text(StringInfo buf,
     521             :                            int startlocation, int endlocation)
     522             : {
     523        3002 :     Assert(startlocation <= endlocation);    /* range order is checked only by this Assert */
     524        3002 :     appendBinaryStringInfo(buf, scanorig + startlocation,    /* text is read from scanorig */
     525             :                            endlocation - startlocation);    /* length in bytes; zero when both offsets are equal */
     526        3002 : }
     527             : 
     528             : /*
     529             :  * Peek one token ahead in the input stream.  Only the token code is
     530             :  * made available, not any of the auxiliary info such as location.
     531             :  * The token is read with internal_yylex() and immediately pushed back.
     532             :  * NB: no variable or unreserved keyword lookup is performed here, they will
     533             :  * be returned as IDENT. Reserved keywords are resolved as usual.
     534             :  */
     535             : int
     536         399 : plpgsql_peek(void)
     537             : {
     538             :     int         tok1;
     539             :     TokenAuxData aux1;
     540             : 
     541         399 :     tok1 = internal_yylex(&aux1);
     542         399 :     push_back_token(tok1, &aux1);    /* un-read it, keeping its auxiliary data */
     543         399 :     return tok1;
     544             : }
     545             : 
     546             : /*
     547             :  * Peek two tokens ahead in the input stream. The first token and its
     548             :  * location in the query are returned in *tok1_p and *tok1_loc, second token
     549             :  * and its location in *tok2_p and *tok2_loc.  Either location pointer may be NULL if that location is not wanted; *tok1_p and *tok2_p are always written.
     550             :  *
     551             :  * NB: no variable or unreserved keyword lookup is performed here, they will
     552             :  * be returned as IDENT. Reserved keywords are resolved as usual.
     553             :  */
     554             : void
     555          24 : plpgsql_peek2(int *tok1_p, int *tok2_p, int *tok1_loc, int *tok2_loc)
     556             : {
     557             :     int         tok1,
     558             :                 tok2;
     559             :     TokenAuxData aux1,
     560             :                 aux2;
     561             : 
     562          24 :     tok1 = internal_yylex(&aux1);
     563          24 :     tok2 = internal_yylex(&aux2);
     564             : 
     565          24 :     *tok1_p = tok1;
     566          24 :     if (tok1_loc)    /* location output is optional */
     567          24 :         *tok1_loc = aux1.lloc;
     568          24 :     *tok2_p = tok2;
     569          24 :     if (tok2_loc)    /* location output is optional */
     570           0 :         *tok2_loc = aux2.lloc;
     571             : 
     572          24 :     push_back_token(tok2, &aux2);    /* pushed in reverse of reading order */
     573          24 :     push_back_token(tok1, &aux1);    /* NOTE(review): presumably pushbacks are re-read last-in-first-out, so tok1 comes back first -- confirm in internal_yylex() */
     574          24 : }
     575             : 
     576             : /*
     577             :  * plpgsql_scanner_errposition
     578             :  *      Report an error cursor position, if possible.
     579             :  *
     580             :  * This is expected to be used within an ereport() call.  The return value
     581             :  * is a dummy (always 0, in fact).  "location" is a byte offset into scanorig.
     582             :  *
     583             :  * Note that this can only be used for messages emitted during initial
     584             :  * parsing of a plpgsql function, since it requires the scanorig string
     585             :  * to still be available; once scanorig is NULL this does nothing and returns 0.
     586             :  */
     587             : int
     588          29 : plpgsql_scanner_errposition(int location)
     589             : {
     590             :     int         pos;
     591             : 
     592          29 :     if (location < 0 || scanorig == NULL)
     593           0 :         return 0;               /* no-op if location is unknown or there is no source string */
     594             : 
     595             :     /* Convert byte offset to character number (count of characters before it, plus one) */
     596          29 :     pos = pg_mbstrlen_with_len(scanorig, location) + 1;
     597             :     /* And pass it to the ereport mechanism */
     598          29 :     (void) internalerrposition(pos);
     599             :     /* Also pass the function body string; its result is what we return */
     600          29 :     return internalerrquery(scanorig);
     601             : }
     602             : 
     603             : /*
     604             :  * plpgsql_yyerror
     605             :  *      Report a lexer or grammar error.  Both branches end in ereport(ERROR).
     606             :  *
     607             :  * The message's cursor position refers to the current token (the one
     608             :  * last returned by plpgsql_yylex()).
     609             :  * This is OK for syntax error messages from the Bison parser, because Bison
     610             :  * parsers report error as soon as the first unparsable token is reached.
     611             :  * Beware of using yyerror for other purposes, as the cursor position might
     612             :  * be misleading!
     613             :  */
     614             : void
     615           1 : plpgsql_yyerror(const char *message)
     616             : {
     617           1 :     char       *yytext = core_yy.scanbuf + plpgsql_yylloc;    /* current token, located in scanbuf rather than scanorig */
     618             : 
     619           1 :     if (*yytext == '\0')    /* the current location holds the terminating NUL: no token text to quote */
     620             :     {
     621           0 :         ereport(ERROR,
     622             :                 (errcode(ERRCODE_SYNTAX_ERROR),
     623             :         /* translator: %s is typically the translation of "syntax error" */
     624             :                  errmsg("%s at end of input", _(message)),
     625             :                  plpgsql_scanner_errposition(plpgsql_yylloc)));
     626             :     }
     627             :     else
     628             :     {
     629             :         /*
     630             :          * If we have done any lookahead then flex will have restored the
     631             :          * character after the end-of-token.  Zap it again so that we report
     632             :          * only the single token here.  This modifies scanbuf but we no longer
     633             :          * care about that.
     634             :          */
     635           1 :         yytext[plpgsql_yyleng] = '\0';    /* cut the buffer just past the current token */
     636             : 
     637           1 :         ereport(ERROR,
     638             :                 (errcode(ERRCODE_SYNTAX_ERROR),
     639             :         /* translator: first %s is typically the translation of "syntax error" */
     640             :                  errmsg("%s at or near \"%s\"", _(message), yytext),
     641             :                  plpgsql_scanner_errposition(plpgsql_yylloc)));
     642             :     }
     643             : }
     644             : 
     645             : /*
     646             :  * Given a location (a byte offset in the function source text),
     647             :  * return a line number (the first line is 1), or 0 if the location is negative or scanorig is NULL.
     648             :  *
     649             :  * We expect that this is typically called for a sequence of increasing
     650             :  * location values, so optimize accordingly by tracking the endpoints
     651             :  * of the "current" line.
     652             :  */
     653             : int
     654        3779 : plpgsql_location_to_lineno(int location)
     655             : {
     656             :     const char *loc;
     657             : 
     658        3779 :     if (location < 0 || scanorig == NULL)
     659           0 :         return 0;               /* garbage in, garbage out */
     660        3779 :     loc = scanorig + location;
     661             : 
     662             :     /* be correct, but not fast, if input location goes backwards */
     663        3779 :     if (loc < cur_line_start)
     664        1167 :         location_lineno_init();    /* restart from line 1 at the start of scanorig */
     665             : 
     666       24474 :     while (cur_line_end != NULL && loc > cur_line_end)    /* NULL end: no newline follows, so this is the last line; a location exactly on a newline stays on the line that newline ends */
     667             :     {
     668       16916 :         cur_line_start = cur_line_end + 1;
     669       16916 :         cur_line_num++;
     670       16916 :         cur_line_end = strchr(cur_line_start, '\n');
     671             :     }
     672             : 
     673        3779 :     return cur_line_num;
     674             : }
     675             : 
     676             : /* initialize or reset the state for plpgsql_location_to_lineno: current line becomes line 1, starting at scanorig (which must already be set) */
     677             : static void
     678        1731 : location_lineno_init(void)
     679             : {
     680        1731 :     cur_line_start = scanorig;
     681        1731 :     cur_line_num = 1;
     682             : 
     683        1731 :     cur_line_end = strchr(cur_line_start, '\n');    /* NULL when the text contains no newline */
     684        1731 : }
     685             : 
     686             : /* return the most recently computed lineno, i.e. the current value of cur_line_num (1 right after location_lineno_init) */
     687             : int
     688           3 : plpgsql_latest_lineno(void)
     689             : {
     690           3 :     return cur_line_num;    /* read only; no recomputation happens here */
     691             : }
     692             : 
     693             : 
     694             : /*
     695             :  * Called before any actual parsing is done
     696             :  *
     697             :  * Note: the passed "str" must remain valid until plpgsql_scanner_finish().
     698             :  * Although it is not fed directly to flex, we need the original string
     699             :  * to cite in error messages.
     700             :  */
     701             : void
     702         564 : plpgsql_scanner_init(const char *str)
     703             : {
     704             :     /* Start up the core scanner, giving it our reserved keywords */
     705         564 :     yyscanner = scanner_init(str, &core_yy,
     706             :                              reserved_keywords, num_reserved_keywords);
     707             : 
     708             :     /*
     709             :      * scanorig points to the original string, which unlike the scanner's
     710             :      * scanbuf won't be modified on-the-fly by flex.  Notice that although
     711             :      * yytext points into scanbuf, we rely on being able to apply locations
     712             :      * (offsets from string start) to scanorig as well.
     713             :      */
     714         564 :     scanorig = str;
     715             : 
     716             :     /* Other setup: reset the lookup mode and the current token */
     717         564 :     plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
     718         564 :     plpgsql_yytoken = 0;
     719             : 
     720         564 :     num_pushbacks = 0;    /* start with no pushed-back tokens */
     721             : 
     722         564 :     location_lineno_init();    /* must follow the scanorig assignment, which it reads */
     723         564 : }
     724             : 
     725             : /*
     726             :  * Called after parsing is done to clean up after plpgsql_scanner_init()
     727             :  */
     728             : void
     729         541 : plpgsql_scanner_finish(void)
     730             : {
     731             :     /* release storage */
     732         541 :     scanner_finish(yyscanner);
     733             :     /* avoid leaving any dangling pointers */
     734         541 :     yyscanner = NULL;
     735         541 :     scanorig = NULL;    /* from here on plpgsql_scanner_errposition() and plpgsql_location_to_lineno() just return 0 */
     736         541 : }

Generated by: LCOV version 1.11