LCOV - code coverage report
Current view: top level - src/fe_utils - psqlscan.l (source / functions) Hit Total Coverage
Test: PostgreSQL Lines: 347 452 76.8 %
Date: 2017-09-29 15:12:54 Functions: 17 17 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : %top{
       2             : /*-------------------------------------------------------------------------
       3             :  *
       4             :  * psqlscan.l
       5             :  *    lexical scanner for SQL commands
       6             :  *
       7             :  * This lexer used to be part of psql, and that heritage is reflected in
       8             :  * the file name as well as function and typedef names, though it can now
       9             :  * be used by other frontend programs as well.  It's also possible to extend
      10             :  * this lexer with a compatible add-on lexer to handle program-specific
      11             :  * backslash commands.
      12             :  *
      13             :  * This code is mainly concerned with determining where the end of a SQL
      14             :  * statement is: we are looking for semicolons that are not within quotes,
      15             :  * comments, or parentheses.  The most reliable way to handle this is to
      16             :  * borrow the backend's flex lexer rules, lock, stock, and barrel.  The rules
      17             :  * below are (except for a few) the same as the backend's, but their actions
      18             :  * are just ECHO whereas the backend's actions generally do other things.
      19             :  *
      20             :  * XXX The rules in this file must be kept in sync with the backend lexer!!!
      21             :  *
      22             :  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
      23             :  *
      24             :  * See psqlscan_int.h for additional commentary.
      25             :  *
      26             :  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
      27             :  * Portions Copyright (c) 1994, Regents of the University of California
      28             :  *
      29             :  * IDENTIFICATION
      30             :  *    src/fe_utils/psqlscan.l
      31             :  *
      32             :  *-------------------------------------------------------------------------
      33             :  */
      34             : #include "postgres_fe.h"
      35             : 
      36             : #include "fe_utils/psqlscan.h"
      37             : 
      38             : #include "libpq-fe.h"
      39             : }
      40             : 
      41             : %{
      42             : #include "fe_utils/psqlscan_int.h"
      43             : 
      44             : /*
      45             :  * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
      46             :  * doesn't presently make use of that argument, so just declare it as int.
      47             :  */
      48             : typedef int YYSTYPE;
      49             : 
      50             : /*
      51             :  * Set the type of yyextra; we use it as a pointer back to the containing
      52             :  * PsqlScanState.
      53             :  */
      54             : #define YY_EXTRA_TYPE PsqlScanState
      55             : 
      56             : 
      57             : /* Return values from yylex() */
      58             : #define LEXRES_EOL          0   /* end of input */
      59             : #define LEXRES_SEMI         1   /* command-terminating semicolon found */
      60             : #define LEXRES_BACKSLASH    2   /* backslash command start */
      61             : 
      62             : 
      63             : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
      64             : 
      65             : /*
      66             :  * Work around a bug in flex 2.5.35: it emits a couple of functions that
      67             :  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
      68             :  * this would cause warnings.  Providing our own declarations should be
      69             :  * harmless even when the bug gets fixed.
      70             :  */
      71             : extern int  psql_yyget_column(yyscan_t yyscanner);
      72             : extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
      73             : 
      74             : %}
      75             : 
      76             : %option reentrant
      77             : %option bison-bridge
      78             : %option 8bit
      79             : %option never-interactive
      80             : %option nodefault
      81             : %option noinput
      82             : %option nounput
      83             : %option noyywrap
      84             : %option warn
      85             : %option prefix="psql_yy"
      86             : 
      87             : /*
      88             :  * All of the following definitions and rules should exactly match
      89             :  * src/backend/parser/scan.l so far as the flex patterns are concerned.
      90             :  * The rule bodies are just ECHO as opposed to what the backend does,
      91             :  * however.  (But be sure to duplicate code that affects the lexing process,
      92             :  * such as BEGIN() and yyless().)  Also, psqlscan uses a single <<EOF>> rule
      93             :  * whereas scan.l has a separate one for each exclusive state.
      94             :  */
      95             : 
      96             : /*
      97             :  * OK, here is a short description of lex/flex rules behavior.
      98             :  * The longest pattern which matches an input string is always chosen.
      99             :  * For equal-length patterns, the first occurring in the rules list is chosen.
     100             :  * INITIAL is the starting state, to which all non-conditional rules apply.
     101             :  * Exclusive states change parsing rules while the state is active.  When in
     102             :  * an exclusive state, only those rules defined for that state apply.
     103             :  *
     104             :  * We use exclusive states for quoted strings, extended comments,
     105             :  * and to eliminate parsing troubles for numeric strings.
     106             :  * Exclusive states:
     107             :  *  <xb> bit string literal
     108             :  *  <xc> extended C-style comments
     109             :  *  <xd> delimited identifiers (double-quoted identifiers)
     110             :  *  <xh> hexadecimal numeric string
     111             :  *  <xq> standard quoted strings
     112             :  *  <xe> extended quoted strings (support backslash escape sequences)
     113             :  *  <xdolq> $foo$ quoted strings
     114             :  *  <xui> quoted identifier with Unicode escapes
     115             :  *  <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
     116             :  *  <xus> quoted string with Unicode escapes
     117             :  *  <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
     118             :  *
     119             :  * Note: we intentionally don't mimic the backend's <xeu> state; we have
     120             :  * no need to distinguish it from <xe> state, and no good way to get out
     121             :  * of it in error cases.  The backend just throws yyerror() in those
     122             :  * cases, but that's not an option here.
     123             :  */
     124             : 
     125             : %x xb
     126             : %x xc
     127             : %x xd
     128             : %x xh
     129             : %x xe
     130             : %x xq
     131             : %x xdolq
     132             : %x xui
     133             : %x xuiend
     134             : %x xus
     135             : %x xusend
     136             : 
     137             : /*
     138             :  * In order to make the world safe for Windows and Mac clients as well as
     139             :  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
     140             :  * sequence will be seen as two successive newlines, but that doesn't cause
     141             :  * any problems.  Comments that start with -- and extend to the next
     142             :  * newline are treated as equivalent to a single whitespace character.
     143             :  *
     144             :  * NOTE a fine point: if there is no newline following --, we will absorb
     145             :  * everything to the end of the input as a comment.  This is correct.  Older
     146             :  * versions of Postgres failed to recognize -- as a comment if the input
     147             :  * did not end with a newline.
     148             :  *
     149             :  * XXX perhaps \f (formfeed) should be treated as a newline as well?
     150             :  *
     151             :  * XXX if you change the set of whitespace characters, fix scanner_isspace()
     152             :  * to agree, and see also the plpgsql lexer.
     153             :  */
     154             : 
     155             : space           [ \t\n\r\f]
     156             : horiz_space     [ \t\f]
     157             : newline         [\n\r]
     158             : non_newline     [^\n\r]
     159             : 
     160             : comment         ("--"{non_newline}*)
     161             : 
     162             : whitespace      ({space}+|{comment})
     163             : 
     164             : /*
     165             :  * SQL requires at least one newline in the whitespace separating
     166             :  * string literals that are to be concatenated.  Silly, but who are we
     167             :  * to argue?  Note that {whitespace_with_newline} should not have * after
     168             :  * it, whereas {whitespace} should generally have a * after it...
     169             :  */
     170             : 
     171             : special_whitespace      ({space}+|{comment}{newline})
     172             : horiz_whitespace        ({horiz_space}|{comment})
     173             : whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
     174             : 
     175             : /*
     176             :  * To ensure that {quotecontinue} can be scanned without having to back up
     177             :  * if the full pattern isn't matched, we include trailing whitespace in
     178             :  * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
     179             :  * except for {quote} followed by whitespace and just one "-" (not two,
     180             :  * which would start a {comment}).  To cover that we have {quotefail}.
     181             :  * The actions for {quotestop} and {quotefail} must throw back characters
     182             :  * beyond the quote proper.
     183             :  */
     184             : quote           '
     185             : quotestop       {quote}{whitespace}*
     186             : quotecontinue   {quote}{whitespace_with_newline}{quote}
     187             : quotefail       {quote}{whitespace}*"-"
     188             : 
     189             : /* Bit string
     190             :  * It is tempting to scan the string for only those characters
     191             :  * which are allowed. However, this leads to silently swallowed
     192             :  * characters if illegal characters are included in the string.
     193             :  * For example, if xbinside is [01] then B'ABCD' is interpreted
     194             :  * as a zero-length string, and the ABCD' is lost!
     195             :  * Better to pass the string forward and let the input routines
     196             :  * validate the contents.
     197             :  */
     198             : xbstart         [bB]{quote}
     199             : xbinside        [^']*
     200             : 
     201             : /* Hexadecimal number */
     202             : xhstart         [xX]{quote}
     203             : xhinside        [^']*
     204             : 
     205             : /* National character */
     206             : xnstart         [nN]{quote}
     207             : 
     208             : /* Quoted string that allows backslash escapes */
     209             : xestart         [eE]{quote}
     210             : xeinside        [^\\']+
     211             : xeescape        [\\][^0-7]
     212             : xeoctesc        [\\][0-7]{1,3}
     213             : xehexesc        [\\]x[0-9A-Fa-f]{1,2}
     214             : xeunicode       [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
     215             : xeunicodefail   [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
     216             : 
     217             : /* Extended quote
     218             :  * xqdouble implements embedded quote, ''''
     219             :  */
     220             : xqstart         {quote}
     221             : xqdouble        {quote}{quote}
     222             : xqinside        [^']+
     223             : 
     224             : /* $foo$ style quotes ("dollar quoting")
     225             :  * The quoted string starts with $foo$ where "foo" is an optional string
     226             :  * in the form of an identifier, except that it may not contain "$",
     227             :  * and extends to the first occurrence of an identical string.
     228             :  * There is *no* processing of the quoted text.
     229             :  *
     230             :  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
     231             :  * fails to match its trailing "$".
     232             :  */
     233             : dolq_start      [A-Za-z\200-\377_]
     234             : dolq_cont       [A-Za-z\200-\377_0-9]
     235             : dolqdelim       \$({dolq_start}{dolq_cont}*)?\$
     236             : dolqfailed      \${dolq_start}{dolq_cont}*
     237             : dolqinside      [^$]+
     238             : 
     239             : /* Double quote
     240             :  * Allows embedded spaces and other special characters into identifiers.
     241             :  */
     242             : dquote          \"
     243             : xdstart         {dquote}
     244             : xdstop          {dquote}
     245             : xddouble        {dquote}{dquote}
     246             : xdinside        [^"]+
     247             : 
     248             : /* Unicode escapes */
     249             : uescape         [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
     250             : /* error rule to avoid backup */
     251             : uescapefail     [uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU]
     252             : 
     253             : /* Quoted identifier with Unicode escapes */
     254             : xuistart        [uU]&{dquote}
     255             : 
     256             : /* Quoted string with Unicode escapes */
     257             : xusstart        [uU]&{quote}
     258             : 
     259             : /* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
     260             : xustop1     {uescapefail}?
     261             : xustop2     {uescape}
     262             : 
     263             : /* error rule to avoid backup */
     264             : xufailed        [uU]&
     265             : 
     266             : 
     267             : /* C-style comments
     268             :  *
     269             :  * The "extended comment" syntax closely resembles allowable operator syntax.
     270             :  * The tricky part here is to get lex to recognize a string starting with
     271             :  * slash-star as a comment, when interpreting it as an operator would produce
     272             :  * a longer match --- remember lex will prefer a longer match!  Also, if we
     273             :  * have something like plus-slash-star, lex will think this is a 3-character
     274             :  * operator whereas we want to see it as a + operator and a comment start.
     275             :  * The solution is two-fold:
     276             :  * 1. append {op_chars}* to xcstart so that it matches as much text as
     277             :  *    {operator} would. Then the tie-breaker (first matching rule of same
     278             :  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
     279             :  *    in case it contains a star-slash that should terminate the comment.
     280             :  * 2. In the operator rule, check for slash-star within the operator, and
     281             :  *    if found throw it back with yyless().  This handles the plus-slash-star
     282             :  *    problem.
     283             :  * Dash-dash comments have similar interactions with the operator rule.
     284             :  */
     285             : xcstart         \/\*{op_chars}*
     286             : xcstop          \*+\/
     287             : xcinside        [^*/]+
     288             : 
     289             : digit           [0-9]
     290             : ident_start     [A-Za-z\200-\377_]
     291             : ident_cont      [A-Za-z\200-\377_0-9\$]
     292             : 
     293             : identifier      {ident_start}{ident_cont}*
     294             : 
     295             : /* Assorted special-case operators and operator-like tokens */
     296             : typecast        "::"
     297             : dot_dot         \.\.
     298             : colon_equals    ":="
     299             : equals_greater  "=>"
     300             : less_equals     "<="
     301             : greater_equals  ">="
     302             : less_greater    "<>"
     303             : not_equals      "!="
     304             : 
     305             : /*
     306             :  * "self" is the set of chars that should be returned as single-character
     307             :  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
     308             :  * which can be one or more characters long (but if a single-char token
     309             :  * appears in the "self" set, it is not to be returned as an Op).  Note
     310             :  * that the sets overlap, but each has some chars that are not in the other.
     311             :  *
     312             :  * If you change either set, adjust the character lists appearing in the
     313             :  * rule for "operator"!
     314             :  */
     315             : self            [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
     316             : op_chars        [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
     317             : operator        {op_chars}+
     318             : 
     319             : /* we no longer allow unary minus in numbers.
     320             :  * instead we pass it separately to parser. there it gets
     321             :  * coerced via doNegate() -- Leon aug 20 1999
     322             :  *
     323             :  * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
     324             :  *
     325             :  * {realfail1} and {realfail2} are added to prevent the need for scanner
     326             :  * backup when the {real} rule fails to match completely.
     327             :  */
     328             : 
     329             : integer         {digit}+
     330             : decimal         (({digit}*\.{digit}+)|({digit}+\.{digit}*))
     331             : decimalfail     {digit}+\.\.
     332             : real            ({integer}|{decimal})[Ee][-+]?{digit}+
     333             : realfail1       ({integer}|{decimal})[Ee]
     334             : realfail2       ({integer}|{decimal})[Ee][-+]
     335             : 
     336             : param           \${integer}
     337             : 
     338             : /* psql-specific: characters allowed in variable names */
     339             : variable_char   [A-Za-z\200-\377_0-9]
     340             : 
     341             : other           .
     342             : 
     343             : /*
     344             :  * Dollar quoted strings are totally opaque, and no escaping is done on them.
     345             :  * Other quoted strings must allow some special characters such as single-quote
     346             :  *  and newline.
     347             :  * Embedded single-quotes are implemented both in the SQL standard
     348             :  *  style of two adjacent single quotes "''" and in the Postgres/Java style
     349             :  *  of escaped-quote "\'".
     350             :  * Other embedded escaped characters are matched explicitly and the leading
     351             :  *  backslash is dropped from the string.
     352             :  * Note that xcstart must appear before operator, as explained above!
     353             :  *  Also whitespace (comment) must appear before operator.
     354             :  */
     355             : 
     356             : %%
     357             : 
     358             : %{
     359             :         /* Declare some local variables inside yylex(), for convenience */
     360       72220 :         PsqlScanState cur_state = yyextra;
     361       72220 :         PQExpBuffer output_buf = cur_state->output_buf;
     362             : 
     363             :         /*
     364             :          * Force flex into the state indicated by start_state.  This has a
     365             :          * couple of purposes: it lets some of the functions below set a new
     366             :          * starting state without ugly direct access to flex variables, and it
     367             :          * allows us to transition from one flex lexer to another so that we
     368             :          * can lex different parts of the source string using separate lexers.
     369             :          */
     370       72220 :         BEGIN(cur_state->start_state);
     371             : %}
     372             : 
     373             : {whitespace}    {
     374             :                     /*
     375             :                      * Note that the whitespace rule includes both true
     376             :                      * whitespace and single-line ("--" style) comments.
     377             :                      * We suppress whitespace at the start of the query
     378             :                      * buffer.  We also suppress all single-line comments,
     379             :                      * which is pretty dubious but is the historical
     380             :                      * behavior.
     381             :                      */
     382      176685 :                     if (!(output_buf->len == 0 || yytext[0] == '-'))
     383      164968 :                         ECHO;
     384             :                 }
     385      176685 : 
     386             : {xcstart}       {
     387          39 :                     cur_state->xcdepth = 0;
     388          39 :                     BEGIN(xc);
     389             :                     /* Put back any characters past slash-star; see above */
     390          39 :                     yyless(2);
     391          39 :                     ECHO;
     392             :                 }
     393          39 : 
     394             : <xc>{xcstart} {
     395           3 :                     cur_state->xcdepth++;
     396             :                     /* Put back any characters past slash-star; see above */
     397           3 :                     yyless(2);
     398           3 :                     ECHO;
     399             :                 }
     400           3 : 
     401             : <xc>{xcstop}  {
     402          42 :                     if (cur_state->xcdepth <= 0)
     403          39 :                         BEGIN(INITIAL);
     404             :                     else
     405           3 :                         cur_state->xcdepth--;
     406          42 :                     ECHO;
     407             :                 }
     408          42 : 
     409             : <xc>{xcinside}    {
     410          84 :                     ECHO;
     411             :                 }
     412          84 : 
     413             : <xc>{op_chars}    {
     414          18 :                     ECHO;
     415             :                 }
     416          18 : 
     417             : <xc>\*+           {
     418           0 :                     ECHO;
     419             :                 }
     420           0 : 
     421             : {xbstart}       {
     422         117 :                     BEGIN(xb);
     423         117 :                     ECHO;
     424             :                 }
     425         117 : <xb>{quotestop}   |
     426             : <xb>{quotefail} {
     427         117 :                     yyless(1);
     428         117 :                     BEGIN(INITIAL);
     429         117 :                     ECHO;
     430             :                 }
     431         117 : <xh>{xhinside}    |
     432             : <xb>{xbinside}    {
     433         144 :                     ECHO;
     434             :                 }
     435         144 : <xh>{quotecontinue}   |
     436             : <xb>{quotecontinue}   {
     437           0 :                     ECHO;
     438             :                 }
     439           0 : 
     440             : {xhstart}       {
     441             :                     /* Hexadecimal bit type.
     442             :                      * At some point we should simply pass the string
     443             :                      * forward to the parser and label it there.
     444             :                      * In the meantime, place a leading "x" on the string
     445             :                      * to mark it for the input routine as a hex string.
     446             :                      */
     447          32 :                     BEGIN(xh);
     448          32 :                     ECHO;
     449             :                 }
     450          32 : <xh>{quotestop}   |
     451             : <xh>{quotefail} {
     452          32 :                     yyless(1);
     453          32 :                     BEGIN(INITIAL);
     454          32 :                     ECHO;
     455             :                 }
     456          32 : 
     457             : {xnstart}       {
     458           0 :                     yyless(1);  /* eat only 'n' this time */
     459           0 :                     ECHO;
     460             :                 }
     461           0 : 
     462             : {xqstart}       {
     463       14706 :                     if (cur_state->std_strings)
     464       14659 :                         BEGIN(xq);
     465             :                     else
     466          47 :                         BEGIN(xe);
     467       14706 :                     ECHO;
     468             :                 }
     469       14706 : {xestart}       {
     470          90 :                     BEGIN(xe);
     471          90 :                     ECHO;
     472             :                 }
     473          90 : {xusstart}      {
     474          12 :                     BEGIN(xus);
     475          12 :                     ECHO;
     476             :                 }
     477          12 : <xq,xe>{quotestop}    |
     478             : <xq,xe>{quotefail} {
     479       14796 :                     yyless(1);
     480       14796 :                     BEGIN(INITIAL);
     481       14796 :                     ECHO;
     482             :                 }
     483       14796 : <xus>{quotestop} |
     484             : <xus>{quotefail} {
     485             :                     /* throw back all but the quote */
     486          12 :                     yyless(1);
     487          12 :                     BEGIN(xusend);
     488          12 :                     ECHO;
     489             :                 }
     490          12 : <xusend>{whitespace} {
     491           8 :                     ECHO;
     492             :                 }
     493           8 : <xusend>{other} |
     494             : <xusend>{xustop1} {
     495           6 :                     yyless(0);
     496           6 :                     BEGIN(INITIAL);
     497           6 :                     ECHO;
     498             :                 }
     499           6 : <xusend>{xustop2} {
     500           6 :                     BEGIN(INITIAL);
     501           6 :                     ECHO;
     502             :                 }
     503           6 : <xq,xe,xus>{xqdouble} {
     504         367 :                     ECHO;
     505             :                 }
     506         367 : <xq,xus>{xqinside}  {
     507       15054 :                     ECHO;
     508             :                 }
     509       15054 : <xe>{xeinside}  {
     510         198 :                     ECHO;
     511             :                 }
     512         198 : <xe>{xeunicode} {
     513           0 :                     ECHO;
     514             :                 }
     515           0 : <xe>{xeunicodefail}   {
     516           0 :                     ECHO;
     517             :                 }
     518           0 : <xe>{xeescape}  {
     519         128 :                     ECHO;
     520             :                 }
     521         128 : <xe>{xeoctesc}  {
     522           2 :                     ECHO;
     523             :                 }
     524           2 : <xe>{xehexesc}  {
     525           0 :                     ECHO;
     526             :                 }
     527           0 : <xq,xe,xus>{quotecontinue} {
     528           0 :                     ECHO;
     529             :                 }
     530           0 : <xe>.         {
     531             :                     /* This is only needed for \ just before EOF */
     532           0 :                     ECHO;
     533             :                 }
     534           0 : 
     535             : {dolqdelim}     {
     536         458 :                     cur_state->dolqstart = pg_strdup(yytext);   /* save the opening tag; <xdolq> compares against it */
     537         458 :                     BEGIN(xdolq);
     538         458 :                     ECHO;
     539             :                 }
     540         458 : {dolqfailed}    {
     541             :                     /* not a complete delimiter: throw back all but the initial "$" */
     542           0 :                     yyless(1);
     543           0 :                     ECHO;
     544             :                 }
     545           0 : <xdolq>{dolqdelim} {
     546         480 :                     if (strcmp(yytext, cur_state->dolqstart) == 0)   /* same tag as the opener: the quote ends here */
     547             :                     {
     548         458 :                         free(cur_state->dolqstart);
     549         458 :                         cur_state->dolqstart = NULL;   /* psql_scan_reset frees a non-NULL dolqstart, so clear it */
     550         458 :                         BEGIN(INITIAL);
     551             :                     }
     552             :                     else
     553             :                     {
     554             :                         /*
     555             :                          * When we fail to match $...$ to dolqstart, transfer
     556             :                          * the $... part to the output, but put back the final
     557             :                          * $ for rescanning.  Consider $delim$...$junk$delim$
     558             :                          */
     559          22 :                         yyless(yyleng - 1);
     560             :                     }
     561         480 :                     ECHO;   /* runs for both branches, after any yyless above */
     562             :                 }
     563         480 : <xdolq>{dolqinside} {
     564        2935 :                     ECHO;
     565             :                 }
     566        2935 : <xdolq>{dolqfailed} {
     567           7 :                     ECHO;
     568             :                 }
     569           7 : <xdolq>.      {
     570             :                     /* This is only needed for $ inside the quoted text */
     571         244 :                     ECHO;
     572             :                 }
     573         244 : 
     574             : {xdstart}       {
     575         929 :                     BEGIN(xd);
     576         929 :                     ECHO;
     577             :                 }
     578         929 : {xuistart}      {
     579           6 :                     BEGIN(xui);
     580           6 :                     ECHO;
     581             :                 }
     582           6 : <xd>{xdstop}  {
     583         929 :                     BEGIN(INITIAL);
     584         929 :                     ECHO;
     585             :                 }
     586         929 : <xui>{dquote} {
     587           6 :                     yyless(1);   /* keep only the first character of the match; the rest is rescanned */
     588           6 :                     BEGIN(xuiend);
     589           6 :                     ECHO;
     590             :                 }
     591           6 : <xuiend>{whitespace} {
     592           4 :                     ECHO;
     593             :                 }
     594           4 : <xuiend>{other} |
     595             : <xuiend>{xustop1} {
     596           2 :                     yyless(0);   /* throw back the whole match; it is rescanned after the switch to INITIAL */
     597           2 :                     BEGIN(INITIAL);
     598           2 :                     ECHO;
     599             :                 }
     600           2 : <xuiend>{xustop2} {
     601           4 :                     BEGIN(INITIAL);
     602           4 :                     ECHO;
     603             :                 }
     604           4 : <xd,xui>{xddouble}    {
     605           3 :                     ECHO;
     606             :                 }
     607           3 : <xd,xui>{xdinside}    {
     608         939 :                     ECHO;
     609             :                 }
     610         939 : 
     611             : {xufailed}  {
     612             :                     /* throw back all but the initial u/U */
     613           0 :                     yyless(1);
     614           0 :                     ECHO;
     615             :                 }
     616           0 : 
     617             : {typecast}      {
     618        3149 :                     ECHO;   /* this and the following multi-character tokens are passed through without any state change */
     619             :                 }
     620        3149 : 
     621             : {dot_dot}       {
     622           0 :                     ECHO;
     623             :                 }
     624           0 : 
     625             : {colon_equals}  {
     626          80 :                     ECHO;
     627             :                 }
     628          80 : 
     629             : {equals_greater} {
     630          20 :                     ECHO;
     631             :                 }
     632          20 : 
     633             : {less_equals}   {
     634          92 :                     ECHO;
     635             :                 }
     636          92 : 
     637             : {greater_equals} {
     638         143 :                     ECHO;
     639             :                 }
     640         143 : 
     641             : {less_greater}  {
     642          98 :                     ECHO;
     643             :                 }
     644          98 : 
     645             : {not_equals}    {
     646         450 :                     ECHO;
     647             :                 }
     648         450 : 
     649             :     /*
     650             :      * These rules are specific to psql --- they implement parenthesis
     651             :      * counting and detection of command-ending semicolon.  These must
     652             :      * appear before the {self} rule so that they take precedence over it.
     653             :      */
     654             : 
     655             : "("               {
     656       22493 :                     cur_state->paren_depth++;
     657       22493 :                     ECHO;
     658             :                 }
     659       22493 : 
     660             : ")"               {
     661       22491 :                     if (cur_state->paren_depth > 0)   /* an unmatched ")" must not drive the depth negative */
     662       22491 :                         cur_state->paren_depth--;
     663       22491 :                     ECHO;
     664             :                 }
     665       22491 : 
     666             : ";"               {
     667       24269 :                     ECHO;   /* the semicolon is emitted whether or not it ends the command */
     668       24269 :                     if (cur_state->paren_depth == 0)   /* a semicolon inside parentheses does not end the command */
     669             :                     {
     670             :                         /* Terminate lexing temporarily, recording the current start condition */
     671       24261 :                         cur_state->start_state = YY_START;
     672       24261 :                         return LEXRES_SEMI;
     673             :                     }
     674             :                 }
     675           8 : 
     676             :     /*
     677             :      * psql-specific rules to handle backslash commands and variable
     678             :      * substitution.  We want these before {self}, also.
     679             :      */
     680             : 
     681             : "\\"[;:]      {
     682             :                     /* Force a semicolon or colon into the query buffer: emit only the one character after the backslash */
     683           8 :                     psqlscan_emit(cur_state, yytext + 1, 1);
     684             :                 }
     685           8 : 
     686             : "\\"          {
     687             :                     /* Terminate lexing temporarily; psql_scan reports this to its caller as PSCAN_BACKSLASH */
     688         785 :                     cur_state->start_state = YY_START;
     689         785 :                     return LEXRES_BACKSLASH;
     690             :                 }
     691             : 
     692             : :{variable_char}+   {
     693             :                     /* Possible psql variable substitution */
     694             :                     char       *varname;
     695             :                     char       *value;
     696             : 
     697         152 :                     varname = psqlscan_extract_substring(cur_state,
     698          76 :                                                          yytext + 1,
     699          76 :                                                          yyleng - 1);   /* the name is the match minus its leading colon */
     700          76 :                     if (cur_state->callbacks->get_variable)
     701          76 :                         value = cur_state->callbacks->get_variable(varname,
     702             :                                                                    PQUOTE_PLAIN,
     703             :                                                                    cur_state->cb_passthrough);
     704             :                     else
     705           0 :                         value = NULL;   /* no lookup callback: handled below like an unknown variable */
     706             : 
     707          76 :                     if (value)
     708             :                     {
     709             :                         /* It is a variable, check for recursion */
     710          17 :                         if (psqlscan_var_is_current_source(cur_state, varname))
     711             :                         {
     712             :                             /* Recursive expansion --- don't go there.  NOTE(review): write_error is called without the NULL check that get_variable gets; presumably it is mandatory --- confirm */
     713           0 :                             cur_state->callbacks->write_error("skipping recursive expansion of variable \"%s\"\n",
     714             :                                                               varname);
     715             :                             /* Instead copy the string as is */
     716           0 :                             ECHO;
     717             :                         }
     718             :                         else
     719             :                         {
     720             :                             /* OK, perform substitution */
     721          17 :                             psqlscan_push_new_buffer(cur_state, value, varname);
     722             :                             /* yy_scan_string already made buffer active */
     723             :                         }
     724          17 :                         free(value);   /* NOTE(review): freed right after the push, so psqlscan_push_new_buffer presumably keeps its own copy --- confirm */
     725             :                     }
     726             :                     else
     727             :                     {
     728             :                         /*
     729             :                          * if the variable doesn't exist we'll copy the string
     730             :                          * as is
     731             :                          */
     732          59 :                         ECHO;
     733             :                     }
     734             : 
     735          76 :                     free(varname);   /* released on every path through this action */
     736             :                 }
     737          76 : 
     738             : :'{variable_char}+' {
     739           1 :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     740             :                                              PQUOTE_SQL_LITERAL);   /* whole match, colon and quotes included, is handed to the helper; presumably it emits the value quoted as a literal --- confirm */
     741             :                 }
     742           1 : 
     743             : :\"{variable_char}+\" {
     744           0 :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     745             :                                              PQUOTE_SQL_IDENT);
     746             :                 }
     747           0 : 
     748             :     /*
     749             :      * These rules just avoid the need for scanner backup if one of the
     750             :      * two rules above fails to match completely.
     751             :      */
     752             : 
     753             : :'{variable_char}*  {
     754             :                     /* Throw back everything but the colon */
     755           0 :                     yyless(1);
     756           0 :                     ECHO;
     757             :                 }
     758           0 : 
     759             : :\"{variable_char}*    {
     760             :                     /* Throw back everything but the colon */
     761           0 :                     yyless(1);
     762           0 :                     ECHO;
     763             :                 }
     764           0 : 
     765             :     /*
     766             :      * Back to backend-compatible rules; the psql-specific rules had to come before {self}.
     767             :      */
     768             : 
     769             : {self}          {
     770       41690 :                     ECHO;
     771             :                 }
     772       41690 : 
     773             : {operator}      {
     774             :                     /*
     775             :                      * Check for embedded slash-star or dash-dash; those
     776             :                      * are comment starts, so operator must stop there.
     777             :                      * Note that slash-star or dash-dash at the first
     778             :                      * character will match a prior rule, not this one.
     779             :                      */
     780        1501 :                     int         nchars = yyleng;   /* number of leading characters kept as the operator */
     781        1501 :                     char       *slashstar = strstr(yytext, "/*");
     782        1501 :                     char       *dashdash = strstr(yytext, "--");
     783             : 
     784        1501 :                     if (slashstar && dashdash)
     785             :                     {
     786             :                         /* if both appear, take the first one */
     787           0 :                         if (slashstar > dashdash)
     788           0 :                             slashstar = dashdash;
     789             :                     }
     790        1501 :                     else if (!slashstar)
     791        1501 :                         slashstar = dashdash;   /* may still be NULL when neither comment start is present */
     792        1501 :                     if (slashstar)
     793           0 :                         nchars = slashstar - yytext;   /* cut the operator just before the comment start */
     794             : 
     795             :                     /*
     796             :                      * For SQL compatibility, '+' and '-' cannot be the
     797             :                      * last char of a multi-char operator unless the operator
     798             :                      * contains chars that are not in SQL operators.
     799             :                      * The idea is to lex '=-' as two operators, but not
     800             :                      * to forbid operator names like '?-' that could not be
     801             :                      * sequences of SQL operators.
     802             :                      */
     803        4342 :                     while (nchars > 1 &&
     804        2668 :                            (yytext[nchars - 1] == '+' ||
     805        1334 :                             yytext[nchars - 1] == '-'))
     806             :                     {
     807             :                         int         ic;
     808             : 
     809          33 :                         for (ic = nchars - 2; ic >= 0; ic--)   /* scan the characters before the trailing +/- */
     810             :                         {
     811          27 :                             if (strchr("~!@#^&|`?%", yytext[ic]))
     812          21 :                                 break;
     813             :                         }
     814          27 :                         if (ic >= 0)
     815          21 :                             break; /* found a char that makes it OK */
     816           6 :                         nchars--; /* else remove the +/-, and check again */
     817             :                     }
     818             : 
     819        1501 :                     if (nchars < yyleng)
     820             :                     {
     821             :                         /* Strip the unwanted chars from the token; they are rescanned */
     822           6 :                         yyless(nchars);
     823             :                     }
     824        1501 :                     ECHO;
     825             :                 }
     826        1501 : 
     827             : {param}         {
     828          23 :                     ECHO;
     829             :                 }
     830          23 : 
     831             : {integer}       {
     832       13843 :                     ECHO;
     833             :                 }
     834       13843 : {decimal}       {
     835         729 :                     ECHO;
     836             :                 }
     837         729 : {decimalfail}   {
     838             :                     /* throw back the .., and treat as integer */
     839           0 :                     yyless(yyleng - 2);
     840           0 :                     ECHO;
     841             :                 }
     842           0 : {real}          {
     843          24 :                     ECHO;
     844             :                 }
     845          24 : {realfail1}     {
     846             :                     /*
     847             :                      * throw back the [Ee], and treat as {decimal}.  Note
     848             :                      * that it is possible the input is actually {integer},
     849             :                      * but since this case will almost certainly lead to a
     850             :                      * syntax error anyway, we don't bother to distinguish.
     851             :                      */
     852           0 :                     yyless(yyleng - 1);
     853           0 :                     ECHO;
     854             :                 }
     855           0 : {realfail2}     {
     856             :                     /* throw back the two characters [Ee][+-], and proceed as for {realfail1} */
     857           0 :                     yyless(yyleng - 2);
     858           0 :                     ECHO;
     859             :                 }
     860           0 : 
     861             : 
     862             : {identifier}    {
     863      174233 :                     ECHO;
     864             :                 }
     865      174233 : 
     866             : {other}         {
     867           0 :                     ECHO;   /* last rule before EOF; presumably the catch-all for anything unmatched above --- confirm against the {other} definition */
     868             :                 }
     869           0 : 
     870             : <<EOF>>         {
     871       47191 :                     if (cur_state->buffer_stack == NULL)   /* no variable-expansion buffer is pending */
     872             :                     {
     873       47174 :                         cur_state->start_state = YY_START;   /* psql_scan uses this to pick the result and prompt */
     874       47174 :                         return LEXRES_EOL;      /* end of input reached */
     875             :                     }
     876             : 
     877             :                     /*
     878             :                      * We were expanding a variable, so pop the inclusion
     879             :                      * stack and keep lexing
     880             :                      */
     881          17 :                     psqlscan_pop_buffer_stack(cur_state);
     882          17 :                     psqlscan_select_top_buffer(cur_state);
     883             :                 }
     884          17 : 
     885           0 : %%
     886           0 : 
     887             : /*
     888             :  * Create a lexer working state struct and return it (psql_scan_destroy releases it).
     889             :  *
     890             :  * callbacks is a struct of function pointers that encapsulate some
     891             :  * behavior we need from the surrounding program.  This struct must
     892             :  * remain valid for the lifespan of the PsqlScanState.
     893             :  */
     894             : PsqlScanState
     895         181 : psql_scan_create(const PsqlScanCallbacks *callbacks)
     896             : {
     897             :     PsqlScanState state;
     898             : 
     899         181 :     state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));   /* presumably zero-filled, so pointer fields start out NULL --- confirm */
     900             : 
     901         181 :     state->callbacks = callbacks;   /* the pointer is stored, not copied */
     902             : 
     903         181 :     yylex_init(&state->scanner);   /* NOTE(review): return value is ignored --- confirm whether a failure needs handling */
     904             : 
     905         181 :     yyset_extra(state, state->scanner);
     906             : 
     907         181 :     psql_scan_reset(state);   /* start_state = INITIAL, paren_depth = 0, no saved dollar-quote tag */
     908             : 
     909         181 :     return state;
     910             : }
     911             : 
     912             : /*
     913             :  * Destroy a lexer working state struct, releasing all resources; state must not be used afterwards.
     914             :  */
     915             : void
     916         181 : psql_scan_destroy(PsqlScanState state)
     917             : {
     918         181 :     psql_scan_finish(state);   /* drops pending expansion buffers and the scan buffer, if any */
     919             : 
     920         181 :     psql_scan_reset(state);   /* frees a leftover dollar-quote tag, if any */
     921             : 
     922         181 :     yylex_destroy(state->scanner);
     923             : 
     924         181 :     free(state);
     925         181 : }
     926             : 
     927             : /*
     928             :  * Set the callback passthrough pointer for the lexer.
     929             :  *
     930             :  * This could have been integrated into psql_scan_create, but keeping it
     931             :  * separate allows the application to change the pointer later, which might
     932             :  * be useful.
     933             :  */
     934             : void
     935         181 : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
     936             : {
     937         181 :     state->cb_passthrough = passthrough;   /* only stored here; the lexer hands it to the get_variable callback */
     938         181 : }
     939             : 
     940             : /*
     941             :  * Set up to perform lexing of the given input line.
     942             :  *
     943             :  * The text at *line, extending for line_len bytes, will be scanned by
     944             :  * subsequent calls to the psql_scan routines.  psql_scan_finish should
     945             :  * be called when scanning is complete.  Note that the lexer retains
     946             :  * a pointer to the storage at *line --- this string must not be altered
     947             :  * or freed until after psql_scan_finish is called.
     948             :  *
     949             :  * encoding is the libpq identifier for the character encoding in use,
     950             :  * and std_strings says whether standard_conforming_strings is on.
     951             :  */
     952             : void
     953       47174 : psql_scan_setup(PsqlScanState state,
     954             :                 const char *line, int line_len,
     955             :                 int encoding, bool std_strings)
     956             : {
     957             :     /* Mustn't be scanning already: psql_scan_finish leaves both of these NULL */
     958       47174 :     Assert(state->scanbufhandle == NULL);
     959       47174 :     Assert(state->buffer_stack == NULL);
     960             : 
     961             :     /* Do we need to hack the character set encoding? */
     962       47174 :     state->encoding = encoding;
     963       47174 :     state->safe_encoding = pg_valid_server_encoding_id(encoding);
     964             : 
     965             :     /* Save standard-strings flag as well */
     966       47174 :     state->std_strings = std_strings;
     967             : 
     968             :     /* Set up flex input buffer with appropriate translation and padding */
     969       47174 :     state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
     970             :                                                    &state->scanbuf);   /* scanbuf is later freed by psql_scan_finish */
     971       47174 :     state->scanline = line;   /* the caller's storage, not a copy */
     972             : 
     973             :     /* Set lookaside data in case we have to map unsafe encoding */
     974       47174 :     state->curline = state->scanbuf;
     975       47174 :     state->refline = state->scanline;
     976       47174 : }
     977             : 
     978             : /*
     979             :  * Do lexical analysis of SQL command text.
     980             :  *
     981             :  * The text previously passed to psql_scan_setup is scanned, and appended
     982             :  * (possibly with transformation) to query_buf.
     983             :  *
     984             :  * The return value indicates the condition that stopped scanning:
     985             :  *
     986             :  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
     987             :  * transferred to query_buf.)  The command accumulated in query_buf should
     988             :  * be executed, then clear query_buf and call again to scan the remainder
     989             :  * of the line.
     990             :  *
     991             :  * PSCAN_BACKSLASH: found a backslash that starts a special command.
     992             :  * Any previous data on the line has been transferred to query_buf.
     993             :  * The caller will typically next apply a separate flex lexer to scan
     994             :  * the special command.
     995             :  *
     996             :  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
     997             :  * incomplete SQL command, or query_buf is still empty.  *prompt is set to the appropriate prompt type.
     998             :  *
     999             :  * PSCAN_EOL: the end of the line was reached, and there is no lexical
    1000             :  * reason to consider the command incomplete.  The caller may or may not
    1001             :  * choose to send it.  *prompt is set to the appropriate prompt type if
    1002             :  * the caller chooses to collect more input.
    1003             :  *
    1004             :  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
    1005             :  * be called next, then the cycle may be repeated with a fresh input line.
    1006             :  *
    1007             :  * In all cases, *prompt is set to an appropriate prompt type code for the
    1008             :  * next line-input operation.
    1009             :  */
    1010             : PsqlScanResult
    1011       72220 : psql_scan(PsqlScanState state,
    1012             :           PQExpBuffer query_buf,
    1013             :           promptStatus_t *prompt)
    1014             : {
    1015             :     PsqlScanResult result;
    1016             :     int         lexresult;
    1017             : 
    1018             :     /* Must be scanning already, i.e. psql_scan_setup has been called */
    1019       72220 :     Assert(state->scanbufhandle != NULL);
    1020             : 
    1021             :     /* Set current output target */
    1022       72220 :     state->output_buf = query_buf;
    1023             : 
    1024             :     /* Set input source: a pending variable-expansion buffer takes priority over the main line buffer */
    1025       72220 :     if (state->buffer_stack != NULL)
    1026           0 :         yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
    1027             :     else
    1028       72220 :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1029             : 
    1030             :     /* And lex. */
    1031       72220 :     lexresult = yylex(NULL, state->scanner);
    1032             : 
    1033             :     /*
    1034             :      * Check termination state and return appropriate result info.
    1035             :      */
    1036       72220 :     switch (lexresult)
    1037             :     {
    1038             :         case LEXRES_EOL:        /* end of input */
    1039       47174 :             switch (state->start_state)   /* start condition saved by the lexer's EOF rule */
    1040             :             {
    1041             :                 case INITIAL:
    1042             :                 case xuiend:    /* we treat these like INITIAL */
    1043             :                 case xusend:
    1044       43215 :                     if (state->paren_depth > 0)
    1045             :                     {
    1046        3239 :                         result = PSCAN_INCOMPLETE;
    1047        3239 :                         *prompt = PROMPT_PAREN;
    1048             :                     }
    1049       39976 :                     else if (query_buf->len > 0)
    1050             :                     {
    1051        7209 :                         result = PSCAN_EOL;
    1052        7209 :                         *prompt = PROMPT_CONTINUE;
    1053             :                     }
    1054             :                     else
    1055             :                     {
    1056             :                         /* never bother to send an empty buffer */
    1057       32767 :                         result = PSCAN_INCOMPLETE;
    1058       32767 :                         *prompt = PROMPT_READY;
    1059             :                     }
    1060       43215 :                     break;
    1061             :                 case xb:
    1062           0 :                     result = PSCAN_INCOMPLETE;
    1063           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1064           0 :                     break;
    1065             :                 case xc:
    1066          36 :                     result = PSCAN_INCOMPLETE;
    1067          36 :                     *prompt = PROMPT_COMMENT;
    1068          36 :                     break;
    1069             :                 case xd:
    1070           3 :                     result = PSCAN_INCOMPLETE;
    1071           3 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1072           3 :                     break;
    1073             :                 case xh:
    1074           0 :                     result = PSCAN_INCOMPLETE;
    1075           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1076           0 :                     break;
    1077             :                 case xe:
    1078           0 :                     result = PSCAN_INCOMPLETE;
    1079           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1080           0 :                     break;
    1081             :                 case xq:
    1082        1024 :                     result = PSCAN_INCOMPLETE;
    1083        1024 :                     *prompt = PROMPT_SINGLEQUOTE;
    1084        1024 :                     break;
    1085             :                 case xdolq:
    1086        2896 :                     result = PSCAN_INCOMPLETE;
    1087        2896 :                     *prompt = PROMPT_DOLLARQUOTE;
    1088        2896 :                     break;
    1089             :                 case xui:
    1090           0 :                     result = PSCAN_INCOMPLETE;
    1091           0 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1092           0 :                     break;
    1093             :                 case xus:
    1094           0 :                     result = PSCAN_INCOMPLETE;
    1095           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1096           0 :                     break;
    1097             :                 default:
    1098             :                     /* can't get here: every start condition is expected to be listed above */
    1099           0 :                     fprintf(stderr, "invalid YY_START\n");
    1100           0 :                     exit(1);
    1101             :             }
    1102       47174 :             break;
    1103             :         case LEXRES_SEMI:       /* semicolon */
    1104       24261 :             result = PSCAN_SEMICOLON;
    1105       24261 :             *prompt = PROMPT_READY;
    1106       24261 :             break;
    1107             :         case LEXRES_BACKSLASH:  /* backslash */
    1108         785 :             result = PSCAN_BACKSLASH;
    1109         785 :             *prompt = PROMPT_READY;
    1110         785 :             break;
    1111             :         default:
    1112             :             /* can't get here: the lexer rules return only the three codes handled above */
    1113           0 :             fprintf(stderr, "invalid yylex result\n");
    1114           0 :             exit(1);
    1115             :     }
    1116             : 
    1117       72220 :     return result;
    1118             : }
    1119             : 
    1120             : /*
    1121             :  * Clean up after scanning a string.  This flushes any unread input and
    1122             :  * releases resources (but not the PsqlScanState itself).  Note however
    1123             :  * that this does not reset the lexer scan state; that can be done by
    1124             :  * psql_scan_reset(), which is an orthogonal operation.
    1125             :  *
    1126             :  * It is legal to call this when not scanning anything (makes it easier
    1127             :  * to deal with error recovery).
    1128             :  */
    1129             : void
    1130       47355 : psql_scan_finish(PsqlScanState state)
    1131             : {
    1132             :     /* Drop any incomplete variable expansions. */
    1133       94710 :     while (state->buffer_stack != NULL)
    1134           0 :         psqlscan_pop_buffer_stack(state);
    1135             : 
    1136             :     /* Done with the outer scan buffer, too */
    1137       47355 :     if (state->scanbufhandle)
    1138       47174 :         yy_delete_buffer(state->scanbufhandle, state->scanner);
    1139       47355 :     state->scanbufhandle = NULL;   /* psql_scan_setup asserts this is NULL before it starts a new scan */
    1140       47355 :     if (state->scanbuf)   /* NOTE(review): the NULL guard is not needed, since free(NULL) is a no-op */
    1141       47174 :         free(state->scanbuf);
    1142       47355 :     state->scanbuf = NULL;
    1143       47355 : }
    1144             : 
    1145             : /*
    1146             :  * Reset lexer scanning state to start conditions.  This is appropriate
    1147             :  * for executing \r psql commands (or any other time that we discard the
    1148             :  * prior contents of query_buf).  It is not, however, necessary to do this
    1149             :  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
    1150             :  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
    1151             :  * conditions are returned.  (NOTE(review): psql_scan also returns PSCAN_EOL from xuiend and xusend, which it treats like INITIAL.)
    1152             :  *
    1153             :  * Note that this is unrelated to flushing unread input; that task is
    1154             :  * done by psql_scan_finish().
    1155             :  */
    1156             : void
    1157         421 : psql_scan_reset(PsqlScanState state)
    1158             : {
    1159         421 :     state->start_state = INITIAL;
    1160         421 :     state->paren_depth = 0;
    1161         421 :     state->xcdepth = 0;          /* not really necessary */
    1162         421 :     if (state->dolqstart)   /* left over if scanning stopped inside a dollar-quoted string */
    1163           0 :         free(state->dolqstart);
    1164         421 :     state->dolqstart = NULL;
    1165         421 : }
    1166             : 
    1167             : /*
    1168             :  * Reselect this lexer (psqlscan.l) after using another one.
    1169             :  *
    1170             :  * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
    1171             :  * state, because we'd never switch to another lexer in a different state.
    1172             :  * However, we don't want to reset e.g. paren_depth, so this can't be
    1173             :  * the same as psql_scan_reset().
    1174             :  *
    1175             :  * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
    1176             :  * must be a superset of this.
    1177             :  *
    1178             :  * Note: it seems likely that other lexers could just assign INITIAL for
    1179             :  * themselves, since that probably has the value zero in every flex-generated
    1180             :  * lexer.  But let's not assume that.
    1181             :  */
    1182             : void
    1183        3644 : psql_scan_reselect_sql_lexer(PsqlScanState state)
    1184             : {
    1185        3644 :     state->start_state = INITIAL;    /* the only field touched; nesting counters keep their values */
    1186        3644 : }
    1187             : 
    1188             : /*
    1189             :  * Return true if lexer is currently in an "inside quotes" state.
    1190             :  *
    1191             :  * This is pretty grotty but is needed to preserve the old behavior
    1192             :  * that mainloop.c drops blank lines not inside quotes without even
    1193             :  * echoing them.  As coded, every start state other than INITIAL counts.
    1194             :  */
    1195             : bool
    1196       11390 : psql_scan_in_quote(PsqlScanState state)
    1197             : {
    1198       11390 :     return state->start_state != INITIAL;    /* reads the saved start state only; nothing is modified */
    1199             : }
    1200             : 
    1201             : /*
    1202             :  * Push the given string onto the stack of stuff to scan.  newstr is always
    1203             :  * copied; varname may be NULL, otherwise a private copy is kept in the entry.
    1204             :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1205             :  */
    1206             : void
    1207          17 : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
    1208             :                          const char *varname)
    1209             : {
    1210             :     StackElem  *stackelem;
    1211             : 
    1212          17 :     stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
    1213             : 
    1214             :     /*
    1215             :      * In current usage, the passed varname points at the current flex input
    1216             :      * buffer; we must copy it before calling psqlscan_prepare_buffer()
    1217             :      * because that will change the buffer state.
    1218             :      */
    1219          17 :     stackelem->varname = varname ? pg_strdup(varname) : NULL;
    1220             : 
    1221          17 :     stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
    1222             :                                              &stackelem->bufstring);
    1223          17 :     state->curline = stackelem->bufstring;   /* the text flex actually scans */
    1224          17 :     if (state->safe_encoding)
    1225             :     {
    1226          17 :         stackelem->origstring = NULL;        /* scanned copy is unmodified, no second copy needed */
    1227          17 :         state->refline = stackelem->bufstring;
    1228             :     }
    1229             :     else
    1230             :     {
    1231           0 :         stackelem->origstring = pg_strdup(newstr);   /* keeps the real bytes behind the 0xFF placeholders */
    1232           0 :         state->refline = stackelem->origstring;
    1233             :     }
    1234          17 :     stackelem->next = state->buffer_stack;   /* link in as the new top of stack */
    1235          17 :     state->buffer_stack = stackelem;
    1236          17 : }
    1237             : 
    1238             : /*
    1239             :  * Pop the topmost buffer stack item (there must be one! this is not checked)
    1240             :  *
    1241             :  * NB: after this, the flex input state is unspecified; caller must
    1242             :  * switch to an appropriate buffer to continue lexing.
    1243             :  * See psqlscan_select_top_buffer().
    1244             :  */
    1245             : void
    1246          17 : psqlscan_pop_buffer_stack(PsqlScanState state)
    1247             : {
    1248          17 :     StackElem  *stackelem = state->buffer_stack;
    1249             : 
    1250          17 :     state->buffer_stack = stackelem->next;   /* unlink first; dereferences the head unconditionally */
    1251          17 :     yy_delete_buffer(stackelem->buf, state->scanner);
    1252          17 :     free(stackelem->bufstring);              /* text copy filled in by psqlscan_prepare_buffer() */
    1253          17 :     if (stackelem->origstring)               /* only set when the encoding is not safe */
    1254           0 :         free(stackelem->origstring);
    1255          17 :     if (stackelem->varname)                  /* NULL when the entry was pushed without a name */
    1256          17 :         free(stackelem->varname);
    1257          17 :     free(stackelem);
    1258          17 : }
    1259             : 
    1260             : /*
    1261             :  * Select the topmost surviving buffer as the active input; if the stack is empty, fall back to the outer scan buffer.
    1262             :  */
    1263             : void
    1264          17 : psqlscan_select_top_buffer(PsqlScanState state)
    1265             : {
    1266          17 :     StackElem  *stackelem = state->buffer_stack;
    1267             : 
    1268          17 :     if (stackelem != NULL)
    1269             :     {
    1270           0 :         yy_switch_to_buffer(stackelem->buf, state->scanner);
    1271           0 :         state->curline = stackelem->bufstring;
    1272           0 :         state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;   /* origstring is NULL for safe encodings */
    1273             :     }
    1274             :     else
    1275             :     {
    1276          17 :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);   /* no pushed buffers remain */
    1277          17 :         state->curline = state->scanbuf;
    1278          17 :         state->refline = state->scanline;
    1279             :     }
    1280          17 : }
    1281             : 
    1282             : /*
    1283             :  * Check if specified variable name is the source for any string currently
    1284             :  * being scanned, i.e. equals (strcmp) the varname of some buffer stack entry.
    1285             :  */
    1286             : bool
    1287          17 : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
    1288             : {
    1289             :     StackElem  *stackelem;
    1290             : 
    1291          34 :     for (stackelem = state->buffer_stack;
    1292             :          stackelem != NULL;
    1293           0 :          stackelem = stackelem->next)
    1294             :     {
    1295           0 :         if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)   /* entries with no varname never match */
    1296           0 :             return true;
    1297             :     }
    1298          17 :     return false;    /* walked the whole stack (possibly empty) without a match */
    1299             : }
    1300             : 
    1301             : /*
    1302             :  * Set up a flex input buffer to scan the given data.  We always make a
    1303             :  * copy of the data (returned in *txtcopy).  If working in an unsafe encoding,
    1304             :  * each multibyte character keeps its first byte and has the rest replaced by
    1305             :  * FFs to avoid fooling the lexer rules.
    1306             :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1307             :  */
    1308             : YY_BUFFER_STATE
    1309       47191 : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
    1310             :                         char **txtcopy)
    1311             : {
    1312             :     char       *newtxt;
    1313             : 
    1314             :     /* Flex wants two \0 characters after the actual data */
    1315       47191 :     newtxt = pg_malloc(len + 2);
    1316       47191 :     *txtcopy = newtxt;
    1317       47191 :     newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
    1318             : 
    1319       47191 :     if (state->safe_encoding)
    1320       47191 :         memcpy(newtxt, txt, len);
    1321             :     else
    1322             :     {
    1323             :         /* Gotta do it the hard way */
    1324           0 :         int         i = 0;
    1325             : 
    1326           0 :         while (i < len)
    1327             :         {
    1328           0 :             int         thislen = PQmblen(txt + i, state->encoding);
    1329             : 
    1330             :             /* first byte should always be okay... */
    1331           0 :             newtxt[i] = txt[i];
    1332           0 :             i++;
    1333           0 :             while (--thislen > 0 && i < len)     /* "i < len" stops at the end of the data even if PQmblen reports more bytes */
    1334           0 :                 newtxt[i++] = (char) 0xFF;
    1335             :         }
    1336             :     }
    1337             : 
    1338       47191 :     return yy_scan_buffer(newtxt, len + 2, state->scanner);   /* size includes the two terminators */
    1339             : }
    1340             : 
    1341             : /*
    1342             :  * psqlscan_emit() --- body for ECHO macro
    1343             :  *
    1344             :  * NB: this must be used for ALL and ONLY the text copied from the flex
    1345             :  * input data.  If you pass it something that is not part of the yytext
    1346             :  * string, you are making a mistake.  Internally generated text can be
    1347             :  * appended directly to state->output_buf.
    1348             :  */
    1349             : void
    1350      532948 : psqlscan_emit(PsqlScanState state, const char *txt, int len)
    1351             : {
    1352      532948 :     PQExpBuffer output_buf = state->output_buf;
    1353             : 
    1354      532948 :     if (state->safe_encoding)
    1355      532948 :         appendBinaryPQExpBuffer(output_buf, txt, len);   /* scanned bytes are the real bytes */
    1356             :     else
    1357             :     {
    1358             :         /* Gotta do it the hard way */
    1359           0 :         const char *reference = state->refline;
    1360             :         int         i;
    1361             : 
    1362           0 :         reference += (txt - state->curline);   /* same offset in refline as txt has in curline */
    1363             : 
    1364           0 :         for (i = 0; i < len; i++)
    1365             :         {
    1366           0 :             char        ch = txt[i];
    1367             : 
    1368           0 :             if (ch == (char) 0xFF)             /* placeholder byte: take the byte from the reference text */
    1369           0 :                 ch = reference[i];
    1370           0 :             appendPQExpBufferChar(output_buf, ch);
    1371             :         }
    1372             :     }
    1373      532948 : }
    1374             : 
    1375             : /*
    1376             :  * psqlscan_extract_substring --- fetch value of (part of) the current token
    1377             :  *
    1378             :  * This is like psqlscan_emit(), except that the data is returned as a malloc'd,
    1379             :  * NUL-terminated string rather than being pushed directly to state->output_buf.
    1380             :  */
    1381             : char *
    1382         116 : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
    1383             : {
    1384         116 :     char       *result = (char *) pg_malloc(len + 1);   /* one extra byte for the terminator */
    1385             : 
    1386         116 :     if (state->safe_encoding)
    1387         116 :         memcpy(result, txt, len);
    1388             :     else
    1389             :     {
    1390             :         /* Gotta do it the hard way */
    1391           0 :         const char *reference = state->refline;
    1392             :         int         i;
    1393             : 
    1394           0 :         reference += (txt - state->curline);   /* same offset in refline as txt has in curline */
    1395             : 
    1396           0 :         for (i = 0; i < len; i++)
    1397             :         {
    1398           0 :             char        ch = txt[i];
    1399             : 
    1400           0 :             if (ch == (char) 0xFF)             /* placeholder byte: take the byte from the reference text */
    1401           0 :                 ch = reference[i];
    1402           0 :             result[i] = ch;
    1403             :         }
    1404             :     }
    1405         116 :     result[len] = '\0';
    1406         116 :     return result;
    1407             : }
    1408             : 
    1409             : /*
    1410             :  * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
    1411             :  *
    1412             :  * If the variable name is found, escape its value using the appropriate
    1413             :  * quoting method and emit the value to output_buf.  (Since the result is
    1414             :  * surely quoted, there is never any reason to rescan it.)  If we don't
    1415             :  * find the variable or escaping fails, emit the token as-is.
    1416             :  */
    1417             : void
    1418           9 : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
    1419             :                          PsqlScanQuoteType quote)
    1420             : {
    1421             :     char       *varname;
    1422             :     char       *value;
    1423             : 
    1424             :     /* Variable lookup: skip the leading colon and quote, drop the closing quote. */
    1425           9 :     varname = psqlscan_extract_substring(state, txt + 2, len - 3);
    1426           9 :     if (state->callbacks->get_variable)
    1427           9 :         value = state->callbacks->get_variable(varname, quote,
    1428             :                                                state->cb_passthrough);
    1429             :     else
    1430           0 :         value = NULL;    /* no callback installed: handled like a variable that was not found */
    1431           9 :     free(varname);       /* malloc'd by psqlscan_extract_substring() */
    1432             : 
    1433           9 :     if (value)
    1434             :     {
    1435             :         /* Emit the suitably-escaped value */
    1436           3 :         appendPQExpBufferStr(state->output_buf, value);
    1437           3 :         free(value);     /* the callback's result is released here once appended */
    1438             :     }
    1439             :     else
    1440             :     {
    1441             :         /* Emit original token as-is */
    1442           6 :         psqlscan_emit(state, txt, len);
    1443             :     }
    1444           9 : }

Generated by: LCOV version 1.11