Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * psqlscan.l
5 : * lexical scanner for SQL commands
6 : *
7 : * This lexer used to be part of psql, and that heritage is reflected in
8 : * the file name as well as function and typedef names, though it can now
9 : * be used by other frontend programs as well. It's also possible to extend
10 : * this lexer with a compatible add-on lexer to handle program-specific
11 : * backslash commands.
12 : *
13 : * This code is mainly concerned with determining where the end of a SQL
14 : * statement is: we are looking for semicolons that are not within quotes,
15 : * comments, or parentheses. The most reliable way to handle this is to
16 : * borrow the backend's flex lexer rules, lock, stock, and barrel. The rules
17 : * below are (except for a few) the same as the backend's, but their actions
18 : * are just ECHO whereas the backend's actions generally do other things.
19 : *
20 : * XXX The rules in this file must be kept in sync with the backend lexer!!!
21 : *
22 : * XXX Avoid creating backtracking cases --- see the backend lexer for info.
23 : *
24 : * See psqlscan_int.h for additional commentary.
25 : *
26 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
27 : * Portions Copyright (c) 1994, Regents of the University of California
28 : *
29 : * IDENTIFICATION
30 : * src/fe_utils/psqlscan.l
31 : *
32 : *-------------------------------------------------------------------------
33 : */
34 : #include "postgres_fe.h"
35 :
36 : #include "fe_utils/psqlscan.h"
37 :
38 : #include "libpq-fe.h"
39 : }
40 :
41 : %{
42 : #include "fe_utils/psqlscan_int.h"
43 :
44 : /*
45 : * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
46 : * doesn't presently make use of that argument, so just declare it as int.
47 : */
48 : typedef int YYSTYPE;
49 :
50 : /*
51 : * Set the type of yyextra; we use it as a pointer back to the containing
52 : * PsqlScanState.
53 : */
54 : #define YY_EXTRA_TYPE PsqlScanState
55 :
56 :
57 : /* Return values from yylex() */
58 : #define LEXRES_EOL 0 /* end of input */
59 : #define LEXRES_SEMI 1 /* command-terminating semicolon found */
60 : #define LEXRES_BACKSLASH 2 /* backslash command start */
61 :
62 :
63 : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
64 :
65 : /*
66 : * Work around a bug in flex 2.5.35: it emits a couple of functions that
67 : * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
68 : * this would cause warnings. Providing our own declarations should be
69 : * harmless even when the bug gets fixed.
70 : */
71 : extern int psql_yyget_column(yyscan_t yyscanner);
72 : extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
73 :
74 : %}
75 :
76 : %option reentrant
77 : %option bison-bridge
78 : %option 8bit
79 : %option never-interactive
80 : %option nodefault
81 : %option noinput
82 : %option nounput
83 : %option noyywrap
84 : %option warn
85 : %option prefix="psql_yy"
86 :
87 : /*
88 : * All of the following definitions and rules should exactly match
89 : * src/backend/parser/scan.l so far as the flex patterns are concerned.
90 : * The rule bodies are just ECHO as opposed to what the backend does,
91 : * however. (But be sure to duplicate code that affects the lexing process,
92 : * such as BEGIN() and yyless().) Also, psqlscan uses a single <<EOF>> rule
93 : * whereas scan.l has a separate one for each exclusive state.
94 : */
95 :
96 : /*
97 : * OK, here is a short description of lex/flex rules behavior.
98 : * The longest pattern which matches an input string is always chosen.
99 : * For equal-length patterns, the first occurring in the rules list is chosen.
100 : * INITIAL is the starting state, to which all non-conditional rules apply.
101 : * Exclusive states change parsing rules while the state is active. When in
102 : * an exclusive state, only those rules defined for that state apply.
103 : *
104 : * We use exclusive states for quoted strings, extended comments,
105 : * and to eliminate parsing troubles for numeric strings.
106 : * Exclusive states:
107 : * <xb> bit string literal
108 : * <xc> extended C-style comments
109 : * <xd> delimited identifiers (double-quoted identifiers)
110 : * <xh> hexadecimal numeric string
111 : * <xq> standard quoted strings
112 : * <xe> extended quoted strings (support backslash escape sequences)
113 : * <xdolq> $foo$ quoted strings
114 : * <xui> quoted identifier with Unicode escapes
115 : * <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
116 : * <xus> quoted string with Unicode escapes
117 : * <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
118 : *
119 : * Note: we intentionally don't mimic the backend's <xeu> state; we have
120 : * no need to distinguish it from <xe> state, and no good way to get out
121 : * of it in error cases. The backend just throws yyerror() in those
122 : * cases, but that's not an option here.
123 : */
124 :
125 : %x xb
126 : %x xc
127 : %x xd
128 : %x xh
129 : %x xe
130 : %x xq
131 : %x xdolq
132 : %x xui
133 : %x xuiend
134 : %x xus
135 : %x xusend
136 :
137 : /*
138 : * In order to make the world safe for Windows and Mac clients as well as
139 : * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
140 : * sequence will be seen as two successive newlines, but that doesn't cause
141 : * any problems. Comments that start with -- and extend to the next
142 : * newline are treated as equivalent to a single whitespace character.
143 : *
144 : * NOTE a fine point: if there is no newline following --, we will absorb
145 : * everything to the end of the input as a comment. This is correct. Older
146 : * versions of Postgres failed to recognize -- as a comment if the input
147 : * did not end with a newline.
148 : *
149 : * XXX perhaps \f (formfeed) should be treated as a newline as well?
150 : *
151 : * XXX if you change the set of whitespace characters, fix scanner_isspace()
152 : * to agree, and see also the plpgsql lexer.
153 : */
154 :
155 : space [ \t\n\r\f]
156 : horiz_space [ \t\f]
157 : newline [\n\r]
158 : non_newline [^\n\r]
159 :
160 : comment ("--"{non_newline}*)
161 :
162 : whitespace ({space}+|{comment})
163 :
164 : /*
165 : * SQL requires at least one newline in the whitespace separating
166 : * string literals that are to be concatenated. Silly, but who are we
167 : * to argue? Note that {whitespace_with_newline} should not have * after
168 : * it, whereas {whitespace} should generally have a * after it...
169 : */
170 :
171 : special_whitespace ({space}+|{comment}{newline})
172 : horiz_whitespace ({horiz_space}|{comment})
173 : whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
174 :
175 : /*
176 : * To ensure that {quotecontinue} can be scanned without having to back up
177 : * if the full pattern isn't matched, we include trailing whitespace in
178 : * {quotestop}. This matches all cases where {quotecontinue} fails to match,
179 : * except for {quote} followed by whitespace and just one "-" (not two,
180 : * which would start a {comment}). To cover that we have {quotefail}.
181 : * The actions for {quotestop} and {quotefail} must throw back characters
182 : * beyond the quote proper.
183 : */
184 : quote '
185 : quotestop {quote}{whitespace}*
186 : quotecontinue {quote}{whitespace_with_newline}{quote}
187 : quotefail {quote}{whitespace}*"-"
188 :
189 : /* Bit string
190 : * It is tempting to scan the string for only those characters
191 : * which are allowed. However, this leads to silently swallowed
192 : * characters if illegal characters are included in the string.
193 : * For example, if xbinside is [01] then B'ABCD' is interpreted
194 : * as a zero-length string, and the ABCD' is lost!
195 : * Better to pass the string forward and let the input routines
196 : * validate the contents.
197 : */
198 : xbstart [bB]{quote}
199 : xbinside [^']*
200 :
201 : /* Hexadecimal number */
202 : xhstart [xX]{quote}
203 : xhinside [^']*
204 :
205 : /* National character */
206 : xnstart [nN]{quote}
207 :
208 : /* Quoted string that allows backslash escapes */
209 : xestart [eE]{quote}
210 : xeinside [^\\']+
211 : xeescape [\\][^0-7]
212 : xeoctesc [\\][0-7]{1,3}
213 : xehexesc [\\]x[0-9A-Fa-f]{1,2}
214 : xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
215 : xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
216 :
217 : /* Extended quote
218 : * xqdouble implements embedded quote, ''''
219 : */
220 : xqstart {quote}
221 : xqdouble {quote}{quote}
222 : xqinside [^']+
223 :
224 : /* $foo$ style quotes ("dollar quoting")
225 : * The quoted string starts with $foo$ where "foo" is an optional string
226 : * in the form of an identifier, except that it may not contain "$",
227 : * and extends to the first occurrence of an identical string.
228 : * There is *no* processing of the quoted text.
229 : *
230 : * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
231 : * fails to match its trailing "$".
232 : */
233 : dolq_start [A-Za-z\200-\377_]
234 : dolq_cont [A-Za-z\200-\377_0-9]
235 : dolqdelim \$({dolq_start}{dolq_cont}*)?\$
236 : dolqfailed \${dolq_start}{dolq_cont}*
237 : dolqinside [^$]+
238 :
239 : /* Double quote
240 : * Allows embedded spaces and other special characters into identifiers.
241 : */
242 : dquote \"
243 : xdstart {dquote}
244 : xdstop {dquote}
245 : xddouble {dquote}{dquote}
246 : xdinside [^"]+
247 :
248 : /* Unicode escapes */
249 : uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
250 : /* error rule to avoid backup */
251 : uescapefail [uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU]
252 :
253 : /* Quoted identifier with Unicode escapes */
254 : xuistart [uU]&{dquote}
255 :
256 : /* Quoted string with Unicode escapes */
257 : xusstart [uU]&{quote}
258 :
259 : /* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
260 : xustop1 {uescapefail}?
261 : xustop2 {uescape}
262 :
263 : /* error rule to avoid backup */
264 : xufailed [uU]&
265 :
266 :
267 : /* C-style comments
268 : *
269 : * The "extended comment" syntax closely resembles allowable operator syntax.
270 : * The tricky part here is to get lex to recognize a string starting with
271 : * slash-star as a comment, when interpreting it as an operator would produce
272 : * a longer match --- remember lex will prefer a longer match! Also, if we
273 : * have something like plus-slash-star, lex will think this is a 3-character
274 : * operator whereas we want to see it as a + operator and a comment start.
275 : * The solution is two-fold:
276 : * 1. append {op_chars}* to xcstart so that it matches as much text as
277 : * {operator} would. Then the tie-breaker (first matching rule of same
278 : * length) ensures xcstart wins. We put back the extra stuff with yyless()
279 : * in case it contains a star-slash that should terminate the comment.
280 : * 2. In the operator rule, check for slash-star within the operator, and
281 : * if found throw it back with yyless(). This handles the plus-slash-star
282 : * problem.
283 : * Dash-dash comments have similar interactions with the operator rule.
284 : */
285 : xcstart \/\*{op_chars}*
286 : xcstop \*+\/
287 : xcinside [^*/]+
288 :
289 : digit [0-9]
290 : ident_start [A-Za-z\200-\377_]
291 : ident_cont [A-Za-z\200-\377_0-9\$]
292 :
293 : identifier {ident_start}{ident_cont}*
294 :
295 : /* Assorted special-case operators and operator-like tokens */
296 : typecast "::"
297 : dot_dot \.\.
298 : colon_equals ":="
299 : equals_greater "=>"
300 : less_equals "<="
301 : greater_equals ">="
302 : less_greater "<>"
303 : not_equals "!="
304 :
305 : /*
306 : * "self" is the set of chars that should be returned as single-character
307 : * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
308 : * which can be one or more characters long (but if a single-char token
309 : * appears in the "self" set, it is not to be returned as an Op). Note
310 : * that the sets overlap, but each has some chars that are not in the other.
311 : *
312 : * If you change either set, adjust the character lists appearing in the
313 : * rule for "operator"!
314 : */
315 : self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
316 : op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
317 : operator {op_chars}+
318 :
319 : /* we no longer allow unary minus in numbers.
320 : * instead we pass it separately to parser. there it gets
321 : * coerced via doNegate() -- Leon aug 20 1999
322 : *
323 : * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
324 : *
325 : * {realfail1} and {realfail2} are added to prevent the need for scanner
326 : * backup when the {real} rule fails to match completely.
327 : */
328 :
329 : integer {digit}+
330 : decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
331 : decimalfail {digit}+\.\.
332 : real ({integer}|{decimal})[Ee][-+]?{digit}+
333 : realfail1 ({integer}|{decimal})[Ee]
334 : realfail2 ({integer}|{decimal})[Ee][-+]
335 :
336 : param \${integer}
337 :
338 : /* psql-specific: characters allowed in variable names */
339 : variable_char [A-Za-z\200-\377_0-9]
340 :
341 : other .
342 :
343 : /*
344 : * Dollar quoted strings are totally opaque, and no escaping is done on them.
345 : * Other quoted strings must allow some special characters such as single-quote
346 : * and newline.
347 : * Embedded single-quotes are implemented both in the SQL standard
348 : * style of two adjacent single quotes "''" and in the Postgres/Java style
349 : * of escaped-quote "\'".
350 : * Other embedded escaped characters are matched explicitly and the leading
351 : * backslash is dropped from the string.
352 : * Note that xcstart must appear before operator, as explained above!
353 : * Also whitespace (comment) must appear before operator.
354 : */
355 :
356 : %%
357 :
358 : %{
359 : /* Declare some local variables inside yylex(), for convenience */
360 72220 : PsqlScanState cur_state = yyextra;
361 72220 : PQExpBuffer output_buf = cur_state->output_buf;
362 :
363 : /*
364 : * Force flex into the state indicated by start_state. This has a
365 : * couple of purposes: it lets some of the functions below set a new
366 : * starting state without ugly direct access to flex variables, and it
367 : * allows us to transition from one flex lexer to another so that we
368 : * can lex different parts of the source string using separate lexers.
369 : */
370 72220 : BEGIN(cur_state->start_state);
371 : %}
372 :
373 : {whitespace} {
374 : /*
375 : * Note that the whitespace rule includes both true
376 : * whitespace and single-line ("--" style) comments.
377 : * We suppress whitespace at the start of the query
378 : * buffer. We also suppress all single-line comments,
379 : * which is pretty dubious but is the historical
380 : * behavior.
381 : */
382 176685 : if (!(output_buf->len == 0 || yytext[0] == '-'))
383 164968 : ECHO;
384 : }
385 176685 :
386 : {xcstart} {
387 39 : cur_state->xcdepth = 0;
388 39 : BEGIN(xc);
389 : /* Put back any characters past slash-star; see above */
390 39 : yyless(2);
391 39 : ECHO;
392 : }
393 39 :
394 : <xc>{xcstart} {
395 3 : cur_state->xcdepth++;
396 : /* Put back any characters past slash-star; see above */
397 3 : yyless(2);
398 3 : ECHO;
399 : }
400 3 :
401 : <xc>{xcstop} {
402 42 : if (cur_state->xcdepth <= 0)
403 39 : BEGIN(INITIAL);
404 : else
405 3 : cur_state->xcdepth--;
406 42 : ECHO;
407 : }
408 42 :
409 : <xc>{xcinside} {
410 84 : ECHO;
411 : }
412 84 :
413 : <xc>{op_chars} {
414 18 : ECHO;
415 : }
416 18 :
417 : <xc>\*+ {
418 0 : ECHO;
419 : }
420 0 :
421 : {xbstart} {
422 117 : BEGIN(xb);
423 117 : ECHO;
424 : }
425 117 : <xb>{quotestop} |
426 : <xb>{quotefail} {
427 117 : yyless(1);
428 117 : BEGIN(INITIAL);
429 117 : ECHO;
430 : }
431 117 : <xh>{xhinside} |
432 : <xb>{xbinside} {
433 144 : ECHO;
434 : }
435 144 : <xh>{quotecontinue} |
436 : <xb>{quotecontinue} {
437 0 : ECHO;
438 : }
439 0 :
440 : {xhstart} {
441 : /* Hexadecimal bit type.
442 : * At some point we should simply pass the string
443 : * forward to the parser and label it there.
444 : * In the meantime, place a leading "x" on the string
445 : * to mark it for the input routine as a hex string.
446 : */
447 32 : BEGIN(xh);
448 32 : ECHO;
449 : }
450 32 : <xh>{quotestop} |
451 : <xh>{quotefail} {
452 32 : yyless(1);
453 32 : BEGIN(INITIAL);
454 32 : ECHO;
455 : }
456 32 :
457 : {xnstart} {
458 0 : yyless(1); /* eat only 'n' this time */
459 0 : ECHO;
460 : }
461 0 :
462 : {xqstart} {
463 14706 : if (cur_state->std_strings)
464 14659 : BEGIN(xq);
465 : else
466 47 : BEGIN(xe);
467 14706 : ECHO;
468 : }
469 14706 : {xestart} {
470 90 : BEGIN(xe);
471 90 : ECHO;
472 : }
473 90 : {xusstart} {
474 12 : BEGIN(xus);
475 12 : ECHO;
476 : }
477 12 : <xq,xe>{quotestop} |
478 : <xq,xe>{quotefail} {
479 14796 : yyless(1);
480 14796 : BEGIN(INITIAL);
481 14796 : ECHO;
482 : }
483 14796 : <xus>{quotestop} |
484 : <xus>{quotefail} {
485 : /* throw back all but the quote */
486 12 : yyless(1);
487 12 : BEGIN(xusend);
488 12 : ECHO;
489 : }
490 12 : <xusend>{whitespace} {
491 8 : ECHO;
492 : }
493 8 : <xusend>{other} |
494 : <xusend>{xustop1} {
495 6 : yyless(0);
496 6 : BEGIN(INITIAL);
497 6 : ECHO;
498 : }
499 6 : <xusend>{xustop2} {
500 6 : BEGIN(INITIAL);
501 6 : ECHO;
502 : }
503 6 : <xq,xe,xus>{xqdouble} {
504 367 : ECHO;
505 : }
506 367 : <xq,xus>{xqinside} {
507 15054 : ECHO;
508 : }
509 15054 : <xe>{xeinside} {
510 198 : ECHO;
511 : }
512 198 : <xe>{xeunicode} {
513 0 : ECHO;
514 : }
515 0 : <xe>{xeunicodefail} {
516 0 : ECHO;
517 : }
518 0 : <xe>{xeescape} {
519 128 : ECHO;
520 : }
521 128 : <xe>{xeoctesc} {
522 2 : ECHO;
523 : }
524 2 : <xe>{xehexesc} {
525 0 : ECHO;
526 : }
527 0 : <xq,xe,xus>{quotecontinue} {
528 0 : ECHO;
529 : }
530 0 : <xe>. {
531 : /* This is only needed for \ just before EOF */
532 0 : ECHO;
533 : }
534 0 :
535 : {dolqdelim} {
536 458 : cur_state->dolqstart = pg_strdup(yytext);
537 458 : BEGIN(xdolq);
538 458 : ECHO;
539 : }
540 458 : {dolqfailed} {
541 : /* throw back all but the initial "$" */
542 0 : yyless(1);
543 0 : ECHO;
544 : }
545 0 : <xdolq>{dolqdelim} {
546 480 : if (strcmp(yytext, cur_state->dolqstart) == 0)
547 : {
548 458 : free(cur_state->dolqstart);
549 458 : cur_state->dolqstart = NULL;
550 458 : BEGIN(INITIAL);
551 : }
552 : else
553 : {
554 : /*
555 : * When we fail to match $...$ to dolqstart, transfer
556 : * the $... part to the output, but put back the final
557 : * $ for rescanning. Consider $delim$...$junk$delim$
558 : */
559 22 : yyless(yyleng - 1);
560 : }
561 480 : ECHO;
562 : }
563 480 : <xdolq>{dolqinside} {
564 2935 : ECHO;
565 : }
566 2935 : <xdolq>{dolqfailed} {
567 7 : ECHO;
568 : }
569 7 : <xdolq>. {
570 : /* This is only needed for $ inside the quoted text */
571 244 : ECHO;
572 : }
573 244 :
574 : {xdstart} {
575 929 : BEGIN(xd);
576 929 : ECHO;
577 : }
578 929 : {xuistart} {
579 6 : BEGIN(xui);
580 6 : ECHO;
581 : }
582 6 : <xd>{xdstop} {
583 929 : BEGIN(INITIAL);
584 929 : ECHO;
585 : }
586 929 : <xui>{dquote} {
587 6 : yyless(1);
588 6 : BEGIN(xuiend);
589 6 : ECHO;
590 : }
591 6 : <xuiend>{whitespace} {
592 4 : ECHO;
593 : }
594 4 : <xuiend>{other} |
595 : <xuiend>{xustop1} {
596 2 : yyless(0);
597 2 : BEGIN(INITIAL);
598 2 : ECHO;
599 : }
600 2 : <xuiend>{xustop2} {
601 4 : BEGIN(INITIAL);
602 4 : ECHO;
603 : }
604 4 : <xd,xui>{xddouble} {
605 3 : ECHO;
606 : }
607 3 : <xd,xui>{xdinside} {
608 939 : ECHO;
609 : }
610 939 :
611 : {xufailed} {
612 : /* throw back all but the initial u/U */
613 0 : yyless(1);
614 0 : ECHO;
615 : }
616 0 :
617 : {typecast} {
618 3149 : ECHO;
619 : }
620 3149 :
621 : {dot_dot} {
622 0 : ECHO;
623 : }
624 0 :
625 : {colon_equals} {
626 80 : ECHO;
627 : }
628 80 :
629 : {equals_greater} {
630 20 : ECHO;
631 : }
632 20 :
633 : {less_equals} {
634 92 : ECHO;
635 : }
636 92 :
637 : {greater_equals} {
638 143 : ECHO;
639 : }
640 143 :
641 : {less_greater} {
642 98 : ECHO;
643 : }
644 98 :
645 : {not_equals} {
646 450 : ECHO;
647 : }
648 450 :
649 : /*
650 : * These rules are specific to psql --- they implement parenthesis
651 : * counting and detection of command-ending semicolon. These must
652 : * appear before the {self} rule so that they take precedence over it.
653 : */
654 :
655 : "(" {
656 22493 : cur_state->paren_depth++;
657 22493 : ECHO;
658 : }
659 22493 :
660 : ")" {
661 22491 : if (cur_state->paren_depth > 0)
662 22491 : cur_state->paren_depth--;
663 22491 : ECHO;
664 : }
665 22491 :
666 : ";" {
667 24269 : ECHO;
668 24269 : if (cur_state->paren_depth == 0)
669 : {
670 : /* Terminate lexing temporarily */
671 24261 : cur_state->start_state = YY_START;
672 24261 : return LEXRES_SEMI;
673 : }
674 : }
675 8 :
676 : /*
677 : * psql-specific rules to handle backslash commands and variable
678 : * substitution. We want these before {self}, also.
679 : */
680 :
681 : "\\"[;:] {
682 : /* Force a semicolon or colon into the query buffer */
683 8 : psqlscan_emit(cur_state, yytext + 1, 1);
684 : }
685 8 :
686 : "\\" {
687 : /* Terminate lexing temporarily */
688 785 : cur_state->start_state = YY_START;
689 785 : return LEXRES_BACKSLASH;
690 : }
691 :
692 : :{variable_char}+ {
693 : /* Possible psql variable substitution */
694 : char *varname;
695 : char *value;
696 :
697 152 : varname = psqlscan_extract_substring(cur_state,
698 76 : yytext + 1,
699 76 : yyleng - 1);
700 76 : if (cur_state->callbacks->get_variable)
701 76 : value = cur_state->callbacks->get_variable(varname,
702 : PQUOTE_PLAIN,
703 : cur_state->cb_passthrough);
704 : else
705 0 : value = NULL;
706 :
707 76 : if (value)
708 : {
709 : /* It is a variable, check for recursion */
710 17 : if (psqlscan_var_is_current_source(cur_state, varname))
711 : {
712 : /* Recursive expansion --- don't go there */
713 0 : cur_state->callbacks->write_error("skipping recursive expansion of variable \"%s\"\n",
714 : varname);
715 : /* Instead copy the string as is */
716 0 : ECHO;
717 : }
718 : else
719 : {
720 : /* OK, perform substitution */
721 17 : psqlscan_push_new_buffer(cur_state, value, varname);
722 : /* yy_scan_string already made buffer active */
723 : }
724 17 : free(value);
725 : }
726 : else
727 : {
728 : /*
729 : * if the variable doesn't exist we'll copy the string
730 : * as is
731 : */
732 59 : ECHO;
733 : }
734 :
735 76 : free(varname);
736 : }
737 76 :
738 : :'{variable_char}+' {
739 1 : psqlscan_escape_variable(cur_state, yytext, yyleng,
740 : PQUOTE_SQL_LITERAL);
741 : }
742 1 :
743 : :\"{variable_char}+\" {
744 0 : psqlscan_escape_variable(cur_state, yytext, yyleng,
745 : PQUOTE_SQL_IDENT);
746 : }
747 0 :
748 : /*
749 : * These rules just avoid the need for scanner backup if one of the
750 : * two rules above fails to match completely.
751 : */
752 :
753 : :'{variable_char}* {
754 : /* Throw back everything but the colon */
755 0 : yyless(1);
756 0 : ECHO;
757 : }
758 0 :
759 : :\"{variable_char}* {
760 : /* Throw back everything but the colon */
761 0 : yyless(1);
762 0 : ECHO;
763 : }
764 0 :
765 : /*
766 : * Back to backend-compatible rules.
767 : */
768 :
769 : {self} {
770 41690 : ECHO;
771 : }
772 41690 :
773 : {operator} {
774 : /*
775 : * Check for embedded slash-star or dash-dash; those
776 : * are comment starts, so operator must stop there.
777 : * Note that slash-star or dash-dash at the first
778 : * character will match a prior rule, not this one.
779 : */
780 1501 : int nchars = yyleng;
781 1501 : char *slashstar = strstr(yytext, "/*");
782 1501 : char *dashdash = strstr(yytext, "--");
783 :
784 1501 : if (slashstar && dashdash)
785 : {
786 : /* if both appear, take the first one */
787 0 : if (slashstar > dashdash)
788 0 : slashstar = dashdash;
789 : }
790 1501 : else if (!slashstar)
791 1501 : slashstar = dashdash;
792 1501 : if (slashstar)
793 0 : nchars = slashstar - yytext;
794 :
795 : /*
796 : * For SQL compatibility, '+' and '-' cannot be the
797 : * last char of a multi-char operator unless the operator
798 : * contains chars that are not in SQL operators.
799 : * The idea is to lex '=-' as two operators, but not
800 : * to forbid operator names like '?-' that could not be
801 : * sequences of SQL operators.
802 : */
803 4342 : while (nchars > 1 &&
804 2668 : (yytext[nchars - 1] == '+' ||
805 1334 : yytext[nchars - 1] == '-'))
806 : {
807 : int ic;
808 :
809 33 : for (ic = nchars - 2; ic >= 0; ic--)
810 : {
811 27 : if (strchr("~!@#^&|`?%", yytext[ic]))
812 21 : break;
813 : }
814 27 : if (ic >= 0)
815 21 : break; /* found a char that makes it OK */
816 6 : nchars--; /* else remove the +/-, and check again */
817 : }
818 :
819 1501 : if (nchars < yyleng)
820 : {
821 : /* Strip the unwanted chars from the token */
822 6 : yyless(nchars);
823 : }
824 1501 : ECHO;
825 : }
826 1501 :
827 : {param} {
828 23 : ECHO;
829 : }
830 23 :
831 : {integer} {
832 13843 : ECHO;
833 : }
834 13843 : {decimal} {
835 729 : ECHO;
836 : }
837 729 : {decimalfail} {
838 : /* throw back the .., and treat as integer */
839 0 : yyless(yyleng - 2);
840 0 : ECHO;
841 : }
842 0 : {real} {
843 24 : ECHO;
844 : }
845 24 : {realfail1} {
846 : /*
847 : * throw back the [Ee], and treat as {decimal}. Note
848 : * that it is possible the input is actually {integer},
849 : * but since this case will almost certainly lead to a
850 : * syntax error anyway, we don't bother to distinguish.
851 : */
852 0 : yyless(yyleng - 1);
853 0 : ECHO;
854 : }
855 0 : {realfail2} {
856 : /* throw back the [Ee][+-], and proceed as above */
857 0 : yyless(yyleng - 2);
858 0 : ECHO;
859 : }
860 0 :
861 :
862 : {identifier} {
863 174233 : ECHO;
864 : }
865 174233 :
866 : {other} {
867 0 : ECHO;
868 : }
869 0 :
870 : <<EOF>> {
871 47191 : if (cur_state->buffer_stack == NULL)
872 : {
873 47174 : cur_state->start_state = YY_START;
874 47174 : return LEXRES_EOL; /* end of input reached */
875 : }
876 :
877 : /*
878 : * We were expanding a variable, so pop the inclusion
879 : * stack and keep lexing
880 : */
881 17 : psqlscan_pop_buffer_stack(cur_state);
882 17 : psqlscan_select_top_buffer(cur_state);
883 : }
884 17 :
885 0 : %%
886 0 :
887 : /*
888 : * Create a lexer working state struct.
889 : *
890 : * callbacks is a struct of function pointers that encapsulate some
891 : * behavior we need from the surrounding program. This struct must
892 : * remain valid for the lifespan of the PsqlScanState.
893 : */
894 : PsqlScanState
895 181 : psql_scan_create(const PsqlScanCallbacks *callbacks)
896 : {
897 : PsqlScanState state;
898 :
899 181 : state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
900 :
901 181 : state->callbacks = callbacks;
902 :
903 181 : yylex_init(&state->scanner);
904 :
905 181 : yyset_extra(state, state->scanner);
906 :
907 181 : psql_scan_reset(state);
908 :
909 181 : return state;
910 : }
911 :
912 : /*
913 : * Destroy a lexer working state struct, releasing all resources.
914 : */
915 : void
916 181 : psql_scan_destroy(PsqlScanState state)
917 : {
918 181 : psql_scan_finish(state);
919 :
920 181 : psql_scan_reset(state);
921 :
922 181 : yylex_destroy(state->scanner);
923 :
924 181 : free(state);
925 181 : }
926 :
927 : /*
928 : * Set the callback passthrough pointer for the lexer.
929 : *
930 : * This could have been integrated into psql_scan_create, but keeping it
931 : * separate allows the application to change the pointer later, which might
932 : * be useful.
933 : */
934 : void
935 181 : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
936 : {
937 181 : state->cb_passthrough = passthrough;
938 181 : }
939 :
940 : /*
941 : * Set up to perform lexing of the given input line.
942 : *
943 : * The text at *line, extending for line_len bytes, will be scanned by
944 : * subsequent calls to the psql_scan routines. psql_scan_finish should
945 : * be called when scanning is complete. Note that the lexer retains
946 : * a pointer to the storage at *line --- this string must not be altered
947 : * or freed until after psql_scan_finish is called.
948 : *
949 : * encoding is the libpq identifier for the character encoding in use,
950 : * and std_strings says whether standard_conforming_strings is on.
951 : */
952 : void
953 47174 : psql_scan_setup(PsqlScanState state,
954 : const char *line, int line_len,
955 : int encoding, bool std_strings)
956 : {
957 : /* Mustn't be scanning already */
958 47174 : Assert(state->scanbufhandle == NULL);
959 47174 : Assert(state->buffer_stack == NULL);
960 :
961 : /* Do we need to hack the character set encoding? */
962 47174 : state->encoding = encoding;
963 47174 : state->safe_encoding = pg_valid_server_encoding_id(encoding);
964 :
965 : /* Save standard-strings flag as well */
966 47174 : state->std_strings = std_strings;
967 :
968 : /* Set up flex input buffer with appropriate translation and padding */
969 47174 : state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
970 : &state->scanbuf);
971 47174 : state->scanline = line;
972 :
973 : /* Set lookaside data in case we have to map unsafe encoding */
974 47174 : state->curline = state->scanbuf;
975 47174 : state->refline = state->scanline;
976 47174 : }
977 :
978 : /*
979 : * Do lexical analysis of SQL command text.
980 : *
981 : * The text previously passed to psql_scan_setup is scanned, and appended
982 : * (possibly with transformation) to query_buf.
983 : *
984 : * The return value indicates the condition that stopped scanning:
985 : *
986 : * PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is
987 : * transferred to query_buf.) The command accumulated in query_buf should
988 : * be executed, then clear query_buf and call again to scan the remainder
989 : * of the line.
990 : *
991 : * PSCAN_BACKSLASH: found a backslash that starts a special command.
992 : * Any previous data on the line has been transferred to query_buf.
993 : * The caller will typically next apply a separate flex lexer to scan
994 : * the special command.
995 : *
996 : * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
997 : * incomplete SQL command. *prompt is set to the appropriate prompt type.
998 : *
999 : * PSCAN_EOL: the end of the line was reached, and there is no lexical
1000 : * reason to consider the command incomplete. The caller may or may not
1001 : * choose to send it. *prompt is set to the appropriate prompt type if
1002 : * the caller chooses to collect more input.
1003 : *
1004 : * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1005 : * be called next, then the cycle may be repeated with a fresh input line.
1006 : *
1007 : * In all cases, *prompt is set to an appropriate prompt type code for the
1008 : * next line-input operation.
1009 : */
1010 : PsqlScanResult
1011 72220 : psql_scan(PsqlScanState state,
1012 : PQExpBuffer query_buf,
1013 : promptStatus_t *prompt)
1014 : {
1015 : PsqlScanResult result;
1016 : int lexresult;
1017 :
1018 : /* Must be scanning already */
1019 72220 : Assert(state->scanbufhandle != NULL);
1020 :
1021 : /* Set current output target */
1022 72220 : state->output_buf = query_buf;
1023 :
1024 : /* Set input source */
1025 72220 : if (state->buffer_stack != NULL)
1026 0 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
1027 : else
1028 72220 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1029 :
1030 : /* And lex. */
1031 72220 : lexresult = yylex(NULL, state->scanner);
1032 :
1033 : /*
1034 : * Check termination state and return appropriate result info.
1035 : */
1036 72220 : switch (lexresult)
1037 : {
1038 : case LEXRES_EOL: /* end of input */
1039 47174 : switch (state->start_state)
1040 : {
1041 : case INITIAL:
1042 : case xuiend: /* we treat these like INITIAL */
1043 : case xusend:
1044 43215 : if (state->paren_depth > 0)
1045 : {
1046 3239 : result = PSCAN_INCOMPLETE;
1047 3239 : *prompt = PROMPT_PAREN;
1048 : }
1049 39976 : else if (query_buf->len > 0)
1050 : {
1051 7209 : result = PSCAN_EOL;
1052 7209 : *prompt = PROMPT_CONTINUE;
1053 : }
1054 : else
1055 : {
1056 : /* never bother to send an empty buffer */
1057 32767 : result = PSCAN_INCOMPLETE;
1058 32767 : *prompt = PROMPT_READY;
1059 : }
1060 43215 : break;
1061 : case xb:
1062 0 : result = PSCAN_INCOMPLETE;
1063 0 : *prompt = PROMPT_SINGLEQUOTE;
1064 0 : break;
1065 : case xc:
1066 36 : result = PSCAN_INCOMPLETE;
1067 36 : *prompt = PROMPT_COMMENT;
1068 36 : break;
1069 : case xd:
1070 3 : result = PSCAN_INCOMPLETE;
1071 3 : *prompt = PROMPT_DOUBLEQUOTE;
1072 3 : break;
1073 : case xh:
1074 0 : result = PSCAN_INCOMPLETE;
1075 0 : *prompt = PROMPT_SINGLEQUOTE;
1076 0 : break;
1077 : case xe:
1078 0 : result = PSCAN_INCOMPLETE;
1079 0 : *prompt = PROMPT_SINGLEQUOTE;
1080 0 : break;
1081 : case xq:
1082 1024 : result = PSCAN_INCOMPLETE;
1083 1024 : *prompt = PROMPT_SINGLEQUOTE;
1084 1024 : break;
1085 : case xdolq:
1086 2896 : result = PSCAN_INCOMPLETE;
1087 2896 : *prompt = PROMPT_DOLLARQUOTE;
1088 2896 : break;
1089 : case xui:
1090 0 : result = PSCAN_INCOMPLETE;
1091 0 : *prompt = PROMPT_DOUBLEQUOTE;
1092 0 : break;
1093 : case xus:
1094 0 : result = PSCAN_INCOMPLETE;
1095 0 : *prompt = PROMPT_SINGLEQUOTE;
1096 0 : break;
1097 : default:
1098 : /* can't get here */
1099 0 : fprintf(stderr, "invalid YY_START\n");
1100 0 : exit(1);
1101 : }
1102 47174 : break;
1103 : case LEXRES_SEMI: /* semicolon */
1104 24261 : result = PSCAN_SEMICOLON;
1105 24261 : *prompt = PROMPT_READY;
1106 24261 : break;
1107 : case LEXRES_BACKSLASH: /* backslash */
1108 785 : result = PSCAN_BACKSLASH;
1109 785 : *prompt = PROMPT_READY;
1110 785 : break;
1111 : default:
1112 : /* can't get here */
1113 0 : fprintf(stderr, "invalid yylex result\n");
1114 0 : exit(1);
1115 : }
1116 :
1117 72220 : return result;
1118 : }
1119 :
1120 : /*
1121 : * Clean up after scanning a string. This flushes any unread input and
1122 : * releases resources (but not the PsqlScanState itself). Note however
1123 : * that this does not reset the lexer scan state; that can be done by
1124 : * psql_scan_reset(), which is an orthogonal operation.
1125 : *
1126 : * It is legal to call this when not scanning anything (makes it easier
1127 : * to deal with error recovery).
1128 : */
1129 : void
1130 47355 : psql_scan_finish(PsqlScanState state)
1131 : {
1132 : /* Drop any incomplete variable expansions. */
1133 94710 : while (state->buffer_stack != NULL)
1134 0 : psqlscan_pop_buffer_stack(state);
1135 :
1136 : /* Done with the outer scan buffer, too */
1137 47355 : if (state->scanbufhandle)
1138 47174 : yy_delete_buffer(state->scanbufhandle, state->scanner);
1139 47355 : state->scanbufhandle = NULL;
1140 47355 : if (state->scanbuf)
1141 47174 : free(state->scanbuf);
1142 47355 : state->scanbuf = NULL;
1143 47355 : }
1144 :
1145 : /*
1146 : * Reset lexer scanning state to start conditions. This is appropriate
1147 : * for executing \r psql commands (or any other time that we discard the
1148 : * prior contents of query_buf). It is not, however, necessary to do this
1149 : * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1150 : * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1151 : * conditions are returned.
1152 : *
1153 : * Note that this is unrelated to flushing unread input; that task is
1154 : * done by psql_scan_finish().
1155 : */
1156 : void
1157 421 : psql_scan_reset(PsqlScanState state)
1158 : {
1159 421 : state->start_state = INITIAL;
1160 421 : state->paren_depth = 0;
1161 421 : state->xcdepth = 0; /* not really necessary */
1162 421 : if (state->dolqstart)
1163 0 : free(state->dolqstart);
1164 421 : state->dolqstart = NULL;
1165 421 : }
1166 :
1167 : /*
1168 : * Reselect this lexer (psqlscan.l) after using another one.
1169 : *
1170 : * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
1171 : * state, because we'd never switch to another lexer in a different state.
1172 : * However, we don't want to reset e.g. paren_depth, so this can't be
1173 : * the same as psql_scan_reset().
1174 : *
1175 : * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
1176 : * must be a superset of this.
1177 : *
1178 : * Note: it seems likely that other lexers could just assign INITIAL for
1179 : * themselves, since that probably has the value zero in every flex-generated
1180 : * lexer. But let's not assume that.
1181 : */
1182 : void
1183 3644 : psql_scan_reselect_sql_lexer(PsqlScanState state)
1184 : {
1185 3644 : state->start_state = INITIAL;
1186 3644 : }
1187 :
1188 : /*
1189 : * Return true if lexer is currently in an "inside quotes" state.
1190 : *
1191 : * This is pretty grotty but is needed to preserve the old behavior
1192 : * that mainloop.c drops blank lines not inside quotes without even
1193 : * echoing them.
1194 : */
1195 : bool
1196 11390 : psql_scan_in_quote(PsqlScanState state)
1197 : {
1198 11390 : return state->start_state != INITIAL;
1199 : }
1200 :
1201 : /*
1202 : * Push the given string onto the stack of stuff to scan.
1203 : *
1204 : * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1205 : */
1206 : void
1207 17 : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
1208 : const char *varname)
1209 : {
1210 : StackElem *stackelem;
1211 :
1212 17 : stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
1213 :
1214 : /*
1215 : * In current usage, the passed varname points at the current flex input
1216 : * buffer; we must copy it before calling psqlscan_prepare_buffer()
1217 : * because that will change the buffer state.
1218 : */
1219 17 : stackelem->varname = varname ? pg_strdup(varname) : NULL;
1220 :
1221 17 : stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
1222 : &stackelem->bufstring);
1223 17 : state->curline = stackelem->bufstring;
1224 17 : if (state->safe_encoding)
1225 : {
1226 17 : stackelem->origstring = NULL;
1227 17 : state->refline = stackelem->bufstring;
1228 : }
1229 : else
1230 : {
1231 0 : stackelem->origstring = pg_strdup(newstr);
1232 0 : state->refline = stackelem->origstring;
1233 : }
1234 17 : stackelem->next = state->buffer_stack;
1235 17 : state->buffer_stack = stackelem;
1236 17 : }
1237 :
1238 : /*
1239 : * Pop the topmost buffer stack item (there must be one!)
1240 : *
1241 : * NB: after this, the flex input state is unspecified; caller must
1242 : * switch to an appropriate buffer to continue lexing.
1243 : * See psqlscan_select_top_buffer().
1244 : */
1245 : void
1246 17 : psqlscan_pop_buffer_stack(PsqlScanState state)
1247 : {
1248 17 : StackElem *stackelem = state->buffer_stack;
1249 :
1250 17 : state->buffer_stack = stackelem->next;
1251 17 : yy_delete_buffer(stackelem->buf, state->scanner);
1252 17 : free(stackelem->bufstring);
1253 17 : if (stackelem->origstring)
1254 0 : free(stackelem->origstring);
1255 17 : if (stackelem->varname)
1256 17 : free(stackelem->varname);
1257 17 : free(stackelem);
1258 17 : }
1259 :
1260 : /*
1261 : * Select the topmost surviving buffer as the active input.
1262 : */
1263 : void
1264 17 : psqlscan_select_top_buffer(PsqlScanState state)
1265 : {
1266 17 : StackElem *stackelem = state->buffer_stack;
1267 :
1268 17 : if (stackelem != NULL)
1269 : {
1270 0 : yy_switch_to_buffer(stackelem->buf, state->scanner);
1271 0 : state->curline = stackelem->bufstring;
1272 0 : state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
1273 : }
1274 : else
1275 : {
1276 17 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1277 17 : state->curline = state->scanbuf;
1278 17 : state->refline = state->scanline;
1279 : }
1280 17 : }
1281 :
1282 : /*
1283 : * Check if specified variable name is the source for any string
1284 : * currently being scanned
1285 : */
1286 : bool
1287 17 : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
1288 : {
1289 : StackElem *stackelem;
1290 :
1291 34 : for (stackelem = state->buffer_stack;
1292 : stackelem != NULL;
1293 0 : stackelem = stackelem->next)
1294 : {
1295 0 : if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
1296 0 : return true;
1297 : }
1298 17 : return false;
1299 : }
1300 :
1301 : /*
1302 : * Set up a flex input buffer to scan the given data. We always make a
1303 : * copy of the data. If working in an unsafe encoding, the copy has
1304 : * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1305 : *
1306 : * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1307 : */
1308 : YY_BUFFER_STATE
1309 47191 : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
1310 : char **txtcopy)
1311 : {
1312 : char *newtxt;
1313 :
1314 : /* Flex wants two \0 characters after the actual data */
1315 47191 : newtxt = pg_malloc(len + 2);
1316 47191 : *txtcopy = newtxt;
1317 47191 : newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1318 :
1319 47191 : if (state->safe_encoding)
1320 47191 : memcpy(newtxt, txt, len);
1321 : else
1322 : {
1323 : /* Gotta do it the hard way */
1324 0 : int i = 0;
1325 :
1326 0 : while (i < len)
1327 : {
1328 0 : int thislen = PQmblen(txt + i, state->encoding);
1329 :
1330 : /* first byte should always be okay... */
1331 0 : newtxt[i] = txt[i];
1332 0 : i++;
1333 0 : while (--thislen > 0 && i < len)
1334 0 : newtxt[i++] = (char) 0xFF;
1335 : }
1336 : }
1337 :
1338 47191 : return yy_scan_buffer(newtxt, len + 2, state->scanner);
1339 : }
1340 :
1341 : /*
1342 : * psqlscan_emit() --- body for ECHO macro
1343 : *
1344 : * NB: this must be used for ALL and ONLY the text copied from the flex
1345 : * input data. If you pass it something that is not part of the yytext
1346 : * string, you are making a mistake. Internally generated text can be
1347 : * appended directly to state->output_buf.
1348 : */
1349 : void
1350 532948 : psqlscan_emit(PsqlScanState state, const char *txt, int len)
1351 : {
1352 532948 : PQExpBuffer output_buf = state->output_buf;
1353 :
1354 532948 : if (state->safe_encoding)
1355 532948 : appendBinaryPQExpBuffer(output_buf, txt, len);
1356 : else
1357 : {
1358 : /* Gotta do it the hard way */
1359 0 : const char *reference = state->refline;
1360 : int i;
1361 :
1362 0 : reference += (txt - state->curline);
1363 :
1364 0 : for (i = 0; i < len; i++)
1365 : {
1366 0 : char ch = txt[i];
1367 :
1368 0 : if (ch == (char) 0xFF)
1369 0 : ch = reference[i];
1370 0 : appendPQExpBufferChar(output_buf, ch);
1371 : }
1372 : }
1373 532948 : }
1374 :
1375 : /*
1376 : * psqlscan_extract_substring --- fetch value of (part of) the current token
1377 : *
1378 : * This is like psqlscan_emit(), except that the data is returned as a
1379 : * malloc'd string rather than being pushed directly to state->output_buf.
1380 : */
1381 : char *
1382 116 : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
1383 : {
1384 116 : char *result = (char *) pg_malloc(len + 1);
1385 :
1386 116 : if (state->safe_encoding)
1387 116 : memcpy(result, txt, len);
1388 : else
1389 : {
1390 : /* Gotta do it the hard way */
1391 0 : const char *reference = state->refline;
1392 : int i;
1393 :
1394 0 : reference += (txt - state->curline);
1395 :
1396 0 : for (i = 0; i < len; i++)
1397 : {
1398 0 : char ch = txt[i];
1399 :
1400 0 : if (ch == (char) 0xFF)
1401 0 : ch = reference[i];
1402 0 : result[i] = ch;
1403 : }
1404 : }
1405 116 : result[len] = '\0';
1406 116 : return result;
1407 : }
1408 :
1409 : /*
1410 : * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
1411 : *
1412 : * If the variable name is found, escape its value using the appropriate
1413 : * quoting method and emit the value to output_buf. (Since the result is
1414 : * surely quoted, there is never any reason to rescan it.) If we don't
1415 : * find the variable or escaping fails, emit the token as-is.
1416 : */
1417 : void
1418 9 : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
1419 : PsqlScanQuoteType quote)
1420 : {
1421 : char *varname;
1422 : char *value;
1423 :
1424 : /* Variable lookup. */
1425 9 : varname = psqlscan_extract_substring(state, txt + 2, len - 3);
1426 9 : if (state->callbacks->get_variable)
1427 9 : value = state->callbacks->get_variable(varname, quote,
1428 : state->cb_passthrough);
1429 : else
1430 0 : value = NULL;
1431 9 : free(varname);
1432 :
1433 9 : if (value)
1434 : {
1435 : /* Emit the suitably-escaped value */
1436 3 : appendPQExpBufferStr(state->output_buf, value);
1437 3 : free(value);
1438 : }
1439 : else
1440 : {
1441 : /* Emit original token as-is */
1442 6 : psqlscan_emit(state, txt, len);
1443 : }
1444 9 : }
|