Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * pl_scanner.c
4 : * lexical scanning for PL/pgSQL
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : *
11 : * IDENTIFICATION
12 : * src/pl/plpgsql/src/pl_scanner.c
13 : *
14 : *-------------------------------------------------------------------------
15 : */
16 : #include "postgres.h"
17 :
18 : #include "mb/pg_wchar.h"
19 : #include "parser/scanner.h"
20 :
21 : #include "plpgsql.h"
22 : #include "pl_gram.h" /* must be after parser/scanner.h */
23 :
24 :
25 : #define PG_KEYWORD(a,b,c) {a,b,c},
26 :
27 :
28 : /* Klugy flag to tell scanner how to look up identifiers */
29 : IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
30 :
31 : /*
32 : * A word about keywords:
33 : *
34 : * We keep reserved and unreserved keywords in separate arrays. The
35 : * reserved keywords are passed to the core scanner, so they will be
36 : * recognized before (and instead of) any variable name. Unreserved words
37 : * are checked for separately, usually after determining that the identifier
38 : * isn't a known variable name. If plpgsql_IdentifierLookup is DECLARE then
39 : * no variable names will be recognized, so the unreserved words always work.
40 : * (Note in particular that this helps us avoid reserving keywords that are
41 : * only needed in DECLARE sections.)
42 : *
43 : * In certain contexts it is desirable to prefer recognizing an unreserved
44 : * keyword over recognizing a variable name. In particular, at the start
45 : * of a statement we should prefer unreserved keywords unless the statement
46 : * looks like an assignment (i.e., first token is followed by ':=', '=', or '[').
47 : * This rule allows most statement-introducing keywords to be kept unreserved.
48 : * (We still have to reserve initial keywords that might follow a block
49 : * label, unfortunately, since the method used to determine if we are at
50 : * start of statement doesn't recognize such cases. We'd also have to
51 : * reserve any keyword that could legitimately be followed by ':=', '=', or '['.)
52 : * Some additional cases are handled in pl_gram.y using tok_is_keyword().
53 : *
54 : * We try to avoid reserving more keywords than we have to; but there's
55 : * little point in not reserving a word if it's reserved in the core grammar.
56 : * Currently, the following words are reserved here but not in the core:
57 : * BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE
58 : */
59 :
60 : /*
61 : * Lists of keyword (name, token-value, category) entries.
62 : *
63 : * !!WARNING!!: These lists must be sorted by ASCII name, because binary
64 : * search is used to locate entries.
65 : *
66 : * Be careful not to put the same word in both lists. Also be sure that
67 : * pl_gram.y's unreserved_keyword production agrees with the second list.
68 : */
69 :
70 : static const ScanKeyword reserved_keywords[] = {
71 : PG_KEYWORD("all", K_ALL, RESERVED_KEYWORD)
72 : PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
73 : PG_KEYWORD("by", K_BY, RESERVED_KEYWORD)
74 : PG_KEYWORD("case", K_CASE, RESERVED_KEYWORD)
75 : PG_KEYWORD("declare", K_DECLARE, RESERVED_KEYWORD)
76 : PG_KEYWORD("else", K_ELSE, RESERVED_KEYWORD)
77 : PG_KEYWORD("end", K_END, RESERVED_KEYWORD)
78 : PG_KEYWORD("execute", K_EXECUTE, RESERVED_KEYWORD)
79 : PG_KEYWORD("for", K_FOR, RESERVED_KEYWORD)
80 : PG_KEYWORD("foreach", K_FOREACH, RESERVED_KEYWORD)
81 : PG_KEYWORD("from", K_FROM, RESERVED_KEYWORD)
82 : PG_KEYWORD("if", K_IF, RESERVED_KEYWORD)
83 : PG_KEYWORD("in", K_IN, RESERVED_KEYWORD)
84 : PG_KEYWORD("into", K_INTO, RESERVED_KEYWORD)
85 : PG_KEYWORD("loop", K_LOOP, RESERVED_KEYWORD)
86 : PG_KEYWORD("not", K_NOT, RESERVED_KEYWORD)
87 : PG_KEYWORD("null", K_NULL, RESERVED_KEYWORD)
88 : PG_KEYWORD("or", K_OR, RESERVED_KEYWORD)
89 : PG_KEYWORD("strict", K_STRICT, RESERVED_KEYWORD)
90 : PG_KEYWORD("then", K_THEN, RESERVED_KEYWORD)
91 : PG_KEYWORD("to", K_TO, RESERVED_KEYWORD)
92 : PG_KEYWORD("using", K_USING, RESERVED_KEYWORD)
93 : PG_KEYWORD("when", K_WHEN, RESERVED_KEYWORD)
94 : PG_KEYWORD("while", K_WHILE, RESERVED_KEYWORD)
95 : };
96 :
97 : static const int num_reserved_keywords = lengthof(reserved_keywords);
98 :
99 : static const ScanKeyword unreserved_keywords[] = {
100 : PG_KEYWORD("absolute", K_ABSOLUTE, UNRESERVED_KEYWORD)
101 : PG_KEYWORD("alias", K_ALIAS, UNRESERVED_KEYWORD)
102 : PG_KEYWORD("array", K_ARRAY, UNRESERVED_KEYWORD)
103 : PG_KEYWORD("assert", K_ASSERT, UNRESERVED_KEYWORD)
104 : PG_KEYWORD("backward", K_BACKWARD, UNRESERVED_KEYWORD)
105 : PG_KEYWORD("close", K_CLOSE, UNRESERVED_KEYWORD)
106 : PG_KEYWORD("collate", K_COLLATE, UNRESERVED_KEYWORD)
107 : PG_KEYWORD("column", K_COLUMN, UNRESERVED_KEYWORD)
108 : PG_KEYWORD("column_name", K_COLUMN_NAME, UNRESERVED_KEYWORD)
109 : PG_KEYWORD("constant", K_CONSTANT, UNRESERVED_KEYWORD)
110 : PG_KEYWORD("constraint", K_CONSTRAINT, UNRESERVED_KEYWORD)
111 : PG_KEYWORD("constraint_name", K_CONSTRAINT_NAME, UNRESERVED_KEYWORD)
112 : PG_KEYWORD("continue", K_CONTINUE, UNRESERVED_KEYWORD)
113 : PG_KEYWORD("current", K_CURRENT, UNRESERVED_KEYWORD)
114 : PG_KEYWORD("cursor", K_CURSOR, UNRESERVED_KEYWORD)
115 : PG_KEYWORD("datatype", K_DATATYPE, UNRESERVED_KEYWORD)
116 : PG_KEYWORD("debug", K_DEBUG, UNRESERVED_KEYWORD)
117 : PG_KEYWORD("default", K_DEFAULT, UNRESERVED_KEYWORD)
118 : PG_KEYWORD("detail", K_DETAIL, UNRESERVED_KEYWORD)
119 : PG_KEYWORD("diagnostics", K_DIAGNOSTICS, UNRESERVED_KEYWORD)
120 : PG_KEYWORD("dump", K_DUMP, UNRESERVED_KEYWORD)
121 : PG_KEYWORD("elseif", K_ELSIF, UNRESERVED_KEYWORD)
122 : PG_KEYWORD("elsif", K_ELSIF, UNRESERVED_KEYWORD)
123 : PG_KEYWORD("errcode", K_ERRCODE, UNRESERVED_KEYWORD)
124 : PG_KEYWORD("error", K_ERROR, UNRESERVED_KEYWORD)
125 : PG_KEYWORD("exception", K_EXCEPTION, UNRESERVED_KEYWORD)
126 : PG_KEYWORD("exit", K_EXIT, UNRESERVED_KEYWORD)
127 : PG_KEYWORD("fetch", K_FETCH, UNRESERVED_KEYWORD)
128 : PG_KEYWORD("first", K_FIRST, UNRESERVED_KEYWORD)
129 : PG_KEYWORD("forward", K_FORWARD, UNRESERVED_KEYWORD)
130 : PG_KEYWORD("get", K_GET, UNRESERVED_KEYWORD)
131 : PG_KEYWORD("hint", K_HINT, UNRESERVED_KEYWORD)
132 : PG_KEYWORD("import", K_IMPORT, UNRESERVED_KEYWORD)
133 : PG_KEYWORD("info", K_INFO, UNRESERVED_KEYWORD)
134 : PG_KEYWORD("insert", K_INSERT, UNRESERVED_KEYWORD)
135 : PG_KEYWORD("is", K_IS, UNRESERVED_KEYWORD)
136 : PG_KEYWORD("last", K_LAST, UNRESERVED_KEYWORD)
137 : PG_KEYWORD("log", K_LOG, UNRESERVED_KEYWORD)
138 : PG_KEYWORD("message", K_MESSAGE, UNRESERVED_KEYWORD)
139 : PG_KEYWORD("message_text", K_MESSAGE_TEXT, UNRESERVED_KEYWORD)
140 : PG_KEYWORD("move", K_MOVE, UNRESERVED_KEYWORD)
141 : PG_KEYWORD("next", K_NEXT, UNRESERVED_KEYWORD)
142 : PG_KEYWORD("no", K_NO, UNRESERVED_KEYWORD)
143 : PG_KEYWORD("notice", K_NOTICE, UNRESERVED_KEYWORD)
144 : PG_KEYWORD("open", K_OPEN, UNRESERVED_KEYWORD)
145 : PG_KEYWORD("option", K_OPTION, UNRESERVED_KEYWORD)
146 : PG_KEYWORD("perform", K_PERFORM, UNRESERVED_KEYWORD)
147 : PG_KEYWORD("pg_context", K_PG_CONTEXT, UNRESERVED_KEYWORD)
148 : PG_KEYWORD("pg_datatype_name", K_PG_DATATYPE_NAME, UNRESERVED_KEYWORD)
149 : PG_KEYWORD("pg_exception_context", K_PG_EXCEPTION_CONTEXT, UNRESERVED_KEYWORD)
150 : PG_KEYWORD("pg_exception_detail", K_PG_EXCEPTION_DETAIL, UNRESERVED_KEYWORD)
151 : PG_KEYWORD("pg_exception_hint", K_PG_EXCEPTION_HINT, UNRESERVED_KEYWORD)
152 : PG_KEYWORD("print_strict_params", K_PRINT_STRICT_PARAMS, UNRESERVED_KEYWORD)
153 : PG_KEYWORD("prior", K_PRIOR, UNRESERVED_KEYWORD)
154 : PG_KEYWORD("query", K_QUERY, UNRESERVED_KEYWORD)
155 : PG_KEYWORD("raise", K_RAISE, UNRESERVED_KEYWORD)
156 : PG_KEYWORD("relative", K_RELATIVE, UNRESERVED_KEYWORD)
157 : PG_KEYWORD("result_oid", K_RESULT_OID, UNRESERVED_KEYWORD)
158 : PG_KEYWORD("return", K_RETURN, UNRESERVED_KEYWORD)
159 : PG_KEYWORD("returned_sqlstate", K_RETURNED_SQLSTATE, UNRESERVED_KEYWORD)
160 : PG_KEYWORD("reverse", K_REVERSE, UNRESERVED_KEYWORD)
161 : PG_KEYWORD("row_count", K_ROW_COUNT, UNRESERVED_KEYWORD)
162 : PG_KEYWORD("rowtype", K_ROWTYPE, UNRESERVED_KEYWORD)
163 : PG_KEYWORD("schema", K_SCHEMA, UNRESERVED_KEYWORD)
164 : PG_KEYWORD("schema_name", K_SCHEMA_NAME, UNRESERVED_KEYWORD)
165 : PG_KEYWORD("scroll", K_SCROLL, UNRESERVED_KEYWORD)
166 : PG_KEYWORD("slice", K_SLICE, UNRESERVED_KEYWORD)
167 : PG_KEYWORD("sqlstate", K_SQLSTATE, UNRESERVED_KEYWORD)
168 : PG_KEYWORD("stacked", K_STACKED, UNRESERVED_KEYWORD)
169 : PG_KEYWORD("table", K_TABLE, UNRESERVED_KEYWORD)
170 : PG_KEYWORD("table_name", K_TABLE_NAME, UNRESERVED_KEYWORD)
171 : PG_KEYWORD("type", K_TYPE, UNRESERVED_KEYWORD)
172 : PG_KEYWORD("use_column", K_USE_COLUMN, UNRESERVED_KEYWORD)
173 : PG_KEYWORD("use_variable", K_USE_VARIABLE, UNRESERVED_KEYWORD)
174 : PG_KEYWORD("variable_conflict", K_VARIABLE_CONFLICT, UNRESERVED_KEYWORD)
175 : PG_KEYWORD("warning", K_WARNING, UNRESERVED_KEYWORD)
176 : };
177 :
178 : static const int num_unreserved_keywords = lengthof(unreserved_keywords);
179 :
/*
 * True when "tok" is one of the tokens that can immediately precede a
 * PL/pgSQL executable statement (proc_sect or proc_stmt in the grammar).
 * The set is small, so it is simply spelled out here.
 *
 * The argument is evaluated several times; pass a side-effect-free
 * expression.
 */
#define AT_STMT_START(tok) \
	((tok) == ';' || \
	 (tok) == K_BEGIN || \
	 (tok) == K_THEN || \
	 (tok) == K_ELSE || \
	 (tok) == K_LOOP)
192 :
193 :
194 : /* Auxiliary data about a token (other than the token type) */
195 : typedef struct
196 : {
197 : YYSTYPE lval; /* semantic information */
198 : YYLTYPE lloc; /* offset in scanbuf */
199 : int leng; /* length in bytes */
200 : } TokenAuxData;
201 :
202 : /*
203 : * Scanner working state. At some point we might wish to fold all this
204 : * into a YY_EXTRA struct. For the moment, there is no need for plpgsql's
205 : * lexer to be re-entrant, and the notational burden of passing a yyscanner
206 : * pointer around is great enough to not want to do it without need.
207 : */
208 :
209 : /* The stuff the core lexer needs */
210 : static core_yyscan_t yyscanner = NULL;
211 : static core_yy_extra_type core_yy;
212 :
213 : /* The original input string */
214 : static const char *scanorig;
215 :
216 : /* Current token's length (corresponds to plpgsql_yylval and plpgsql_yylloc) */
217 : static int plpgsql_yyleng;
218 :
219 : /* Current token's code (corresponds to plpgsql_yylval and plpgsql_yylloc) */
220 : static int plpgsql_yytoken;
221 :
222 : /* Token pushback stack */
223 : #define MAX_PUSHBACKS 4
224 :
225 : static int num_pushbacks;
226 : static int pushback_token[MAX_PUSHBACKS];
227 : static TokenAuxData pushback_auxdata[MAX_PUSHBACKS];
228 :
229 : /* State for plpgsql_location_to_lineno() */
230 : static const char *cur_line_start;
231 : static const char *cur_line_end;
232 : static int cur_line_num;
233 :
234 : /* Internal functions */
235 : static int internal_yylex(TokenAuxData *auxdata);
236 : static void push_back_token(int token, TokenAuxData *auxdata);
237 : static void location_lineno_init(void);
238 :
239 :
240 : /*
241 : * This is the yylex routine called from the PL/pgSQL grammar.
242 : * It is a wrapper around the core lexer, with the ability to recognize
243 : * PL/pgSQL variables and return them as special T_DATUM tokens. If a
244 : * word or compound word does not match any variable name, or if matching
245 : * is turned off by plpgsql_IdentifierLookup, it is returned as
246 : * T_WORD or T_CWORD respectively, or as an unreserved keyword if it
247 : * matches one of those.
248 : */
249 : int
250 25848 : plpgsql_yylex(void)
251 : {
252 : int tok1;
253 : TokenAuxData aux1;
254 : const ScanKeyword *kw;
255 :
256 25848 : tok1 = internal_yylex(&aux1);
257 25848 : if (tok1 == IDENT || tok1 == PARAM)
258 : {
259 : int tok2;
260 : TokenAuxData aux2;
261 :
262 8726 : tok2 = internal_yylex(&aux2);
263 8726 : if (tok2 == '.')
264 : {
265 : int tok3;
266 : TokenAuxData aux3;
267 :
268 923 : tok3 = internal_yylex(&aux3);
269 923 : if (tok3 == IDENT)
270 : {
271 : int tok4;
272 : TokenAuxData aux4;
273 :
274 901 : tok4 = internal_yylex(&aux4);
275 901 : if (tok4 == '.')
276 : {
277 : int tok5;
278 : TokenAuxData aux5;
279 :
280 2 : tok5 = internal_yylex(&aux5);
281 2 : if (tok5 == IDENT)
282 : {
283 2 : if (plpgsql_parse_tripword(aux1.lval.str,
284 : aux3.lval.str,
285 : aux5.lval.str,
286 : &aux1.lval.wdatum,
287 : &aux1.lval.cword))
288 2 : tok1 = T_DATUM;
289 : else
290 0 : tok1 = T_CWORD;
291 : }
292 : else
293 : {
294 : /* not A.B.C, so just process A.B */
295 0 : push_back_token(tok5, &aux5);
296 0 : push_back_token(tok4, &aux4);
297 0 : if (plpgsql_parse_dblword(aux1.lval.str,
298 : aux3.lval.str,
299 : &aux1.lval.wdatum,
300 : &aux1.lval.cword))
301 0 : tok1 = T_DATUM;
302 : else
303 0 : tok1 = T_CWORD;
304 : }
305 : }
306 : else
307 : {
308 : /* not A.B.C, so just process A.B */
309 899 : push_back_token(tok4, &aux4);
310 899 : if (plpgsql_parse_dblword(aux1.lval.str,
311 : aux3.lval.str,
312 : &aux1.lval.wdatum,
313 : &aux1.lval.cword))
314 831 : tok1 = T_DATUM;
315 : else
316 68 : tok1 = T_CWORD;
317 : }
318 : }
319 : else
320 : {
321 : /* not A.B, so just process A */
322 22 : push_back_token(tok3, &aux3);
323 22 : push_back_token(tok2, &aux2);
324 22 : if (plpgsql_parse_word(aux1.lval.str,
325 22 : core_yy.scanbuf + aux1.lloc,
326 : &aux1.lval.wdatum,
327 : &aux1.lval.word))
328 0 : tok1 = T_DATUM;
329 44 : else if (!aux1.lval.word.quoted &&
330 22 : (kw = ScanKeywordLookup(aux1.lval.word.ident,
331 : unreserved_keywords,
332 : num_unreserved_keywords)))
333 : {
334 0 : aux1.lval.keyword = kw->name;
335 0 : tok1 = kw->value;
336 : }
337 : else
338 22 : tok1 = T_WORD;
339 : }
340 : }
341 : else
342 : {
343 : /* not A.B, so just process A */
344 7803 : push_back_token(tok2, &aux2);
345 :
346 : /*
347 : * If we are at start of statement, prefer unreserved keywords
348 : * over variable names, unless the next token is assignment or
349 : * '[', in which case prefer variable names. (Note we need not
350 : * consider '.' as the next token; that case was handled above,
351 : * and we always prefer variable names in that case.) If we are
352 : * not at start of statement, always prefer variable names over
353 : * unreserved keywords.
354 : */
355 7803 : if (AT_STMT_START(plpgsql_yytoken) &&
356 2059 : !(tok2 == '=' || tok2 == COLON_EQUALS || tok2 == '['))
357 : {
358 : /* try for unreserved keyword, then for variable name */
359 5370 : if (core_yy.scanbuf[aux1.lloc] != '"' &&
360 1790 : (kw = ScanKeywordLookup(aux1.lval.str,
361 : unreserved_keywords,
362 : num_unreserved_keywords)))
363 : {
364 1396 : aux1.lval.keyword = kw->name;
365 1396 : tok1 = kw->value;
366 : }
367 394 : else if (plpgsql_parse_word(aux1.lval.str,
368 394 : core_yy.scanbuf + aux1.lloc,
369 : &aux1.lval.wdatum,
370 : &aux1.lval.word))
371 0 : tok1 = T_DATUM;
372 : else
373 394 : tok1 = T_WORD;
374 : }
375 : else
376 : {
377 : /* try for variable name, then for unreserved keyword */
378 6013 : if (plpgsql_parse_word(aux1.lval.str,
379 6013 : core_yy.scanbuf + aux1.lloc,
380 : &aux1.lval.wdatum,
381 : &aux1.lval.word))
382 911 : tok1 = T_DATUM;
383 10204 : else if (!aux1.lval.word.quoted &&
384 5102 : (kw = ScanKeywordLookup(aux1.lval.word.ident,
385 : unreserved_keywords,
386 : num_unreserved_keywords)))
387 : {
388 730 : aux1.lval.keyword = kw->name;
389 730 : tok1 = kw->value;
390 : }
391 : else
392 4372 : tok1 = T_WORD;
393 : }
394 : }
395 : }
396 : else
397 : {
398 : /*
399 : * Not a potential plpgsql variable name, just return the data.
400 : *
401 : * Note that we also come through here if the grammar pushed back a
402 : * T_DATUM, T_CWORD, T_WORD, or unreserved-keyword token returned by a
403 : * previous lookup cycle; thus, pushbacks do not incur extra lookup
404 : * work, since we'll never do the above code twice for the same token.
405 : * This property also makes it safe to rely on the old value of
406 : * plpgsql_yytoken in the is-this-start-of-statement test above.
407 : */
408 : }
409 :
410 25848 : plpgsql_yylval = aux1.lval;
411 25848 : plpgsql_yylloc = aux1.lloc;
412 25848 : plpgsql_yyleng = aux1.leng;
413 25848 : plpgsql_yytoken = tok1;
414 25848 : return tok1;
415 : }
416 :
417 : /*
418 : * Internal yylex function. This wraps the core lexer and adds one feature:
419 : * a token pushback stack. We also make a couple of trivial single-token
420 : * translations from what the core lexer does to what we want, in particular
421 : * interfacing from the core_YYSTYPE to YYSTYPE union.
422 : */
423 : static int
424 36847 : internal_yylex(TokenAuxData *auxdata)
425 : {
426 : int token;
427 : const char *yytext;
428 :
429 36847 : if (num_pushbacks > 0)
430 : {
431 11034 : num_pushbacks--;
432 11034 : token = pushback_token[num_pushbacks];
433 11034 : *auxdata = pushback_auxdata[num_pushbacks];
434 : }
435 : else
436 : {
437 25813 : token = core_yylex(&auxdata->lval.core_yystype,
438 : &auxdata->lloc,
439 : yyscanner);
440 :
441 : /* remember the length of yytext before it gets changed */
442 25813 : yytext = core_yy.scanbuf + auxdata->lloc;
443 25813 : auxdata->leng = strlen(yytext);
444 :
445 : /* Check for << >> and #, which the core considers operators */
446 25813 : if (token == Op)
447 : {
448 187 : if (strcmp(auxdata->lval.str, "<<") == 0)
449 15 : token = LESS_LESS;
450 172 : else if (strcmp(auxdata->lval.str, ">>") == 0)
451 15 : token = GREATER_GREATER;
452 157 : else if (strcmp(auxdata->lval.str, "#") == 0)
453 4 : token = '#';
454 : }
455 :
456 : /* The core returns PARAM as ival, but we treat it like IDENT */
457 25626 : else if (token == PARAM)
458 : {
459 173 : auxdata->lval.str = pstrdup(yytext);
460 : }
461 : }
462 :
463 36847 : return token;
464 : }
465 :
466 : /*
467 : * Push back a token to be re-read by next internal_yylex() call.
468 : */
469 : static void
470 11039 : push_back_token(int token, TokenAuxData *auxdata)
471 : {
472 11039 : if (num_pushbacks >= MAX_PUSHBACKS)
473 0 : elog(ERROR, "too many tokens pushed back");
474 11039 : pushback_token[num_pushbacks] = token;
475 11039 : pushback_auxdata[num_pushbacks] = *auxdata;
476 11039 : num_pushbacks++;
477 11039 : }
478 :
479 : /*
480 : * Push back a single token to be re-read by next plpgsql_yylex() call.
481 : *
482 : * NOTE: this does not cause yylval or yylloc to "back up". Also, it
483 : * is not a good idea to push back a token code other than what you read.
484 : */
485 : void
486 1846 : plpgsql_push_back_token(int token)
487 : {
488 : TokenAuxData auxdata;
489 :
490 1846 : auxdata.lval = plpgsql_yylval;
491 1846 : auxdata.lloc = plpgsql_yylloc;
492 1846 : auxdata.leng = plpgsql_yyleng;
493 1846 : push_back_token(token, &auxdata);
494 1846 : }
495 :
496 : /*
497 : * Tell whether a token is an unreserved keyword.
498 : *
499 : * (If it is, its lowercased form was returned as the token value, so we
500 : * do not need to offer that data here.)
501 : */
502 : bool
503 4 : plpgsql_token_is_unreserved_keyword(int token)
504 : {
505 : int i;
506 :
507 308 : for (i = 0; i < num_unreserved_keywords; i++)
508 : {
509 304 : if (unreserved_keywords[i].value == token)
510 0 : return true;
511 : }
512 4 : return false;
513 : }
514 :
515 : /*
516 : * Append the function text starting at startlocation and extending to
517 : * (not including) endlocation onto the existing contents of "buf".
518 : */
519 : void
520 3002 : plpgsql_append_source_text(StringInfo buf,
521 : int startlocation, int endlocation)
522 : {
523 3002 : Assert(startlocation <= endlocation);
524 3002 : appendBinaryStringInfo(buf, scanorig + startlocation,
525 : endlocation - startlocation);
526 3002 : }
527 :
528 : /*
529 : * Peek one token ahead in the input stream. Only the token code is
530 : * made available, not any of the auxiliary info such as location.
531 : *
532 : * NB: no variable or unreserved keyword lookup is performed here, they will
533 : * be returned as IDENT. Reserved keywords are resolved as usual.
534 : */
535 : int
536 399 : plpgsql_peek(void)
537 : {
538 : int tok1;
539 : TokenAuxData aux1;
540 :
541 399 : tok1 = internal_yylex(&aux1);
542 399 : push_back_token(tok1, &aux1);
543 399 : return tok1;
544 : }
545 :
546 : /*
547 : * Peek two tokens ahead in the input stream. The first token and its
548 : * location in the query are returned in *tok1_p and *tok1_loc, second token
549 : * and its location in *tok2_p and *tok2_loc.
550 : *
551 : * NB: no variable or unreserved keyword lookup is performed here, they will
552 : * be returned as IDENT. Reserved keywords are resolved as usual.
553 : */
554 : void
555 24 : plpgsql_peek2(int *tok1_p, int *tok2_p, int *tok1_loc, int *tok2_loc)
556 : {
557 : int tok1,
558 : tok2;
559 : TokenAuxData aux1,
560 : aux2;
561 :
562 24 : tok1 = internal_yylex(&aux1);
563 24 : tok2 = internal_yylex(&aux2);
564 :
565 24 : *tok1_p = tok1;
566 24 : if (tok1_loc)
567 24 : *tok1_loc = aux1.lloc;
568 24 : *tok2_p = tok2;
569 24 : if (tok2_loc)
570 0 : *tok2_loc = aux2.lloc;
571 :
572 24 : push_back_token(tok2, &aux2);
573 24 : push_back_token(tok1, &aux1);
574 24 : }
575 :
576 : /*
577 : * plpgsql_scanner_errposition
578 : * Report an error cursor position, if possible.
579 : *
580 : * This is expected to be used within an ereport() call. The return value
581 : * is a dummy (always 0, in fact).
582 : *
583 : * Note that this can only be used for messages emitted during initial
584 : * parsing of a plpgsql function, since it requires the scanorig string
585 : * to still be available.
586 : */
587 : int
588 29 : plpgsql_scanner_errposition(int location)
589 : {
590 : int pos;
591 :
592 29 : if (location < 0 || scanorig == NULL)
593 0 : return 0; /* no-op if location is unknown */
594 :
595 : /* Convert byte offset to character number */
596 29 : pos = pg_mbstrlen_with_len(scanorig, location) + 1;
597 : /* And pass it to the ereport mechanism */
598 29 : (void) internalerrposition(pos);
599 : /* Also pass the function body string */
600 29 : return internalerrquery(scanorig);
601 : }
602 :
603 : /*
604 : * plpgsql_yyerror
605 : * Report a lexer or grammar error.
606 : *
607 : * The message's cursor position refers to the current token (the one
608 : * last returned by plpgsql_yylex()).
609 : * This is OK for syntax error messages from the Bison parser, because Bison
610 : * parsers report error as soon as the first unparsable token is reached.
611 : * Beware of using yyerror for other purposes, as the cursor position might
612 : * be misleading!
613 : */
614 : void
615 1 : plpgsql_yyerror(const char *message)
616 : {
617 1 : char *yytext = core_yy.scanbuf + plpgsql_yylloc;
618 :
619 1 : if (*yytext == '\0')
620 : {
621 0 : ereport(ERROR,
622 : (errcode(ERRCODE_SYNTAX_ERROR),
623 : /* translator: %s is typically the translation of "syntax error" */
624 : errmsg("%s at end of input", _(message)),
625 : plpgsql_scanner_errposition(plpgsql_yylloc)));
626 : }
627 : else
628 : {
629 : /*
630 : * If we have done any lookahead then flex will have restored the
631 : * character after the end-of-token. Zap it again so that we report
632 : * only the single token here. This modifies scanbuf but we no longer
633 : * care about that.
634 : */
635 1 : yytext[plpgsql_yyleng] = '\0';
636 :
637 1 : ereport(ERROR,
638 : (errcode(ERRCODE_SYNTAX_ERROR),
639 : /* translator: first %s is typically the translation of "syntax error" */
640 : errmsg("%s at or near \"%s\"", _(message), yytext),
641 : plpgsql_scanner_errposition(plpgsql_yylloc)));
642 : }
643 : }
644 :
645 : /*
646 : * Given a location (a byte offset in the function source text),
647 : * return a line number.
648 : *
649 : * We expect that this is typically called for a sequence of increasing
650 : * location values, so optimize accordingly by tracking the endpoints
651 : * of the "current" line.
652 : */
653 : int
654 3779 : plpgsql_location_to_lineno(int location)
655 : {
656 : const char *loc;
657 :
658 3779 : if (location < 0 || scanorig == NULL)
659 0 : return 0; /* garbage in, garbage out */
660 3779 : loc = scanorig + location;
661 :
662 : /* be correct, but not fast, if input location goes backwards */
663 3779 : if (loc < cur_line_start)
664 1167 : location_lineno_init();
665 :
666 24474 : while (cur_line_end != NULL && loc > cur_line_end)
667 : {
668 16916 : cur_line_start = cur_line_end + 1;
669 16916 : cur_line_num++;
670 16916 : cur_line_end = strchr(cur_line_start, '\n');
671 : }
672 :
673 3779 : return cur_line_num;
674 : }
675 :
676 : /* initialize or reset the state for plpgsql_location_to_lineno */
677 : static void
678 1731 : location_lineno_init(void)
679 : {
680 1731 : cur_line_start = scanorig;
681 1731 : cur_line_num = 1;
682 :
683 1731 : cur_line_end = strchr(cur_line_start, '\n');
684 1731 : }
685 :
686 : /* return the most recently computed lineno */
687 : int
688 3 : plpgsql_latest_lineno(void)
689 : {
690 3 : return cur_line_num;
691 : }
692 :
693 :
694 : /*
695 : * Called before any actual parsing is done
696 : *
697 : * Note: the passed "str" must remain valid until plpgsql_scanner_finish().
698 : * Although it is not fed directly to flex, we need the original string
699 : * to cite in error messages.
700 : */
701 : void
702 564 : plpgsql_scanner_init(const char *str)
703 : {
704 : /* Start up the core scanner */
705 564 : yyscanner = scanner_init(str, &core_yy,
706 : reserved_keywords, num_reserved_keywords);
707 :
708 : /*
709 : * scanorig points to the original string, which unlike the scanner's
710 : * scanbuf won't be modified on-the-fly by flex. Notice that although
711 : * yytext points into scanbuf, we rely on being able to apply locations
712 : * (offsets from string start) to scanorig as well.
713 : */
714 564 : scanorig = str;
715 :
716 : /* Other setup */
717 564 : plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL;
718 564 : plpgsql_yytoken = 0;
719 :
720 564 : num_pushbacks = 0;
721 :
722 564 : location_lineno_init();
723 564 : }
724 :
725 : /*
726 : * Called after parsing is done to clean up after plpgsql_scanner_init()
727 : */
728 : void
729 541 : plpgsql_scanner_finish(void)
730 : {
731 : /* release storage */
732 541 : scanner_finish(yyscanner);
733 : /* avoid leaving any dangling pointers */
734 541 : yyscanner = NULL;
735 541 : scanorig = NULL;
736 541 : }
|