/*-------------------------------------------------------------------------
 *
 * parser.c
 *		Main entry point/driver for PostgreSQL grammar
 *
 * Note that the grammar is not allowed to perform any table access
 * (since we need to be able to do basic parsing even while inside an
 * aborted transaction).  Therefore, the data structures returned by
 * the grammar are "raw" parsetrees that still need to be analyzed by
 * analyze.c and related files.
 *
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/parser/parser.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "parser/gramparse.h"
#include "parser/parser.h"


/*
 * raw_parser
 *		Given a query in string form, do lexical and grammatical analysis.
 *
 * Returns a list of raw (un-analyzed) parse trees.  The immediate elements
 * of the list are always RawStmt nodes.
 */
List *
raw_parser(const char *str)
{
	core_yyscan_t yyscanner;
	base_yy_extra_type yyextra;
	int			yyresult;

	/* initialize the flex scanner */
	yyscanner = scanner_init(str, &yyextra.core_yy_extra,
							 ScanKeywords, NumScanKeywords);

	/* base_yylex() only needs this much initialization */
	yyextra.have_lookahead = false;

	/* initialize the bison parser */
	parser_init(&yyextra);

	/* Parse! */
	yyresult = base_yyparse(yyscanner);

	/* Clean up (release memory) */
	scanner_finish(yyscanner);

	if (yyresult)				/* error */
		return NIL;

	return yyextra.parsetree;
}
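
/*
 * Illustrative sketch (not part of the original file): a typical caller,
 * such as pg_parse_query() in tcop/postgres.c, runs raw_parser() and then
 * walks the returned list of RawStmt nodes, roughly like this:
 *
 *		List	   *parsetree_list = raw_parser(query_string);
 *		ListCell   *lc;
 *
 *		foreach(lc, parsetree_list)
 *		{
 *			RawStmt    *parsetree = lfirst_node(RawStmt, lc);
 *
 *			... pass parsetree on to parse analysis, planning, etc. ...
 *		}
 *
 * The variable names here (query_string, parsetree_list, parsetree) are
 * hypothetical; only raw_parser()'s contract -- a List whose immediate
 * elements are RawStmt nodes -- comes from the function above.
 */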


/*
 * Intermediate filter between parser and core lexer (core_yylex in scan.l).
 *
 * This filter is needed because in some cases the standard SQL grammar
 * requires more than one token lookahead.  We reduce these cases to one-token
 * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
 *
 * Using a filter is simpler than trying to recognize multiword tokens
 * directly in scan.l, because we'd have to allow for comments between the
 * words.  Furthermore it's not clear how to do that without re-introducing
 * scanner backtrack, which would cost more performance than this filter
 * layer does.
 *
 * The filter also provides a convenient place to translate between
 * the core_YYSTYPE and YYSTYPE representations (which are really the
 * same thing anyway, but notationally they're different).
 */
int
base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
{
	base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
	int			cur_token;
	int			next_token;
	int			cur_token_length;
	YYLTYPE		cur_yylloc;

	/* Get next token --- we might already have it */
	if (yyextra->have_lookahead)
	{
		cur_token = yyextra->lookahead_token;
		lvalp->core_yystype = yyextra->lookahead_yylval;
		*llocp = yyextra->lookahead_yylloc;
		*(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
		yyextra->have_lookahead = false;
	}
	else
		cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);

	/*
	 * If this token isn't one that requires lookahead, just return it.  If it
	 * does, determine the token length.  (We could get that via strlen(), but
	 * since we have such a small set of possibilities, hardwiring seems
	 * feasible and more efficient.)
	 */
	switch (cur_token)
	{
		case NOT:
			cur_token_length = 3;
			break;
		case NULLS_P:
			cur_token_length = 5;
			break;
		case WITH:
			cur_token_length = 4;
			break;
		default:
			return cur_token;
	}

	/*
	 * Identify end+1 of current token.  core_yylex() has temporarily stored a
	 * '\0' here, and will undo that when we call it again.  We need to redo
	 * it to fully revert the lookahead call for error reporting purposes.
	 */
	yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
		*llocp + cur_token_length;
	Assert(*(yyextra->lookahead_end) == '\0');

	/*
	 * Save and restore *llocp around the call.  It might look like we could
	 * avoid this by just passing &lookahead_yylloc to core_yylex(), but that
	 * does not work because flex actually holds onto the last-passed pointer
	 * internally, and will use that for error reporting.  We need any error
	 * reports to point to the current token, not the next one.
	 */
	cur_yylloc = *llocp;

	/* Get next token, saving outputs into lookahead variables */
	next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
	yyextra->lookahead_token = next_token;
	yyextra->lookahead_yylloc = *llocp;

	*llocp = cur_yylloc;

	/* Now revert the un-truncation of the current token */
	yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
	*(yyextra->lookahead_end) = '\0';

	yyextra->have_lookahead = true;

	/* Replace cur_token if needed, based on lookahead */
	switch (cur_token)
	{
		case NOT:
			/* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
			switch (next_token)
			{
				case BETWEEN:
				case IN_P:
				case LIKE:
				case ILIKE:
				case SIMILAR:
					cur_token = NOT_LA;
					break;
			}
			break;

		case NULLS_P:
			/* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
			switch (next_token)
			{
				case FIRST_P:
				case LAST_P:
					cur_token = NULLS_LA;
					break;
			}
			break;

		case WITH:
			/* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
			switch (next_token)
			{
				case TIME:
				case ORDINALITY:
					cur_token = WITH_LA;
					break;
			}
			break;
	}

	return cur_token;
}
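
/*
 * Illustrative examples of the substitutions above (not part of the
 * original file).  For these inputs, base_yylex() hands the grammar the
 * following token streams:
 *
 *		... x NOT BETWEEN 1 AND 10 ...    =>  ... IDENT NOT_LA BETWEEN ...
 *		... ORDER BY x NULLS FIRST ...    =>  ... NULLS_LA FIRST_P ...
 *		... TIMESTAMP WITH TIME ZONE ...  =>  ... WITH_LA TIME ...
 *
 * whereas a plain "NOT x", a "WITH cte AS (...)" clause, or an identifier
 * spelled "nulls" keeps the ordinary NOT, WITH, or NULLS_P token, so the
 * grammar stays LALR(1) with a single token of lookahead.
 */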