Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include <wctype.h>
15 : #include "sql_mem.h"
16 : #include "sql_scan.h"
17 : #include "sql_types.h"
18 : #include "sql_symbol.h"
19 : #include "sql_mvc.h"
20 : #include "sql_parser.tab.h"
21 : #include "sql_semantic.h"
22 : #include "sql_parser.h" /* for sql_error() */
23 :
24 : #include "stream.h"
25 : #include "mapi_prompt.h"
26 : #include <unistd.h>
27 : #include <string.h>
28 : #include <ctype.h>
29 : #include "sql_keyword.h"
30 :
31 : /**
32 : * Removes all comments before the query. In query comments are kept.
33 : */
34 : char *
35 395611 : query_cleaned(allocator *sa, const char *query)
36 : {
37 395611 : char *q, *r, *c = NULL;
38 395611 : int lines = 0;
39 395611 : int quote = 0; /* inside quotes ('..', "..", {..}) */
40 395611 : bool bs = false; /* seen a backslash in a quoted string */
41 395611 : bool incomment1 = false; /* inside traditional C style comment */
42 395611 : bool incomment2 = false; /* inside comment starting with -- */
43 395611 : bool inline_comment = false;
44 :
45 395611 : r = SA_NEW_ARRAY(sa, char, strlen(query) + 1);
46 395608 : if(!r)
47 : return NULL;
48 :
49 66561768 : for (q = r; *query; query++) {
50 66166160 : if (incomment1) {
51 16116 : if (*query == '/' && query[-1] == '*') {
52 233 : incomment1 = false;
53 233 : if (c == r && lines > 0) {
54 225 : q = r; // reset to beginning
55 225 : lines = 0;
56 225 : continue;
57 : }
58 : }
59 15891 : if (*query == '\n') lines++;
60 15891 : *q++ = *query;
61 66150044 : } else if (incomment2) {
62 824353 : if (*query == '\n') {
63 2778 : incomment2 = false;
64 2778 : inline_comment = false;
65 : /* add newline only if comment doesn't
66 : * occupy whole line */
67 2778 : if (q > r && q[-1] != '\n'){
68 883 : *q++ = '\n';
69 883 : lines++;
70 : }
71 821575 : } else if (inline_comment){
72 17378 : *q++ = *query; // preserve in line query comments
73 : }
74 65325691 : } else if (quote) {
75 22099462 : if (bs) {
76 : bs = false;
77 22096221 : } else if (*query == '\\') {
78 : bs = true;
79 22092980 : } else if (*query == quote) {
80 662611 : quote = 0;
81 : }
82 22099462 : *q++ = *query;
83 43226229 : } else if (*query == '"' || *query == '\'') {
84 662143 : quote = *query;
85 662143 : *q++ = *query;
86 42564086 : } else if (*query == '{') {
87 513 : quote = '}';
88 513 : *q++ = *query;
89 42563573 : } else if (*query == '-' && query[1] == '-') {
90 2778 : if (q > r && q[-1] != '\n') {
91 883 : inline_comment = true;
92 883 : *q++ = *query; // preserve in line query comments
93 : }
94 : incomment2 = true;
95 42560795 : } else if (*query == '/' && query[1] == '*') {
96 233 : incomment1 = true;
97 233 : c = q;
98 233 : *q++ = *query;
99 42560562 : } else if (*query == '\n') {
100 : /* collapse newlines */
101 854876 : if (q > r && q[-1] != '\n') {
102 812899 : *q++ = '\n';
103 812899 : lines++;
104 : }
105 41705686 : } else if (*query == ' ' || *query == '\t') {
106 : /* collapse white space */
107 6872554 : if (q > r && q[-1] != ' ')
108 5396450 : *q++ = ' ';
109 : } else {
110 34833132 : *q++ = *query;
111 : }
112 : }
113 395608 : *q = 0;
114 395608 : return r;
115 : }
116 :
117 : int
118 315 : scanner_init_keywords(void)
119 : {
120 315 : int failed = 0;
121 :
122 315 : failed += keywords_insert("false", BOOL_FALSE);
123 315 : failed += keywords_insert("true", BOOL_TRUE);
124 315 : failed += keywords_insert("bool", sqlBOOL);
125 :
126 315 : failed += keywords_insert("ALTER", ALTER);
127 315 : failed += keywords_insert("ADD", ADD);
128 315 : failed += keywords_insert("AND", AND);
129 :
130 315 : failed += keywords_insert("RANK", RANK);
131 315 : failed += keywords_insert("DENSE_RANK", RANK);
132 315 : failed += keywords_insert("PERCENT_RANK", RANK);
133 315 : failed += keywords_insert("CUME_DIST", RANK);
134 315 : failed += keywords_insert("ROW_NUMBER", RANK);
135 315 : failed += keywords_insert("NTILE", RANK);
136 315 : failed += keywords_insert("LAG", RANK);
137 315 : failed += keywords_insert("LEAD", RANK);
138 315 : failed += keywords_insert("FETCH", FETCH);
139 315 : failed += keywords_insert("FIRST_VALUE", RANK);
140 315 : failed += keywords_insert("LAST_VALUE", RANK);
141 315 : failed += keywords_insert("NTH_VALUE", RANK);
142 :
143 315 : failed += keywords_insert("BEST", BEST);
144 315 : failed += keywords_insert("EFFORT", EFFORT);
145 :
146 315 : failed += keywords_insert("AS", AS);
147 315 : failed += keywords_insert("ASC", ASC);
148 315 : failed += keywords_insert("AUTHORIZATION", AUTHORIZATION);
149 315 : failed += keywords_insert("BETWEEN", BETWEEN);
150 315 : failed += keywords_insert("SYMMETRIC", SYMMETRIC);
151 315 : failed += keywords_insert("ASYMMETRIC", ASYMMETRIC);
152 315 : failed += keywords_insert("BY", BY);
153 315 : failed += keywords_insert("CAST", CAST);
154 315 : failed += keywords_insert("CONVERT", CONVERT);
155 315 : failed += keywords_insert("CHARACTER", CHARACTER);
156 315 : failed += keywords_insert("CHAR", CHARACTER);
157 315 : failed += keywords_insert("VARYING", VARYING);
158 315 : failed += keywords_insert("VARCHAR", VARCHAR);
159 315 : failed += keywords_insert("BINARY", BINARY);
160 315 : failed += keywords_insert("LARGE", LARGE);
161 315 : failed += keywords_insert("OBJECT", OBJECT);
162 315 : failed += keywords_insert("CLOB", CLOB);
163 315 : failed += keywords_insert("BLOB", sqlBLOB);
164 315 : failed += keywords_insert("TEXT", sqlTEXT);
165 315 : failed += keywords_insert("TINYTEXT", sqlTEXT);
166 315 : failed += keywords_insert("STRING", CLOB); /* ? */
167 315 : failed += keywords_insert("CHECK", CHECK);
168 315 : failed += keywords_insert("CLIENT", CLIENT);
169 315 : failed += keywords_insert("SERVER", SERVER);
170 315 : failed += keywords_insert("COMMENT", COMMENT);
171 315 : failed += keywords_insert("CONSTRAINT", CONSTRAINT);
172 315 : failed += keywords_insert("CREATE", CREATE);
173 315 : failed += keywords_insert("CROSS", CROSS);
174 315 : failed += keywords_insert("COPY", COPY);
175 315 : failed += keywords_insert("RECORDS", RECORDS);
176 315 : failed += keywords_insert("DELIMITERS", DELIMITERS);
177 315 : failed += keywords_insert("STDIN", STDIN);
178 315 : failed += keywords_insert("STDOUT", STDOUT);
179 :
180 315 : failed += keywords_insert("TINYINT", TINYINT);
181 315 : failed += keywords_insert("SMALLINT", SMALLINT);
182 315 : failed += keywords_insert("INTEGER", sqlINTEGER);
183 315 : failed += keywords_insert("INT", sqlINTEGER);
184 315 : failed += keywords_insert("MEDIUMINT", sqlINTEGER);
185 315 : failed += keywords_insert("BIGINT", BIGINT);
186 : #ifdef HAVE_HGE
187 315 : failed += keywords_insert("HUGEINT", HUGEINT);
188 : #endif
189 315 : failed += keywords_insert("DEC", sqlDECIMAL);
190 315 : failed += keywords_insert("DECIMAL", sqlDECIMAL);
191 315 : failed += keywords_insert("NUMERIC", sqlDECIMAL);
192 315 : failed += keywords_insert("DECLARE", DECLARE);
193 315 : failed += keywords_insert("DEFAULT", DEFAULT);
194 315 : failed += keywords_insert("DESC", DESC);
195 315 : failed += keywords_insert("DISTINCT", DISTINCT);
196 315 : failed += keywords_insert("DOUBLE", sqlDOUBLE);
197 315 : failed += keywords_insert("REAL", sqlREAL);
198 315 : failed += keywords_insert("DROP", DROP);
199 315 : failed += keywords_insert("ESCAPE", ESCAPE);
200 315 : failed += keywords_insert("EXISTS", EXISTS);
201 315 : failed += keywords_insert("UESCAPE", UESCAPE);
202 315 : failed += keywords_insert("EXTRACT", EXTRACT);
203 315 : failed += keywords_insert("FLOAT", sqlFLOAT);
204 315 : failed += keywords_insert("FOR", FOR);
205 315 : failed += keywords_insert("FOREIGN", FOREIGN);
206 315 : failed += keywords_insert("FROM", FROM);
207 315 : failed += keywords_insert("FWF", FWF);
208 :
209 315 : failed += keywords_insert("BIG", BIG);
210 315 : failed += keywords_insert("LITTLE", LITTLE);
211 315 : failed += keywords_insert("NATIVE", NATIVE);
212 315 : failed += keywords_insert("ENDIAN", ENDIAN);
213 :
214 315 : failed += keywords_insert("REFERENCES", REFERENCES);
215 :
216 315 : failed += keywords_insert("MATCH", MATCH);
217 315 : failed += keywords_insert("FULL", FULL);
218 315 : failed += keywords_insert("PARTIAL", PARTIAL);
219 315 : failed += keywords_insert("SIMPLE", SIMPLE);
220 :
221 315 : failed += keywords_insert("INSERT", INSERT);
222 315 : failed += keywords_insert("UPDATE", UPDATE);
223 315 : failed += keywords_insert("DELETE", sqlDELETE);
224 315 : failed += keywords_insert("TRUNCATE", TRUNCATE);
225 315 : failed += keywords_insert("MATCHED", MATCHED);
226 :
227 315 : failed += keywords_insert("ACTION", ACTION);
228 315 : failed += keywords_insert("CASCADE", CASCADE);
229 315 : failed += keywords_insert("RESTRICT", RESTRICT);
230 315 : failed += keywords_insert("FIRST", FIRST);
231 315 : failed += keywords_insert("GLOBAL", GLOBAL);
232 315 : failed += keywords_insert("GROUP", sqlGROUP);
233 315 : failed += keywords_insert("GROUPING", GROUPING);
234 315 : failed += keywords_insert("ROLLUP", ROLLUP);
235 315 : failed += keywords_insert("CUBE", CUBE);
236 315 : failed += keywords_insert("HAVING", HAVING);
237 315 : failed += keywords_insert("ILIKE", ILIKE);
238 315 : failed += keywords_insert("IMPRINTS", IMPRINTS);
239 315 : failed += keywords_insert("IN", sqlIN);
240 315 : failed += keywords_insert("INNER", INNER);
241 315 : failed += keywords_insert("INTO", INTO);
242 315 : failed += keywords_insert("IS", IS);
243 315 : failed += keywords_insert("JOIN", JOIN);
244 315 : failed += keywords_insert("KEY", KEY);
245 315 : failed += keywords_insert("LATERAL", LATERAL);
246 315 : failed += keywords_insert("LEFT", LEFT);
247 315 : failed += keywords_insert("LIKE", LIKE);
248 315 : failed += keywords_insert("LIMIT", LIMIT);
249 315 : failed += keywords_insert("SAMPLE", SAMPLE);
250 315 : failed += keywords_insert("SEED", SEED);
251 315 : failed += keywords_insert("LAST", LAST);
252 315 : failed += keywords_insert("LOCAL", LOCAL);
253 315 : failed += keywords_insert("NATURAL", NATURAL);
254 315 : failed += keywords_insert("NOT", NOT);
255 315 : failed += keywords_insert("NULL", sqlNULL);
256 315 : failed += keywords_insert("NULLS", NULLS);
257 315 : failed += keywords_insert("OFFSET", OFFSET);
258 315 : failed += keywords_insert("ON", ON);
259 315 : failed += keywords_insert("OPTIONS", OPTIONS);
260 315 : failed += keywords_insert("OPTION", OPTION);
261 315 : failed += keywords_insert("OR", OR);
262 315 : failed += keywords_insert("ORDER", ORDER);
263 315 : failed += keywords_insert("ORDERED", ORDERED);
264 315 : failed += keywords_insert("OUTER", OUTER);
265 315 : failed += keywords_insert("OVER", OVER);
266 315 : failed += keywords_insert("PARTITION", PARTITION);
267 315 : failed += keywords_insert("PATH", PATH);
268 315 : failed += keywords_insert("PRECISION", PRECISION);
269 315 : failed += keywords_insert("PRIMARY", PRIMARY);
270 :
271 315 : failed += keywords_insert("USER", USER);
272 315 : failed += keywords_insert("RENAME", RENAME);
273 315 : failed += keywords_insert("UNENCRYPTED", UNENCRYPTED);
274 315 : failed += keywords_insert("ENCRYPTED", ENCRYPTED);
275 315 : failed += keywords_insert("PASSWORD", PASSWORD);
276 315 : failed += keywords_insert("GRANT", GRANT);
277 315 : failed += keywords_insert("REVOKE", REVOKE);
278 315 : failed += keywords_insert("ROLE", ROLE);
279 315 : failed += keywords_insert("ADMIN", ADMIN);
280 315 : failed += keywords_insert("PRIVILEGES", PRIVILEGES);
281 315 : failed += keywords_insert("PUBLIC", PUBLIC);
282 315 : failed += keywords_insert("CURRENT_USER", CURRENT_USER);
283 315 : failed += keywords_insert("CURRENT_ROLE", CURRENT_ROLE);
284 315 : failed += keywords_insert("SESSION_USER", SESSION_USER);
285 315 : failed += keywords_insert("CURRENT_SCHEMA", CURRENT_SCHEMA);
286 315 : failed += keywords_insert("SESSION", sqlSESSION);
287 315 : failed += keywords_insert("MAX_MEMORY", MAX_MEMORY);
288 315 : failed += keywords_insert("MAX_WORKERS", MAX_WORKERS);
289 315 : failed += keywords_insert("OPTIMIZER", OPTIMIZER);
290 :
291 315 : failed += keywords_insert("RIGHT", RIGHT);
292 315 : failed += keywords_insert("SCHEMA", SCHEMA);
293 315 : failed += keywords_insert("SELECT", SELECT);
294 315 : failed += keywords_insert("SET", SET);
295 315 : failed += keywords_insert("SETS", SETS);
296 315 : failed += keywords_insert("AUTO_COMMIT", AUTO_COMMIT);
297 :
298 315 : failed += keywords_insert("ALL", ALL);
299 315 : failed += keywords_insert("ANY", ANY);
300 315 : failed += keywords_insert("SOME", SOME);
301 315 : failed += keywords_insert("EVERY", ANY);
302 : /*
303 : failed += keywords_insert("SQLCODE", SQLCODE );
304 : */
305 315 : failed += keywords_insert("COLUMN", COLUMN);
306 315 : failed += keywords_insert("TABLE", TABLE);
307 315 : failed += keywords_insert("TEMPORARY", TEMPORARY);
308 315 : failed += keywords_insert("TEMP", TEMP);
309 315 : failed += keywords_insert("REMOTE", REMOTE);
310 315 : failed += keywords_insert("MERGE", MERGE);
311 315 : failed += keywords_insert("REPLICA", REPLICA);
312 315 : failed += keywords_insert("UNLOGGED", UNLOGGED);
313 315 : failed += keywords_insert("TO", TO);
314 315 : failed += keywords_insert("UNION", UNION);
315 315 : failed += keywords_insert("EXCEPT", EXCEPT);
316 315 : failed += keywords_insert("INTERSECT", INTERSECT);
317 315 : failed += keywords_insert("CORRESPONDING", CORRESPONDING);
318 315 : failed += keywords_insert("UNIQUE", UNIQUE);
319 315 : failed += keywords_insert("USING", USING);
320 315 : failed += keywords_insert("VALUES", VALUES);
321 315 : failed += keywords_insert("VIEW", VIEW);
322 315 : failed += keywords_insert("WHERE", WHERE);
323 315 : failed += keywords_insert("WITH", WITH);
324 315 : failed += keywords_insert("WITHIN", WITHIN);
325 315 : failed += keywords_insert("WITHOUT", WITHOUT);
326 315 : failed += keywords_insert("DATA", DATA);
327 :
328 315 : failed += keywords_insert("DATE", sqlDATE);
329 315 : failed += keywords_insert("TIME", TIME);
330 315 : failed += keywords_insert("TIMESTAMP", TIMESTAMP);
331 315 : failed += keywords_insert("INTERVAL", INTERVAL);
332 315 : failed += keywords_insert("CURRENT_DATE", CURRENT_DATE);
333 315 : failed += keywords_insert("CURRENT_TIME", CURRENT_TIME);
334 315 : failed += keywords_insert("CURRENT_TIMESTAMP", CURRENT_TIMESTAMP);
335 315 : failed += keywords_insert("CURRENT_TIMEZONE", CURRENT_TIMEZONE);
336 315 : failed += keywords_insert("NOW", CURRENT_TIMESTAMP);
337 315 : failed += keywords_insert("LOCALTIME", LOCALTIME);
338 315 : failed += keywords_insert("LOCALTIMESTAMP", LOCALTIMESTAMP);
339 315 : failed += keywords_insert("ZONE", ZONE);
340 :
341 315 : failed += keywords_insert("CENTURY", CENTURY);
342 315 : failed += keywords_insert("DECADE", DECADE);
343 315 : failed += keywords_insert("YEAR", YEAR);
344 315 : failed += keywords_insert("QUARTER", QUARTER);
345 315 : failed += keywords_insert("MONTH", MONTH);
346 315 : failed += keywords_insert("WEEK", WEEK);
347 315 : failed += keywords_insert("DOW", DOW);
348 315 : failed += keywords_insert("DOY", DOY);
349 315 : failed += keywords_insert("DAY", DAY);
350 315 : failed += keywords_insert("HOUR", HOUR);
351 315 : failed += keywords_insert("MINUTE", MINUTE);
352 315 : failed += keywords_insert("SECOND", SECOND);
353 315 : failed += keywords_insert("EPOCH", EPOCH);
354 :
355 315 : failed += keywords_insert("POSITION", POSITION);
356 315 : failed += keywords_insert("SUBSTRING", SUBSTRING);
357 315 : failed += keywords_insert("SPLIT_PART", SPLIT_PART);
358 315 : failed += keywords_insert("TRIM", TRIM);
359 315 : failed += keywords_insert("LEADING", LEADING);
360 315 : failed += keywords_insert("TRAILING", TRAILING);
361 315 : failed += keywords_insert("BOTH", BOTH);
362 :
363 315 : failed += keywords_insert("CASE", CASE);
364 315 : failed += keywords_insert("WHEN", WHEN);
365 315 : failed += keywords_insert("THEN", THEN);
366 315 : failed += keywords_insert("ELSE", ELSE);
367 315 : failed += keywords_insert("END", END);
368 315 : failed += keywords_insert("NULLIF", NULLIF);
369 315 : failed += keywords_insert("COALESCE", COALESCE);
370 315 : failed += keywords_insert("ELSEIF", ELSEIF);
371 315 : failed += keywords_insert("IF", IF);
372 315 : failed += keywords_insert("WHILE", WHILE);
373 315 : failed += keywords_insert("DO", DO);
374 :
375 315 : failed += keywords_insert("COMMIT", COMMIT);
376 315 : failed += keywords_insert("ROLLBACK", ROLLBACK);
377 315 : failed += keywords_insert("SAVEPOINT", SAVEPOINT);
378 315 : failed += keywords_insert("RELEASE", RELEASE);
379 315 : failed += keywords_insert("WORK", WORK);
380 315 : failed += keywords_insert("CHAIN", CHAIN);
381 315 : failed += keywords_insert("PRESERVE", PRESERVE);
382 315 : failed += keywords_insert("ROWS", ROWS);
383 315 : failed += keywords_insert("NO", NO);
384 315 : failed += keywords_insert("START", START);
385 315 : failed += keywords_insert("TRANSACTION", TRANSACTION);
386 315 : failed += keywords_insert("READ", READ);
387 315 : failed += keywords_insert("WRITE", WRITE);
388 315 : failed += keywords_insert("ONLY", ONLY);
389 315 : failed += keywords_insert("ISOLATION", ISOLATION);
390 315 : failed += keywords_insert("LEVEL", LEVEL);
391 315 : failed += keywords_insert("UNCOMMITTED", UNCOMMITTED);
392 315 : failed += keywords_insert("COMMITTED", COMMITTED);
393 315 : failed += keywords_insert("REPEATABLE", sqlREPEATABLE);
394 315 : failed += keywords_insert("SNAPSHOT", SNAPSHOT);
395 315 : failed += keywords_insert("SERIALIZABLE", SERIALIZABLE);
396 315 : failed += keywords_insert("DIAGNOSTICS", DIAGNOSTICS);
397 315 : failed += keywords_insert("SIZE", sqlSIZE);
398 315 : failed += keywords_insert("STORAGE", STORAGE);
399 :
400 315 : failed += keywords_insert("TYPE", TYPE);
401 315 : failed += keywords_insert("PROCEDURE", PROCEDURE);
402 315 : failed += keywords_insert("FUNCTION", FUNCTION);
403 315 : failed += keywords_insert("LOADER", sqlLOADER);
404 315 : failed += keywords_insert("REPLACE", REPLACE);
405 :
406 315 : failed += keywords_insert("FIELD", FIELD);
407 315 : failed += keywords_insert("FILTER", FILTER);
408 315 : failed += keywords_insert("AGGREGATE", AGGREGATE);
409 315 : failed += keywords_insert("RETURNS", RETURNS);
410 315 : failed += keywords_insert("EXTERNAL", EXTERNAL);
411 315 : failed += keywords_insert("NAME", sqlNAME);
412 315 : failed += keywords_insert("RETURN", RETURN);
413 315 : failed += keywords_insert("CALL", CALL);
414 315 : failed += keywords_insert("LANGUAGE", LANGUAGE);
415 :
416 315 : failed += keywords_insert("ANALYZE", ANALYZE);
417 315 : failed += keywords_insert("EXPLAIN", SQL_EXPLAIN);
418 315 : failed += keywords_insert("PLAN", SQL_PLAN);
419 315 : failed += keywords_insert("TRACE", SQL_TRACE);
420 315 : failed += keywords_insert("PREPARE", PREPARE);
421 315 : failed += keywords_insert("PREP", PREP);
422 315 : failed += keywords_insert("EXECUTE", EXECUTE);
423 315 : failed += keywords_insert("EXEC", EXEC);
424 315 : failed += keywords_insert("DEALLOCATE", DEALLOCATE);
425 :
426 315 : failed += keywords_insert("INDEX", INDEX);
427 :
428 315 : failed += keywords_insert("SEQUENCE", SEQUENCE);
429 315 : failed += keywords_insert("RESTART", RESTART);
430 315 : failed += keywords_insert("INCREMENT", INCREMENT);
431 315 : failed += keywords_insert("MAXVALUE", MAXVALUE);
432 315 : failed += keywords_insert("MINVALUE", MINVALUE);
433 315 : failed += keywords_insert("CYCLE", CYCLE);
434 315 : failed += keywords_insert("CACHE", CACHE);
435 315 : failed += keywords_insert("NEXT", NEXT);
436 315 : failed += keywords_insert("VALUE", VALUE);
437 315 : failed += keywords_insert("GENERATED", GENERATED);
438 315 : failed += keywords_insert("ALWAYS", ALWAYS);
439 315 : failed += keywords_insert("IDENTITY", IDENTITY);
440 315 : failed += keywords_insert("SERIAL", SERIAL);
441 315 : failed += keywords_insert("BIGSERIAL", BIGSERIAL);
442 315 : failed += keywords_insert("AUTO_INCREMENT", AUTO_INCREMENT);
443 315 : failed += keywords_insert("CONTINUE", CONTINUE);
444 :
445 315 : failed += keywords_insert("TRIGGER", TRIGGER);
446 315 : failed += keywords_insert("ATOMIC", ATOMIC);
447 315 : failed += keywords_insert("BEGIN", BEGIN);
448 315 : failed += keywords_insert("OF", OF);
449 315 : failed += keywords_insert("BEFORE", BEFORE);
450 315 : failed += keywords_insert("AFTER", AFTER);
451 315 : failed += keywords_insert("ROW", ROW);
452 315 : failed += keywords_insert("STATEMENT", STATEMENT);
453 315 : failed += keywords_insert("NEW", sqlNEW);
454 315 : failed += keywords_insert("OLD", OLD);
455 315 : failed += keywords_insert("EACH", EACH);
456 315 : failed += keywords_insert("REFERENCING", REFERENCING);
457 :
458 315 : failed += keywords_insert("RANGE", RANGE);
459 315 : failed += keywords_insert("UNBOUNDED", UNBOUNDED);
460 315 : failed += keywords_insert("PRECEDING", PRECEDING);
461 315 : failed += keywords_insert("FOLLOWING", FOLLOWING);
462 315 : failed += keywords_insert("CURRENT", CURRENT);
463 315 : failed += keywords_insert("EXCLUDE", EXCLUDE);
464 315 : failed += keywords_insert("OTHERS", OTHERS);
465 315 : failed += keywords_insert("TIES", TIES);
466 315 : failed += keywords_insert("GROUPS", GROUPS);
467 315 : failed += keywords_insert("WINDOW", WINDOW);
468 :
469 : /* special SQL/XML keywords */
470 315 : failed += keywords_insert("XMLCOMMENT", XMLCOMMENT);
471 315 : failed += keywords_insert("XMLCONCAT", XMLCONCAT);
472 315 : failed += keywords_insert("XMLDOCUMENT", XMLDOCUMENT);
473 315 : failed += keywords_insert("XMLELEMENT", XMLELEMENT);
474 315 : failed += keywords_insert("XMLATTRIBUTES", XMLATTRIBUTES);
475 315 : failed += keywords_insert("XMLFOREST", XMLFOREST);
476 315 : failed += keywords_insert("XMLPARSE", XMLPARSE);
477 315 : failed += keywords_insert("STRIP", STRIP);
478 315 : failed += keywords_insert("WHITESPACE", WHITESPACE);
479 315 : failed += keywords_insert("XMLPI", XMLPI);
480 315 : failed += keywords_insert("XMLQUERY", XMLQUERY);
481 315 : failed += keywords_insert("PASSING", PASSING);
482 315 : failed += keywords_insert("XMLTEXT", XMLTEXT);
483 315 : failed += keywords_insert("NIL", NIL);
484 315 : failed += keywords_insert("REF", REF);
485 315 : failed += keywords_insert("ABSENT", ABSENT);
486 315 : failed += keywords_insert("DOCUMENT", DOCUMENT);
487 315 : failed += keywords_insert("ELEMENT", ELEMENT);
488 315 : failed += keywords_insert("CONTENT", CONTENT);
489 315 : failed += keywords_insert("XMLNAMESPACES", XMLNAMESPACES);
490 315 : failed += keywords_insert("NAMESPACE", NAMESPACE);
491 315 : failed += keywords_insert("XMLVALIDATE", XMLVALIDATE);
492 315 : failed += keywords_insert("RETURNING", RETURNING);
493 315 : failed += keywords_insert("LOCATION", LOCATION);
494 315 : failed += keywords_insert("ID", ID);
495 315 : failed += keywords_insert("ACCORDING", ACCORDING);
496 315 : failed += keywords_insert("XMLSCHEMA", XMLSCHEMA);
497 315 : failed += keywords_insert("URI", URI);
498 315 : failed += keywords_insert("XMLAGG", XMLAGG);
499 :
500 : /* keywords for opengis */
501 315 : failed += keywords_insert("GEOMETRY", GEOMETRY);
502 :
503 315 : failed += keywords_insert("POINT", GEOMETRYSUBTYPE);
504 315 : failed += keywords_insert("LINESTRING", GEOMETRYSUBTYPE);
505 315 : failed += keywords_insert("POLYGON", GEOMETRYSUBTYPE);
506 315 : failed += keywords_insert("MULTIPOINT", GEOMETRYSUBTYPE);
507 315 : failed += keywords_insert("MULTILINESTRING", GEOMETRYSUBTYPE);
508 315 : failed += keywords_insert("MULTIPOLYGON", GEOMETRYSUBTYPE);
509 315 : failed += keywords_insert("GEOMETRYCOLLECTION", GEOMETRYSUBTYPE);
510 :
511 315 : failed += keywords_insert("POINTZ", GEOMETRYSUBTYPE);
512 315 : failed += keywords_insert("LINESTRINGZ", GEOMETRYSUBTYPE);
513 315 : failed += keywords_insert("POLYGONZ", GEOMETRYSUBTYPE);
514 315 : failed += keywords_insert("MULTIPOINTZ", GEOMETRYSUBTYPE);
515 315 : failed += keywords_insert("MULTILINESTRINGZ", GEOMETRYSUBTYPE);
516 315 : failed += keywords_insert("MULTIPOLYGONZ", GEOMETRYSUBTYPE);
517 315 : failed += keywords_insert("GEOMETRYCOLLECTIONZ", GEOMETRYSUBTYPE);
518 :
519 315 : failed += keywords_insert("POINTM", GEOMETRYSUBTYPE);
520 315 : failed += keywords_insert("LINESTRINGM", GEOMETRYSUBTYPE);
521 315 : failed += keywords_insert("POLYGONM", GEOMETRYSUBTYPE);
522 315 : failed += keywords_insert("MULTIPOINTM", GEOMETRYSUBTYPE);
523 315 : failed += keywords_insert("MULTILINESTRINGM", GEOMETRYSUBTYPE);
524 315 : failed += keywords_insert("MULTIPOLYGONM", GEOMETRYSUBTYPE);
525 315 : failed += keywords_insert("GEOMETRYCOLLECTIONM", GEOMETRYSUBTYPE);
526 :
527 315 : failed += keywords_insert("POINTZM", GEOMETRYSUBTYPE);
528 315 : failed += keywords_insert("LINESTRINGZM", GEOMETRYSUBTYPE);
529 315 : failed += keywords_insert("POLYGONZM", GEOMETRYSUBTYPE);
530 315 : failed += keywords_insert("MULTIPOINTZM", GEOMETRYSUBTYPE);
531 315 : failed += keywords_insert("MULTILINESTRINGZM", GEOMETRYSUBTYPE);
532 315 : failed += keywords_insert("MULTIPOLYGONZM", GEOMETRYSUBTYPE);
533 315 : failed += keywords_insert("GEOMETRYCOLLECTIONZM", GEOMETRYSUBTYPE);
534 315 : failed += keywords_insert("LOGIN", LOGIN);
535 : // odbc keywords
536 315 : failed += keywords_insert("d", ODBC_DATE_ESCAPE_PREFIX);
537 315 : failed += keywords_insert("t", ODBC_TIME_ESCAPE_PREFIX);
538 315 : failed += keywords_insert("ts", ODBC_TIMESTAMP_ESCAPE_PREFIX);
539 315 : failed += keywords_insert("guid", ODBC_GUID_ESCAPE_PREFIX);
540 315 : failed += keywords_insert("fn", ODBC_FUNC_ESCAPE_PREFIX);
541 315 : failed += keywords_insert("oj", ODBC_OJ_ESCAPE_PREFIX);
542 315 : failed += keywords_insert("DAYNAME", DAYNAME);
543 315 : failed += keywords_insert("IFNULL", IFNULL);
544 315 : failed += keywords_insert("MONTHNAME", MONTHNAME);
545 315 : failed += keywords_insert("TIMESTAMPADD", TIMESTAMPADD);
546 315 : failed += keywords_insert("TIMESTAMPDIFF", TIMESTAMPDIFF);
547 315 : failed += keywords_insert("SQL_BIGINT", SQL_BIGINT);
548 315 : failed += keywords_insert("SQL_BINARY", SQL_BINARY);
549 315 : failed += keywords_insert("SQL_BIT", SQL_BIT);
550 315 : failed += keywords_insert("SQL_CHAR", SQL_CHAR);
551 315 : failed += keywords_insert("SQL_DATE", SQL_DATE);
552 315 : failed += keywords_insert("SQL_DECIMAL", SQL_DECIMAL);
553 315 : failed += keywords_insert("SQL_DOUBLE", SQL_DOUBLE);
554 315 : failed += keywords_insert("SQL_FLOAT", SQL_FLOAT);
555 315 : failed += keywords_insert("SQL_GUID", SQL_GUID);
556 315 : failed += keywords_insert("SQL_HUGEINT", SQL_HUGEINT);
557 315 : failed += keywords_insert("SQL_INTEGER", SQL_INTEGER);
558 315 : failed += keywords_insert("SQL_INTERVAL_DAY", SQL_INTERVAL_DAY);
559 315 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_HOUR", SQL_INTERVAL_DAY_TO_HOUR);
560 315 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_MINUTE", SQL_INTERVAL_DAY_TO_MINUTE);
561 315 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_SECOND", SQL_INTERVAL_DAY_TO_SECOND);
562 315 : failed += keywords_insert("SQL_INTERVAL_HOUR", SQL_INTERVAL_HOUR);
563 315 : failed += keywords_insert("SQL_INTERVAL_HOUR_TO_MINUTE", SQL_INTERVAL_HOUR_TO_MINUTE);
564 315 : failed += keywords_insert("SQL_INTERVAL_HOUR_TO_SECOND", SQL_INTERVAL_HOUR_TO_SECOND);
565 315 : failed += keywords_insert("SQL_INTERVAL_MINUTE", SQL_INTERVAL_MINUTE);
566 315 : failed += keywords_insert("SQL_INTERVAL_MINUTE_TO_SECOND", SQL_INTERVAL_MINUTE_TO_SECOND);
567 315 : failed += keywords_insert("SQL_INTERVAL_MONTH", SQL_INTERVAL_MONTH);
568 315 : failed += keywords_insert("SQL_INTERVAL_SECOND", SQL_INTERVAL_SECOND);
569 315 : failed += keywords_insert("SQL_INTERVAL_YEAR", SQL_INTERVAL_YEAR);
570 315 : failed += keywords_insert("SQL_INTERVAL_YEAR_TO_MONTH", SQL_INTERVAL_YEAR_TO_MONTH);
571 315 : failed += keywords_insert("SQL_LONGVARBINARY", SQL_LONGVARBINARY);
572 315 : failed += keywords_insert("SQL_LONGVARCHAR", SQL_LONGVARCHAR);
573 315 : failed += keywords_insert("SQL_NUMERIC", SQL_NUMERIC);
574 315 : failed += keywords_insert("SQL_REAL", SQL_REAL);
575 315 : failed += keywords_insert("SQL_SMALLINT", SQL_SMALLINT);
576 315 : failed += keywords_insert("SQL_TIME", SQL_TIME);
577 315 : failed += keywords_insert("SQL_TIMESTAMP", SQL_TIMESTAMP);
578 315 : failed += keywords_insert("SQL_TINYINT", SQL_TINYINT);
579 315 : failed += keywords_insert("SQL_VARBINARY", SQL_VARBINARY);
580 315 : failed += keywords_insert("SQL_VARCHAR", SQL_VARCHAR);
581 315 : failed += keywords_insert("SQL_WCHAR", SQL_WCHAR);
582 315 : failed += keywords_insert("SQL_WLONGVARCHAR", SQL_WLONGVARCHAR);
583 315 : failed += keywords_insert("SQL_WVARCHAR", SQL_WVARCHAR);
584 315 : failed += keywords_insert("SQL_TSI_FRAC_SECOND", SQL_TSI_FRAC_SECOND);
585 315 : failed += keywords_insert("SQL_TSI_SECOND", SQL_TSI_SECOND);
586 315 : failed += keywords_insert("SQL_TSI_MINUTE", SQL_TSI_MINUTE);
587 315 : failed += keywords_insert("SQL_TSI_HOUR", SQL_TSI_HOUR);
588 315 : failed += keywords_insert("SQL_TSI_DAY", SQL_TSI_DAY);
589 315 : failed += keywords_insert("SQL_TSI_WEEK", SQL_TSI_WEEK);
590 315 : failed += keywords_insert("SQL_TSI_MONTH", SQL_TSI_MONTH);
591 315 : failed += keywords_insert("SQL_TSI_QUARTER", SQL_TSI_QUARTER);
592 315 : failed += keywords_insert("SQL_TSI_YEAR", SQL_TSI_YEAR);
593 :
594 315 : failed += keywords_insert("LEAST", MARGFUNC);
595 315 : failed += keywords_insert("GREATEST", MARGFUNC);
596 315 : return failed;
597 : }
598 :
/* Look up the keyword that starts `s` bytes past the current read position
 * of scanner `lc`'s input buffer.  Both arguments are parenthesized so that
 * arbitrary expressions can be passed safely. */
#define find_keyword_bs(lc, s) find_keyword((lc)->rs->buf + (lc)->rs->pos + (s))
600 :
601 : void
602 242727 : scanner_init(struct scanner *s, bstream *rs, stream *ws)
603 : {
604 485454 : *s = (struct scanner) {
605 : .rs = rs,
606 : .ws = ws,
607 : .mode = LINE_N,
608 242727 : .raw_string_mode = GDKgetenv_istrue("raw_strings"),
609 : .aborted = false,
610 : };
611 242727 : }
612 :
613 : void
614 1216974 : scanner_query_processed(struct scanner *s)
615 : {
616 1216974 : int cur;
617 :
618 1216974 : if (s->yybak) {
619 464259 : s->rs->buf[s->rs->pos + s->yycur] = s->yybak;
620 464259 : s->yybak = 0;
621 : }
622 1216974 : if (s->rs) {
623 1216974 : s->rs->pos += s->yycur;
624 : /* completely eat the query including white space after the ; */
625 2325876 : while (s->rs->pos < s->rs->len &&
626 2004062 : (cur = s->rs->buf[s->rs->pos], iswspace(cur))) {
627 1108902 : s->rs->pos++;
628 : }
629 : }
630 : /*assert(s->rs->pos <= s->rs->len);*/
631 1216974 : s->yycur = 0;
632 1216974 : s->started = 0;
633 1216974 : s->as = 0;
634 1216974 : s->schema = NULL;
635 1216974 : }
636 :
637 : static int
638 33 : scanner_error(mvc *lc, int cur)
639 : {
640 33 : switch (cur) {
641 0 : case EOF:
642 0 : (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input");
643 0 : return EOF;
644 33 : default:
645 : /* on Windows at least, iswcntrl returns TRUE for
646 : * U+FEFF, but we just want consistent error
647 : * messages */
648 33 : (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected%s character (U+%04X)", iswcntrl(cur) && cur != 0xFEFF ? " control" : "", (unsigned) cur);
649 : }
650 33 : return LEX_ERROR;
651 : }
652 :
653 :
654 : /*
655 : UTF-8 encoding is as follows:
656 : U-00000000 - U-0000007F: 0xxxxxxx
657 : U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
658 : U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
659 : U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
660 : U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
661 : U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
662 : */
/* Correctly coded UTF-8 always uses the shortest possible encoding of a
   value.  Hence, for an encoding of length n+1 (1 <= n <= 5), at least
   one of the bits selected by utf8chkmsk[n] must be set in the decoded
   value; if none is, a shorter encoding would have sufficed.
*/
static const int utf8chkmsk[] = {
	[0] = 0x0000007f,
	[1] = 0x00000780,
	[2] = 0x0000f800,
	[3] = 0x001f0000,
	[4] = 0x03e00000,
	[5] = 0x7c000000
};
677 :
678 : static void
679 29817949 : utf8_putchar(struct scanner *lc, int ch)
680 : {
681 29817949 : if ((ch) < 0x80) {
682 29817944 : lc->yycur--;
683 5 : } else if ((ch) < 0x800) {
684 0 : lc->yycur -= 2;
685 5 : } else if ((ch) < 0x10000) {
686 5 : lc->yycur -= 3;
687 : } else {
688 0 : lc->yycur -= 4;
689 : }
690 29817949 : }
691 :
/*
 * Make sure that at least n bytes are available in the read buffer
 * starting at the current scan position (b->pos + lc->yycur), fetching
 * more input from the stream when needed.
 *
 * Returns 1 when the bytes are available and EOF when they are not:
 * the scanner was aborted, the scanner is in LINE_1 mode or no query
 * has been started, reading failed, or no further data arrived.
 */
692 : static inline int
693 130276457 : scanner_read_more(struct scanner *lc, size_t n)
694 : {
695 130276457 : bstream *b = lc->rs;
/* set once we have prompted the client for a continuation */
696 130276457 : bool more = false;
697 :
698 :
699 130276457 : if (lc->aborted)
700 : return EOF;
701 130280764 : while (b->len < b->pos + lc->yycur + n) {
702 :
/* never block for more input in LINE_1 mode or between queries */
703 139369 : if (lc->mode == LINE_1 || !lc->started)
704 : return EOF;
705 :
706 : /* query is not finished ask for more */
707 6919 : if (b->eof || !isa_block_stream(b->s)) {
/* a positive bstream_getoob() result is treated as an abort request */
708 4762 : if (bstream_getoob(b)) {
709 0 : lc->aborted = true;
710 0 : return EOF;
711 : }
/* send the continuation prompt; only flush when the write succeeded */
712 2157 : if (mnstr_write(lc->ws, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1)
713 2157 : mnstr_flush(lc->ws, MNSTR_FLUSH_DATA);
714 2157 : b->eof = false;
715 2157 : more = true;
716 : }
717 : /* we need more query text */
718 4314 : if (bstream_next(b) < 0) {
719 0 : if (mnstr_errnr(b->s) == MNSTR_INTERRUPT) {
720 : // now what?
721 0 : lc->errstr = "Query aborted";
722 0 : lc->aborted = true;
723 0 : mnstr_clearerr(b->s);
724 : }
725 0 : return EOF;
726 4314 : } else if (/* we asked for more data but didn't get any */
727 2157 : (more && b->eof && b->len < b->pos + lc->yycur + n))
728 : return EOF;
/* after prompting, a reply that consists of exactly the two bytes
 * '\200' '\n' is taken as an abort: the two bytes are removed from
 * the buffer again and EOF is returned */
729 4307 : if (more && b->pos + lc->yycur + 2 == b->len && b->buf[b->pos + lc->yycur] == '\200' && b->buf[b->pos + lc->yycur + 1] == '\n') {
730 0 : lc->errstr = "Query aborted";
731 0 : b->len -= 2;
732 0 : b->buf[b->len] = 0;
733 0 : return EOF;
734 : }
735 : }
736 : return 1;
737 : }
738 :
/*
 * Read the next character from the input and return it as a code
 * point, decoding a multi-byte UTF-8 sequence when the first byte has
 * its high bit set.  lc->yycur is advanced past the bytes consumed.
 *
 * Returns EOF when no input is available or when the byte sequence is
 * not valid UTF-8; in the latter case lc->errstr describes the problem.
 * On a successful first read lc->errstr is cleared.
 */
739 : static inline int
740 129038105 : scanner_getc(struct scanner *lc)
741 : {
742 129038105 : bstream *b = lc->rs;
743 129038105 : unsigned char *s = NULL;
744 129038105 : int c, m, n, mask;
745 :
746 129038105 : if (scanner_read_more(lc, 1) == EOF) {
747 : //lc->errstr = SQLSTATE(42000) "end of input stream";
748 : return EOF;
749 : }
750 128905618 : lc->errstr = NULL;
751 :
752 128905618 : s = (unsigned char *) b->buf + b->pos + lc->yycur++;
753 128905618 : if (((c = *s) & 0x80) == 0) {
754 : /* 7-bit char */
755 : return c;
756 : }
/* count the leading 1 bits that follow the top bit of the first byte */
757 88250 : for (n = 0, m = 0x40; c & m; n++, m >>= 1)
758 : ;
759 : /* n now is number of 10xxxxxx bytes that should follow */
/* NOTE(review): the length test below uses b->pos + n and does not
 * include lc->yycur; the scanner_read_more() call further down does
 * the exact availability check -- confirm this pre-check is intended */
760 29443 : if (n == 0 || n >= 6 || (b->pos + n) > b->len) {
761 : /* incorrect UTF-8 sequence */
762 : /* n==0: c == 10xxxxxx */
763 : /* n>=6: c == 1111111x */
764 0 : lc->errstr = SQLSTATE(42000) "invalid start of UTF-8 sequence";
765 0 : goto error;
766 : }
767 :
768 29443 : if (scanner_read_more(lc, (size_t) n) == EOF)
769 : return EOF;
/* recompute the pointer from b->buf after scanner_read_more() */
770 29443 : s = (unsigned char *) b->buf + b->pos + lc->yycur;
771 :
772 29443 : mask = utf8chkmsk[n];
773 29443 : c &= ~(0xFFC0 >> n); /* remove non-x bits */
/* fold in six payload bits from each continuation byte */
774 88249 : while (--n >= 0) {
775 58807 : c <<= 6;
776 58807 : lc->yycur++;
777 58807 : if (((m = *s++) & 0xC0) != 0x80) {
778 : /* incorrect UTF-8 sequence: byte is not 10xxxxxx */
779 : /* this includes end-of-string (m == 0) */
780 1 : lc->errstr = SQLSTATE(42000) "invalid continuation in UTF-8 sequence";
781 1 : goto error;
782 : }
783 58806 : c |= m & 0x3F;
784 : }
785 29442 : if ((c & mask) == 0) {
786 : /* incorrect UTF-8 sequence: not shortest possible */
787 0 : lc->errstr = SQLSTATE(42000) "not shortest possible UTF-8 sequence";
788 0 : goto error;
789 : }
790 :
791 : return c;
792 :
793 1 : error:
794 1 : if (b->pos + lc->yycur < b->len) /* skip bogus char */
795 0 : lc->yycur++;
796 : return EOF;
797 : }
798 :
799 : static int
800 26424563 : scanner_token(struct scanner *lc, int token)
801 : {
802 26424563 : lc->yybak = lc->rs->buf[lc->rs->pos + lc->yycur];
803 26424563 : lc->rs->buf[lc->rs->pos + lc->yycur] = 0;
804 26424563 : lc->yyval = token;
805 26424563 : return lc->yyval;
806 : }
807 :
/*
 * Scan a quoted string or quoted identifier whose opening quote
 * character (quote) has already been consumed.
 *
 * quote:   the delimiter, also used as the initial value of cur
 * escapes: when true, a backslash toggles the escape state so that a
 *          backslash-escaped quote does not end the string
 *
 * Returns the STRING token (via scanner_token()) when the closing quote
 * is found, LEX_ERROR for an over-long string or an embedded NUL byte,
 * the result of scanner_error() for EOF or a U+FEFF inside a
 * double-quoted string, and EOF when the input ends early.
 */
808 : static int
809 1961536 : scanner_string(mvc *c, int quote, bool escapes)
810 : {
811 1961536 : struct scanner *lc = &c->scanner;
812 1961536 : bstream *rs = lc->rs;
813 1961536 : int cur = quote;
814 1961536 : bool escape = false;
/* maximum number of bytes scanned per pass of the inner loop */
815 1961536 : const size_t limit = quote == '"' ? 1 << 11 : 1 << 30;
816 :
817 1961536 : lc->started = 1;
818 1999144 : while (cur != EOF) {
819 1999129 : size_t pos = 0;
820 1999129 : const size_t yycur = rs->pos + lc->yycur;
821 :
/* fast path: walk plain 7-bit bytes directly in the buffer until a
 * NUL, a byte with the high bit set, an unescaped quote, or the
 * limit is reached */
822 34034720 : while (cur != EOF && (quote != '"' || cur != 0xFEFF) && pos < limit &&
823 32035591 : (((cur = rs->buf[yycur + pos++]) & 0x80) == 0) &&
824 64041728 : cur && (cur != quote || escape)) {
825 30036463 : if (escapes && cur == '\\')
826 6556 : escape = !escape;
827 : else
828 : escape = false;
829 : }
830 1999129 : if (pos == limit) {
831 0 : (void) sql_error(c, 2, SQLSTATE(42000) "string too long");
832 0 : return LEX_ERROR;
833 : }
834 : /* BOM character not allowed as an identifier */
835 1999129 : if (cur == EOF || (quote == '"' && cur == 0xFEFF))
836 1 : return scanner_error(c, cur);
837 1999128 : lc->yycur += pos;
838 : /* check for quote escaped quote: Obscure SQL Rule */
839 1999128 : if (cur == quote && rs->buf[yycur + pos] == quote) {
840 8167 : lc->yycur++;
841 8167 : continue;
842 : }
843 1990961 : assert(yycur + pos <= rs->len + 1);
844 1990961 : if (cur == quote && !escape) {
845 1961506 : return scanner_token(lc, STRING);
846 : }
847 29455 : lc->yycur--; /* go back to current (possibly invalid) char */
848 : /* long utf8, if correct isn't the quote */
849 29455 : if (!cur) {
/* a NUL that lies within the data read so far is an error; otherwise
 * it is the end of the buffered data and more input is requested */
850 30 : if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
851 14 : (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
852 14 : return LEX_ERROR;
853 : }
854 16 : cur = scanner_read_more(lc, 1);
855 : } else {
/* high-bit byte: let scanner_getc() decode and validate it */
856 29425 : cur = scanner_getc(lc);
857 : }
858 : }
859 15 : (void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "Unexpected end of input");
860 15 : return EOF;
861 : }
862 :
863 : /* scan a structure {blah} into a string. We only count the matching {}
864 : * unless escaped. We do not consider embeddings in string literals yet
865 : */
866 :
/*
 * Scan a brace-delimited body whose opening '{' has already been
 * consumed (asserted below).  Braces are counted in blk, starting at 1
 * for the opening brace.
 *
 * Returns the X_BODY token (via scanner_token()) once the matching '}'
 * has been seen, LEX_ERROR on an embedded NUL byte, and EOF when the
 * input ends before the body is closed.
 */
867 : static int
868 234 : scanner_body(mvc *c)
869 : {
870 234 : struct scanner *lc = &c->scanner;
871 234 : bstream *rs = lc->rs;
/* any value other than EOF will do to enter the loop */
872 234 : int cur = (int) 'x';
873 234 : int blk = 1;
874 234 : bool escape = false;
875 :
876 234 : lc->started = 1;
877 234 : assert(rs->buf[rs->pos + lc->yycur-1] == '{');
878 290 : while (cur != EOF) {
879 290 : size_t pos = rs->pos + lc->yycur;
880 :
/* walk 7-bit bytes, tracking the backslash escape state and the
 * brace depth; stops on NUL, on a high-bit byte, or after the
 * depth has dropped to zero */
881 32350 : while ((((cur = rs->buf[pos++]) & 0x80) == 0) && cur && (blk || escape)) {
882 32060 : if (cur != '\\')
883 : escape = false;
884 : else
885 12 : escape = !escape;
886 32060 : blk += cur =='{';
887 32060 : blk -= cur =='}';
888 : }
889 290 : lc->yycur = pos - rs->pos;
890 290 : assert(pos <= rs->len + 1);
891 290 : if (blk == 0 && !escape){
892 234 : lc->yycur--; /* go back to current (possibly invalid) char */
893 234 : return scanner_token(lc, X_BODY);
894 : }
895 56 : lc->yycur--; /* go back to current (possibly invalid) char */
896 56 : if (!cur) {
/* a NUL within the data read so far is an error; otherwise ask for
 * more input */
897 56 : if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
898 0 : (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
899 0 : return LEX_ERROR;
900 : }
901 56 : cur = scanner_read_more(lc, 1);
902 : } else {
/* high-bit byte: decode it through scanner_getc() */
903 0 : cur = scanner_getc(lc);
904 : }
905 : }
906 0 : (void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input");
907 0 : return EOF;
908 : }
909 :
910 : static int
911 12902338 : keyword_or_ident(mvc * c, int cur)
912 : {
913 12902338 : struct scanner *lc = &c->scanner;
914 12902338 : keyword *k = NULL;
915 12902338 : size_t s;
916 :
917 12902338 : lc->started = 1;
918 12902338 : utf8_putchar(lc, cur);
919 12902305 : s = lc->yycur;
920 12902305 : lc->yyval = IDENT;
921 77515310 : while ((cur = scanner_getc(lc)) != EOF) {
922 77515226 : if (!iswalnum(cur) && cur != '_') {
923 12902221 : utf8_putchar(lc, cur);
924 12902233 : (void)scanner_token(lc, IDENT);
925 12902233 : if ((k = find_keyword_bs(lc,s)))
926 7921675 : lc->yyval = k->token;
927 12902366 : return lc->yyval;
928 : }
929 : }
930 : if (cur < 0)
931 : return cur;
932 : (void)scanner_token(lc, IDENT);
933 : if ((k = find_keyword_bs(lc,s)))
934 : lc->yyval = k->token;
935 : return lc->yyval;
936 : }
937 :
938 : static int
939 13269378 : skip_white_space(struct scanner * lc)
940 : {
941 16845128 : int cur;
942 :
943 16845128 : do {
944 16845128 : lc->yysval = lc->yycur;
945 16845128 : } while ((cur = scanner_getc(lc)) != EOF && iswspace(cur));
946 13269413 : return cur;
947 : }
948 :
949 : static int
950 66810 : skip_c_comment(struct scanner * lc)
951 : {
952 66810 : int cur;
953 66810 : int prev = 0;
954 66810 : int started = lc->started;
955 66810 : int depth = 1;
956 :
957 66810 : lc->started = 1;
958 1347788 : while (depth > 0 && (cur = scanner_getc(lc)) != EOF) {
959 1280978 : if (prev == '*' && cur == '/')
960 66810 : depth--;
961 1214168 : else if (prev == '/' && cur == '*') {
962 : /* block comments can nest */
963 0 : cur = 0; /* prevent slash-star-slash from matching */
964 0 : depth++;
965 : }
966 : prev = cur;
967 : }
968 66810 : lc->yysval = lc->yycur;
969 66810 : lc->started = started;
970 : /* a comment is equivalent to a newline */
971 66810 : return cur == EOF ? cur : '\n';
972 : }
973 :
974 : static int
975 2762 : skip_sql_comment(struct scanner * lc)
976 : {
977 2762 : int cur;
978 2762 : int started = lc->started;
979 :
980 2762 : lc->started = 1;
981 816068 : while ((cur = scanner_getc(lc)) != EOF && (cur != '\n'))
982 : ;
983 2762 : lc->yysval = lc->yycur;
984 2762 : lc->started = started;
985 : /* a comment is equivalent to a newline */
986 2762 : return cur;
987 : }
988 :
989 : static int tokenize(mvc * lc, int cur);
990 :
/* digit classifiers for the number bases the scanner understands */

/* true for a decimal digit */
static inline bool
is_valid_decimal_digit(int cur)
{
	return iswdigit(cur) != 0;
}

/* true for '0' and '1' */
static inline bool
is_valid_binary_digit(int cur)
{
	return cur < '2' && iswdigit(cur) != 0;
}

/* true for '0' through '7' */
static inline bool
is_valid_octal_digit(int cur)
{
	return cur < '8' && iswdigit(cur) != 0;
}

/* true for a hexadecimal digit */
static inline bool
is_valid_hexadecimal_digit(int cur)
{
	return iswxdigit(cur) != 0;
}
995 :
996 1664320 : static inline int check_validity_number(mvc* c, int pcur, bool initial_underscore_allowed, int *token, int type) {
997 1664320 : struct scanner *lc = &c->scanner;
998 1664320 : bool (*is_valid_n_ary_digit)(int);
999 :
1000 1664320 : if (pcur == '_' && !initial_underscore_allowed) /* ERROR: initial underscore not allowed */ {
1001 0 : *token = 0;
1002 0 : return '_';
1003 : }
1004 :
1005 1664320 : switch (type) {
1006 : case BINARYNUM:
1007 : is_valid_n_ary_digit = &is_valid_binary_digit;
1008 : break;
1009 3 : case OCTALNUM:
1010 3 : is_valid_n_ary_digit = &is_valid_octal_digit;
1011 3 : break;
1012 280 : case HEXADECIMALNUM:
1013 280 : is_valid_n_ary_digit = &is_valid_hexadecimal_digit;
1014 280 : break;
1015 1664035 : default:
1016 1664035 : is_valid_n_ary_digit = &is_valid_decimal_digit;
1017 1664035 : break;
1018 : }
1019 :
1020 1664320 : if ( !(pcur == '_' || is_valid_n_ary_digit(pcur)) ) /* ERROR: first digit is not valid */ {
1021 17 : *token = 0;
1022 17 : return pcur;
1023 : }
1024 :
1025 1664187 : int cur = scanner_getc(lc);
1026 1664139 : *token = type;
1027 3351893 : while (cur != EOF) {
1028 3351896 : if (cur == '_') {
1029 25 : if (pcur == '_') /* ERROR: multiple consecutive underscores */ {
1030 2 : *token = 0;
1031 2 : return '_';
1032 : }
1033 : }
1034 3351871 : else if (!is_valid_n_ary_digit(cur))
1035 : break;
1036 1687882 : pcur = cur;
1037 1687882 : cur = scanner_getc(lc);
1038 : }
1039 :
1040 1663810 : if (pcur == '_') {
1041 3 : *token = 0;
1042 3 : if (iswalnum(cur)) /* ERROR: not a valid digit */
1043 : return cur;
1044 : else /* ERROR: number ends with underscore */
1045 : return '_';
1046 : }
1047 :
1048 : return cur;
1049 : }
1050 :
1051 : static int
1052 1651059 : number(mvc * c, int cur)
1053 : {
1054 1651059 : struct scanner *lc = &c->scanner;
1055 1651059 : int token = sqlINT;
1056 :
1057 : /* a number has one of these forms (expressed in regular expressions):
1058 : * 0x[0-9A-Fa-f]+ -- (hexadecimal) INTEGER
1059 : * \.[0-9]+ -- DECIMAL
1060 : * [0-9]+\.[0-9]* -- DECIMAL
1061 : * [0-9]+@0 -- OID
1062 : * [0-9]*\.[0-9]+[eE][-+]?[0-9]+ -- REAL
1063 : * [0-9]+(\.[0-9]*)?[eE][-+]?[0-9]+ -- REAL
1064 : * [0-9]+ -- (decimal) INTEGER
1065 : */
1066 1651059 : lc->started = 1;
1067 1651059 : if (cur == '0') {
1068 229856 : switch ((cur = scanner_getc(lc))) {
1069 2 : case 'b':
1070 2 : cur = scanner_getc(lc);
1071 2 : if ((cur = check_validity_number(c, cur, true, &token, BINARYNUM)) == EOF) return cur;
1072 : break;
1073 3 : case 'o':
1074 3 : cur = scanner_getc(lc);
1075 3 : if ((cur = check_validity_number(c, cur, true, &token, OCTALNUM)) == EOF) return cur;
1076 : break;
1077 280 : case 'x':
1078 280 : cur = scanner_getc(lc);
1079 280 : if ((cur = check_validity_number(c, cur, true, &token, HEXADECIMALNUM)) == EOF) return cur;
1080 : break;
1081 229570 : default:
1082 229570 : utf8_putchar(lc, cur);
1083 229570 : cur = '0';
1084 : }
1085 : }
1086 1651058 : if (token == sqlINT) {
1087 1650757 : if ((cur = check_validity_number(c, cur, false, &token, sqlINT)) == EOF) return cur;
1088 1650281 : if (cur == '@') {
1089 0 : if (token == sqlINT) {
1090 0 : cur = scanner_getc(lc);
1091 0 : if (cur == EOF)
1092 : return cur;
1093 0 : if (cur == '0') {
1094 0 : cur = scanner_getc(lc);
1095 0 : if (cur == EOF)
1096 : return cur;
1097 0 : token = OIDNUM;
1098 : } else {
1099 : /* number + '@' not followed by 0: show '@' as erroneous */
1100 0 : utf8_putchar(lc, cur);
1101 0 : cur = '@';
1102 0 : token = 0;
1103 : }
1104 : }
1105 : } else {
1106 1650281 : if (cur == '.') {
1107 11049 : cur = scanner_getc(lc);
1108 11049 : if (iswalnum(cur)) /* early exit for numerical forms with final . e.g. 10. */
1109 11043 : if ((cur = check_validity_number(c, cur, false, &token, INTNUM)) == EOF) return cur;
1110 : }
1111 1650281 : if (token != 0)
1112 1650285 : if (cur == 'e' || cur == 'E') {
1113 2225 : cur = scanner_getc(lc);
1114 2225 : if (cur == '+' || cur == '-')
1115 2111 : cur = scanner_getc(lc);
1116 2225 : if ((cur = check_validity_number(c, cur, false, &token, APPROXNUM)) == EOF) return cur;
1117 : }
1118 : }
1119 : }
1120 :
1121 1648357 : assert(cur != EOF);
1122 :
1123 1650582 : if (iswalnum(cur)) /* ERROR: not a valid digit */
1124 6 : token = 0;
1125 :
1126 1650582 : utf8_putchar(lc, cur);
1127 :
1128 1650520 : if (token) {
1129 1650510 : return scanner_token(lc, token);
1130 : } else {
1131 10 : (void)sql_error( c, 2, SQLSTATE(42000) "Unexpected symbol %lc", (wint_t) cur);
1132 10 : return LEX_ERROR;
1133 : }
1134 : }
1135 :
1136 : static
1137 11934447 : int scanner_symbol(mvc * c, int cur)
1138 : {
1139 11934447 : struct scanner *lc = &c->scanner;
1140 11934447 : int next = 0;
1141 11934447 : int started = lc->started;
1142 :
1143 11934447 : switch (cur) {
1144 69451 : case '/':
1145 69451 : lc->started = 1;
1146 69451 : next = scanner_getc(lc);
1147 69451 : if (next < 0)
1148 : return EOF;
1149 69451 : if (next == '*') {
1150 66810 : lc->started = started;
1151 66810 : cur = skip_c_comment(lc);
1152 66810 : if (cur < 0)
1153 : return EOF;
1154 66810 : return tokenize(c, cur);
1155 : } else {
1156 2641 : utf8_putchar(lc, next);
1157 2641 : return scanner_token(lc, cur);
1158 : }
1159 0 : case '0':
1160 : case '1':
1161 : case '2':
1162 : case '3':
1163 : case '4':
1164 : case '5':
1165 : case '6':
1166 : case '7':
1167 : case '8':
1168 : case '9':
1169 0 : return number(c, cur);
1170 5 : case '#':
1171 5 : if ((cur = skip_sql_comment(lc)) == EOF)
1172 : return cur;
1173 5 : return tokenize(c, cur);
1174 686833 : case '\'':
1175 686833 : if (lc->raw_string_mode || lc->next_string_is_raw)
1176 46 : return scanner_string(c, cur, false);
1177 686787 : return scanner_string(c, cur, true);
1178 1267703 : case '"':
1179 1267703 : return scanner_string(c, cur, false);
1180 500 : case '{':
1181 : // if previous tokens like LANGUAGE IDENT
1182 : // TODO checking on IDENT only may not be enough
1183 500 : if (lc->yylast == IDENT)
1184 234 : return scanner_body(c);
1185 266 : lc->started = 1;
1186 266 : return scanner_token(lc, cur);
1187 266 : case '}':
1188 266 : lc->started = 1;
1189 266 : return scanner_token(lc, cur);
1190 29353 : case '-':
1191 29353 : lc->started = 1;
1192 29353 : next = scanner_getc(lc);
1193 29353 : if (next < 0)
1194 : return EOF;
1195 29352 : if (next == '-') {
1196 2757 : lc->started = started;
1197 2757 : if ((cur = skip_sql_comment(lc)) == EOF)
1198 : return cur;
1199 2757 : return tokenize(c, cur);
1200 : }
1201 26595 : lc->started = 1;
1202 26595 : utf8_putchar(lc, next);
1203 26595 : return scanner_token(lc, cur);
1204 12 : case '~': /* binary not */
1205 12 : lc->started = 1;
1206 12 : next = scanner_getc(lc);
1207 12 : if (next < 0)
1208 : return EOF;
1209 12 : if (next == '=')
1210 5 : return scanner_token(lc, GEOM_MBR_EQUAL);
1211 7 : utf8_putchar(lc, next);
1212 7 : return scanner_token(lc, cur);
1213 6614380 : case '^': /* binary xor */
1214 : case '*':
1215 : case '?':
1216 : case ':':
1217 : case '%':
1218 : case '+':
1219 : case '(':
1220 : case ')':
1221 : case ',':
1222 : case '=':
1223 : case '[':
1224 : case ']':
1225 6614380 : lc->started = 1;
1226 6614380 : return scanner_token(lc, cur);
1227 5749 : case '&':
1228 5749 : lc->started = 1;
1229 5749 : cur = scanner_getc(lc);
1230 5749 : if (cur < 0)
1231 : return EOF;
1232 5749 : if (cur < 0)
1233 : return EOF;
1234 5749 : if(cur == '<') {
1235 3 : next = scanner_getc(lc);
1236 3 : if (next < 0)
1237 : return EOF;
1238 3 : if(next == '|') {
1239 0 : return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);
1240 : } else {
1241 3 : utf8_putchar(lc, next); //put the char back
1242 3 : return scanner_token(lc, GEOM_OVERLAP_OR_LEFT);
1243 : }
1244 5746 : } else if(cur == '>')
1245 3 : return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT);
1246 5743 : else if(cur == '&')
1247 3 : return scanner_token(lc, GEOM_OVERLAP);
1248 : else {/* binary and */
1249 5740 : utf8_putchar(lc, cur); //put the char back
1250 5740 : return scanner_token(lc, '&');
1251 : }
1252 19 : case '@':
1253 19 : lc->started = 1;
1254 19 : return scanner_token(lc, AT);
1255 942580 : case ';':
1256 942580 : lc->started = 0;
1257 942580 : return scanner_token(lc, SCOLON);
1258 27 : case '!':
1259 27 : lc->started = 1;
1260 27 : cur = scanner_getc(lc);
1261 27 : if (cur < 0)
1262 : return EOF;
1263 27 : else if (cur == '=') {
1264 21 : lc->rs->buf[lc->rs->pos + lc->yycur - 2] = '<';
1265 21 : lc->rs->buf[lc->rs->pos + lc->yycur - 1] = '>';
1266 21 : return scanner_token( lc, COMPARISON);
1267 : } else {
1268 6 : utf8_putchar(lc, cur); //put the char back
1269 : }
1270 6 : return scanner_token(lc, '!');
1271 50039 : case '<':
1272 50039 : lc->started = 1;
1273 50039 : cur = scanner_getc(lc);
1274 50039 : if (cur < 0)
1275 : return EOF;
1276 50039 : if (cur == '=') {
1277 3123 : return scanner_token( lc, COMPARISON);
1278 46916 : } else if (cur == '>') {
1279 33566 : return scanner_token( lc, COMPARISON);
1280 13350 : } else if (cur == '<') {
1281 44 : next = scanner_getc(lc);
1282 44 : if (next < 0)
1283 : return EOF;
1284 44 : if (next == '=') {
1285 4 : return scanner_token( lc, LEFT_SHIFT_ASSIGN);
1286 40 : } else if (next == '|') {
1287 1 : return scanner_token(lc, GEOM_BELOW);
1288 : } else {
1289 39 : utf8_putchar(lc, next); //put the char back
1290 39 : return scanner_token( lc, LEFT_SHIFT);
1291 : }
1292 13306 : } else if(cur == '-') {
1293 19 : next = scanner_getc(lc);
1294 19 : if (next < 0)
1295 : return EOF;
1296 19 : if(next == '>') {
1297 7 : return scanner_token(lc, GEOM_DIST);
1298 : } else {
1299 : //put the characters back and fall in the next possible case
1300 12 : utf8_putchar(lc, next);
1301 12 : utf8_putchar(lc, cur);
1302 12 : return scanner_token( lc, COMPARISON);
1303 : }
1304 : } else {
1305 13287 : utf8_putchar(lc, cur);
1306 13287 : return scanner_token( lc, COMPARISON);
1307 : }
1308 45217 : case '>':
1309 45217 : lc->started = 1;
1310 45217 : cur = scanner_getc(lc);
1311 45217 : if (cur < 0)
1312 : return EOF;
1313 45217 : if (cur == '>') {
1314 2409 : cur = scanner_getc(lc);
1315 2409 : if (cur < 0)
1316 : return EOF;
1317 2409 : if (cur == '=')
1318 3 : return scanner_token( lc, RIGHT_SHIFT_ASSIGN);
1319 2406 : utf8_putchar(lc, cur);
1320 2406 : return scanner_token( lc, RIGHT_SHIFT);
1321 42808 : } else if (cur != '=') {
1322 40572 : utf8_putchar(lc, cur);
1323 40572 : return scanner_token( lc, COMPARISON);
1324 : } else {
1325 2236 : return scanner_token( lc, COMPARISON);
1326 : }
1327 2043742 : case '.':
1328 2043742 : lc->started = 1;
1329 2043742 : cur = scanner_getc(lc);
1330 2043742 : if (cur < 0)
1331 : return EOF;
1332 2043741 : if (!iswdigit(cur)) {
1333 2043728 : utf8_putchar(lc, cur);
1334 2043728 : return scanner_token( lc, '.');
1335 : } else {
1336 13 : utf8_putchar(lc, cur);
1337 13 : cur = '.';
1338 13 : return number(c, cur);
1339 : }
1340 178561 : case '|': /* binary or or string concat */
1341 178561 : lc->started = 1;
1342 178561 : cur = scanner_getc(lc);
1343 178561 : if (cur < 0)
1344 : return EOF;
1345 178561 : if (cur == '|') {
1346 178536 : return scanner_token(lc, CONCATSTRING);
1347 25 : } else if (cur == '&') {
1348 0 : next = scanner_getc(lc);
1349 0 : if (next < 0)
1350 : return EOF;
1351 0 : if(next == '>') {
1352 0 : return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);
1353 : } else {
1354 0 : utf8_putchar(lc, next); //put the char back
1355 0 : utf8_putchar(lc, cur); //put the char back
1356 0 : return scanner_token(lc, '|');
1357 : }
1358 25 : } else if (cur == '>') {
1359 1 : next = scanner_getc(lc);
1360 1 : if (next < 0)
1361 : return EOF;
1362 1 : if(next == '>') {
1363 1 : return scanner_token(lc, GEOM_ABOVE);
1364 : } else {
1365 0 : utf8_putchar(lc, next); //put the char back
1366 0 : utf8_putchar(lc, cur); //put the char back
1367 0 : return scanner_token(lc, '|');
1368 : }
1369 : } else {
1370 24 : utf8_putchar(lc, cur);
1371 24 : return scanner_token(lc, '|');
1372 : }
1373 : }
1374 10 : (void)sql_error( c, 3, SQLSTATE(42000) "Unexpected symbol (%lc)", (wint_t) cur);
1375 10 : return LEX_ERROR;
1376 : }
1377 :
1378 : static int
1379 26511914 : tokenize(mvc * c, int cur)
1380 : {
1381 26511914 : struct scanner *lc = &c->scanner;
1382 53008454 : while (1) {
1383 39760184 : if (cur == 0xFEFF) {
1384 : /* on Linux at least, iswpunct returns TRUE
1385 : * for U+FEFF, but we don't want that, we just
1386 : * want to go to the scanner_error case
1387 : * below */
1388 : ;
1389 39760215 : } else if (iswspace(cur)) {
1390 13265588 : if ((cur = skip_white_space(lc)) == EOF)
1391 : return cur;
1392 13248270 : continue; /* try again */
1393 26494627 : } else if (iswdigit(cur)) {
1394 1651046 : return number(c, cur);
1395 24843581 : } else if (iswalpha(cur) || cur == '_') {
1396 12878600 : switch (cur) {
1397 644264 : case 'e': /* string with escapes */
1398 : case 'E':
1399 644264 : if (scanner_read_more(lc, 1) != EOF &&
1400 644264 : lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1401 3707 : return scanner_string(c, scanner_getc(lc), true);
1402 : }
1403 : break;
1404 412610 : case 'x': /* blob */
1405 : case 'X':
1406 : case 'r': /* raw string */
1407 : case 'R':
1408 412610 : if (scanner_read_more(lc, 1) != EOF &&
1409 412610 : lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1410 3276 : return scanner_string(c, scanner_getc(lc), false);
1411 : }
1412 : break;
1413 151433 : case 'u': /* unicode string */
1414 : case 'U':
1415 151433 : if (scanner_read_more(lc, 1) != EOF &&
1416 151450 : lc->rs->buf[lc->rs->pos + lc->yycur] == '&' &&
1417 17 : scanner_read_more(lc, 2) != EOF &&
1418 17 : (lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '\'' ||
1419 : lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '"')) {
1420 17 : cur = scanner_getc(lc); /* '&' */
1421 17 : return scanner_string(c, scanner_getc(lc), false);
1422 : }
1423 : break;
1424 : default:
1425 : break;
1426 : }
1427 12902340 : return keyword_or_ident(c, cur);
1428 11934241 : } else if (iswpunct(cur)) {
1429 11934178 : return scanner_symbol(c, cur);
1430 : }
1431 32 : if (cur == EOF) {
1432 0 : if (lc->mode == LINE_1 || !lc->started )
1433 : return cur;
1434 0 : return scanner_error(c, cur);
1435 : }
1436 : /* none of the above: error */
1437 32 : return scanner_error(c, cur);
1438 : }
1439 : }
1440 :
1441 : /* SQL 'quoted' idents consist of a set of any character of
1442 : * the source language character set other than a 'quote'
1443 : *
1444 : * MonetDB has 3 restrictions:
1445 : * 1 we disallow '%' as the first character.
1446 : * 2 the length is limited to 1024 characters
1447 : * 3 the identifier 'TID%' is not allowed
1448 : */
1449 : static bool
1450 1267692 : valid_ident(const char *restrict s, char *restrict dst)
1451 : {
1452 1267692 : int p = 0;
1453 :
1454 1267692 : if (*s == '%')
1455 : return false;
1456 :
1457 9384008 : while (*s) {
1458 8116316 : if ((dst[p++] = *s++) == '"' && *s == '"')
1459 64 : s++;
1460 8116316 : if (p >= 1024)
1461 : return false;
1462 : }
1463 1267692 : dst[p] = '\0';
1464 1267692 : if (strcmp(dst, TID + 1) == 0) /* an index named 'TID%' could interfere with '%TID%' */
1465 : return false;
1466 : return true;
1467 : }
1468 :
/*
 * Produce the next token for the parser and fill in yylval->sval.
 *
 * A token parked in lc->yynext is returned first.  Otherwise the NUL
 * terminator left behind by the previous token is undone (yybak), the
 * next character is read and passed to tokenize(), and yylval->sval is
 * pointed at the token text in the read buffer.
 *
 * For IDENT, COMPARISON, RANK, aTYPE and MARGFUNC the text is
 * duplicated with sa_strndup().  For STRING the text is converted into
 * a newly allocated buffer according to the kind of literal (quoted
 * identifier, E'..', U&'..', X'..', R'..' or plain '..'), which may
 * change the token into IDENT, USTRING, UIDENT or XSTRING.
 *
 * Returns the token code, EOF at the end of the input, or LEX_ERROR.
 *
 * NOTE(review): the results of sa_strndup() and sa_alloc() are used
 * without a NULL check -- confirm the allocator cannot return NULL.
 */
1469 : static inline int
1470 26615521 : sql_get_next_token(YYSTYPE *yylval, void *parm)
1471 : {
1472 26615521 : mvc *c = (mvc*)parm;
1473 26615521 : struct scanner *lc = &c->scanner;
1474 26615521 : int token = 0, cur = 0;
1475 :
1476 26615521 : if (lc->rs->buf == NULL) /* malloc failure */
1477 : return EOF;
1478 :
/* a token that was read ahead earlier (see scanner()) goes first */
1479 26615521 : if (lc->yynext) {
1480 60349 : int next = lc->yynext;
1481 :
1482 60349 : lc->yynext = 0;
1483 60349 : return(next);
1484 : }
1485 :
/* put back the byte that scanner_token() replaced with a NUL */
1486 26555172 : if (lc->yybak) {
1487 25575441 : lc->rs->buf[lc->rs->pos + lc->yycur] = lc->yybak;
1488 25575441 : lc->yybak = 0;
1489 : }
1490 :
1491 26555172 : lc->yysval = lc->yycur;
1492 26555172 : lc->yylast = lc->yyval;
1493 26555172 : cur = scanner_getc(lc);
1494 26554096 : if (cur < 0)
1495 : return EOF;
1496 26442789 : token = tokenize(c, cur);
1497 :
1498 26443309 : yylval->sval = (lc->rs->buf + lc->rs->pos + lc->yysval);
1499 :
1500 26443309 : if (token == KW_TYPE)
1501 49328 : token = aTYPE;
1502 :
1503 26443309 : if (token == IDENT || token == COMPARISON ||
1504 21369801 : token == RANK || token == aTYPE || token == MARGFUNC) {
1505 5132410 : yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval);
1506 5132420 : lc->next_string_is_raw = false;
1507 21310899 : } else if (token == STRING) {
/* the first byte of the token text tells the kind of literal */
1508 1961506 : char quote = *yylval->sval;
1509 1961506 : char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 );
1510 1961506 : char *dst;
1511 :
1512 1961506 : assert(quote == '"' || quote == '\'' || quote == 'E' || quote == 'e' || quote == 'U' || quote == 'u' || quote == 'X' || quote == 'x' || quote == 'R' || quote == 'r');
1513 :
/* temporarily overwrite the closing quote with a NUL so the literal's
 * content is a C string; it is written back at the end of this branch.
 * NOTE(review): the LEX_ERROR returns below leave the NUL in place --
 * confirm the buffer is not rescanned after such an error */
1514 1961506 : lc->rs->buf[lc->rs->pos + lc->yycur - 1] = 0;
1515 1961506 : switch (quote) {
1516 1267692 : case '"':
1517 1267692 : if (valid_ident(yylval->sval+1,str)) {
1518 : token = IDENT;
1519 : } else {
1520 0 : sql_error(c, 1, SQLSTATE(42000) "Invalid identifier '%s'", yylval->sval+1);
1521 0 : return LEX_ERROR;
1522 : }
1523 : break;
1524 3706 : case 'e':
1525 : case 'E':
1526 3706 : assert(yylval->sval[1] == '\'');
1527 3706 : if (GDKstrFromStr((unsigned char *) str,
1528 : (unsigned char *) yylval->sval + 2,
1529 3706 : lc->yycur-lc->yysval - 2, '\'') < 0) {
/* strip the GDKERROR prefix, or a leading '!', from the message */
1530 1 : char *err = GDKerrbuf;
1531 1 : if (strncmp(err, GDKERROR, strlen(GDKERROR)) == 0)
1532 1 : err += strlen(GDKERROR);
1533 0 : else if (*err == '!')
1534 0 : err++;
1535 1 : sql_error(c, 1, SQLSTATE(42000) "%s", err);
1536 1 : return LEX_ERROR;
1537 : }
1538 : quote = '\'';
1539 : break;
1540 17 : case 'u':
1541 : case 'U':
1542 17 : assert(yylval->sval[1] == '&');
1543 17 : assert(yylval->sval[2] == '\'' || yylval->sval[2] == '"');
/* content is copied unchanged; the delimiter decides the token */
1544 17 : strcpy(str, yylval->sval + 3);
1545 17 : token = yylval->sval[2] == '\'' ? USTRING : UIDENT;
1546 17 : quote = yylval->sval[2];
1547 17 : lc->next_string_is_raw = true;
1548 17 : break;
1549 1 : case 'x':
1550 : case 'X':
1551 1 : assert(yylval->sval[1] == '\'');
1552 1 : dst = str;
/* copy, collapsing each doubled single quote into one */
1553 5 : for (char *src = yylval->sval + 2; *src; dst++)
1554 4 : if ((*dst = *src++) == '\'' && *src == '\'')
1555 0 : src++;
1556 1 : *dst = 0;
1557 1 : quote = '\'';
1558 1 : token = XSTRING;
1559 1 : lc->next_string_is_raw = true;
1560 1 : break;
1561 3268 : case 'r':
1562 : case 'R':
1563 3268 : assert(yylval->sval[1] == '\'');
1564 3268 : dst = str;
/* copy, collapsing each doubled single quote into one */
1565 449799 : for (char *src = yylval->sval + 2; *src; dst++)
1566 446531 : if ((*dst = *src++) == '\'' && *src == '\'')
1567 2732 : src++;
1568 3268 : quote = '\'';
1569 3268 : *dst = 0;
1570 3268 : break;
1571 686822 : default:
1572 686822 : if (lc->raw_string_mode || lc->next_string_is_raw) {
1573 46 : dst = str;
/* raw mode: only doubled single quotes are collapsed */
1574 436 : for (char *src = yylval->sval + 1; *src; dst++)
1575 390 : if ((*dst = *src++) == '\'' && *src == '\'')
1576 1 : src++;
1577 46 : *dst = 0;
1578 : } else {
/* NOTE(review): unlike the E'..' case above, GDKerrbuf is reported
 * here without stripping a prefix -- confirm this is intended */
1579 686776 : if (GDKstrFromStr((unsigned char *)str,
1580 686776 : (unsigned char *)yylval->sval + 1,
1581 686776 : lc->yycur - lc->yysval - 1,
1582 : '\'') < 0) {
1583 1 : sql_error(c, 1, SQLSTATE(42000) "%s", GDKerrbuf);
1584 1 : return LEX_ERROR;
1585 : }
1586 : }
1587 : break;
1588 : }
1589 1961504 : yylval->sval = str;
1590 :
1591 : /* reset original */
1592 1961504 : lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote;
1593 : } else {
1594 19349393 : lc->next_string_is_raw = false;
1595 : }
1596 :
1597 : return(token);
1598 : }
1599 :
1600 : static int scanner( YYSTYPE *yylval, void *m, bool log);
1601 :
1602 : static int
1603 26483797 : scanner(YYSTYPE * yylval, void *parm, bool log)
1604 : {
1605 26483797 : int token;
1606 26483797 : mvc *c = (mvc *) parm;
1607 26483797 : struct scanner *lc = &c->scanner;
1608 26483797 : size_t pos;
1609 :
1610 : /* store position for when view's query ends */
1611 26483797 : pos = lc->rs->pos + lc->yycur;
1612 :
1613 26483797 : token = sql_get_next_token(yylval, parm);
1614 :
1615 26482398 : if (token == NOT) {
1616 73568 : int next = scanner(yylval, parm, false);
1617 :
1618 73568 : if (next == NOT) {
1619 2 : return scanner(yylval, parm, false);
1620 : } else if (next == EXISTS) {
1621 : token = NOT_EXISTS;
1622 : } else if (next == BETWEEN) {
1623 : token = NOT_BETWEEN;
1624 : } else if (next == sqlIN) {
1625 : token = NOT_IN;
1626 : } else if (next == LIKE) {
1627 : token = NOT_LIKE;
1628 : } else if (next == ILIKE) {
1629 : token = NOT_ILIKE;
1630 : } else {
1631 60349 : lc->yynext = next;
1632 : }
1633 26408830 : } else if (token == SCOLON) {
1634 : /* ignore semi-colon(s) following a semi-colon */
1635 942556 : if (lc->yylast == SCOLON) {
1636 131999 : size_t prev = lc->yycur;
1637 132000 : while ((token = sql_get_next_token(yylval, parm)) == SCOLON)
1638 1 : prev = lc->yycur;
1639 :
1640 : /* skip the skipped stuff also in the buffer */
1641 131999 : lc->rs->pos += prev;
1642 131999 : lc->yycur -= prev;
1643 : }
1644 : }
1645 :
1646 26482396 : if (lc->log && log)
1647 0 : mnstr_write(lc->log, lc->rs->buf+pos, lc->rs->pos + lc->yycur - pos, 1);
1648 :
1649 26482396 : lc->started += (token != EOF);
1650 26482396 : return token;
1651 : }
1652 :
1653 : /* also see sql_parser.y */
1654 : extern int sqllex(YYSTYPE * yylval, void *parm);
1655 :
1656 : int
1657 26410633 : sqllex(YYSTYPE * yylval, void *parm)
1658 : {
1659 26410633 : return scanner(yylval, parm, true);
1660 : }
|