Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include <wctype.h>
15 : #include "sql_mem.h"
16 : #include "sql_scan.h"
17 : #include "sql_types.h"
18 : #include "sql_symbol.h"
19 : #include "sql_mvc.h"
20 : #include "sql_parser.tab.h"
21 : #include "sql_semantic.h"
22 : #include "sql_parser.h" /* for sql_error() */
23 :
24 : #include "stream.h"
25 : #include "mapi_prompt.h"
26 : #include <unistd.h>
27 : #include <string.h>
28 : #include <ctype.h>
29 : #include "sql_keyword.h"
30 :
/*
 * Decode Unicode escape sequences in s, in place, producing UTF-8.
 *
 * The escape character is the first character of esc.  Recognised forms:
 *   <esc><esc>      a literal escape character
 *   <esc>XXXX       code point given by exactly 4 hexadecimal digits
 *   <esc>+XXXXXX    code point given by exactly 6 hexadecimal digits
 *
 * Returns s on success.  Returns NULL when a digit is not hexadecimal
 * (this includes running into the end of the string), or when the code
 * point is 0, greater than 0x10FFFF, or in the range 0xD800-0xDFFF.  On
 * failure the buffer may already have been partially rewritten.
 *
 * The decoded text is never longer than the input, which is what makes
 * the in-place rewrite safe.
 */
static char *
uescape_xform(char *restrict s, const char *restrict esc)
{
	const char mark = *esc;
	size_t rd = 0;		/* next position to read */
	size_t wr = 0;		/* next position to write */

	while (s[rd] != '\0') {
		/* ordinary character: copy it through */
		if (s[rd] != mark) {
			s[wr++] = s[rd++];
			continue;
		}
		/* doubled escape character stands for itself */
		if (s[rd + 1] == mark) {
			s[wr++] = mark;
			rd += 2;
			continue;
		}

		int ndigits = 4;
		if (s[rd + 1] == '+') {
			ndigits = 6;
			rd++;
		}

		/* accumulate the hexadecimal digits, most significant first */
		int cp = 0;
		for (int k = 0; k < ndigits; k++) {
			const char d = s[++rd];

			cp <<= 4;
			if (d >= '0' && d <= '9')
				cp |= d - '0';
			else if (d >= 'a' && d <= 'f')
				cp |= d - 'a' + 10;
			else if (d >= 'A' && d <= 'F')
				cp |= d - 'A' + 10;
			else
				return NULL;
		}
		rd++;		/* step past the last digit */

		/* reject NUL, out-of-range values and surrogates */
		if (cp == 0 || cp > 0x10FFFF || (cp & 0xFFF800) == 0xD800)
			return NULL;

		/* emit the UTF-8 encoding of cp */
		if (cp < 0x80) {
			s[wr++] = cp;
		} else if (cp < 0x800) {
			s[wr++] = 0xC0 | (cp >> 6);
			s[wr++] = 0x80 | (cp & 0x3F);
		} else if (cp < 0x10000) {
			s[wr++] = 0xE0 | (cp >> 12);
			s[wr++] = 0x80 | ((cp >> 6) & 0x3F);
			s[wr++] = 0x80 | (cp & 0x3F);
		} else {
			s[wr++] = 0xF0 | (cp >> 18);
			s[wr++] = 0x80 | ((cp >> 12) & 0x3F);
			s[wr++] = 0x80 | ((cp >> 6) & 0x3F);
			s[wr++] = 0x80 | (cp & 0x3F);
		}
	}
	s[wr] = 0;
	return s;
}
88 :
89 : /**
90 : * Removes all comments before the query. In query comments are kept.
91 : */
92 : char *
93 441454 : query_cleaned(allocator *sa, const char *query)
94 : {
95 441454 : char *q, *r, *c = NULL;
96 441454 : int lines = 0;
97 441454 : int quote = 0; /* inside quotes ('..', "..", {..}) */
98 441454 : bool bs = false; /* seen a backslash in a quoted string */
99 441454 : bool incomment1 = false; /* inside traditional C style comment */
100 441454 : bool incomment2 = false; /* inside comment starting with -- */
101 441454 : bool inline_comment = false;
102 :
103 441454 : r = SA_NEW_ARRAY(sa, char, strlen(query) + 1);
104 441582 : if(!r)
105 : return NULL;
106 :
107 70208981 : for (q = r; *query; query++) {
108 69767399 : if (incomment1) {
109 16396 : if (*query == '/' && query[-1] == '*') {
110 237 : incomment1 = false;
111 237 : if (c == r && lines > 0) {
112 229 : q = r; // reset to beginning
113 229 : lines = 0;
114 229 : continue;
115 : }
116 : }
117 16167 : if (*query == '\n') lines++;
118 16167 : *q++ = *query;
119 69751003 : } else if (incomment2) {
120 831313 : if (*query == '\n') {
121 2914 : incomment2 = false;
122 2914 : inline_comment = false;
123 : /* add newline only if comment doesn't
124 : * occupy whole line */
125 2914 : if (q > r && q[-1] != '\n'){
126 998 : *q++ = '\n';
127 998 : lines++;
128 : }
129 828399 : } else if (inline_comment){
130 23850 : *q++ = *query; // preserve in line query comments
131 : }
132 68919690 : } else if (quote) {
133 22407157 : if (bs) {
134 : bs = false;
135 22403845 : } else if (*query == '\\') {
136 : bs = true;
137 22400533 : } else if (*query == quote) {
138 691562 : quote = 0;
139 : }
140 22407157 : *q++ = *query;
141 46512533 : } else if (*query == '"' || *query == '\'') {
142 691094 : quote = *query;
143 691094 : *q++ = *query;
144 45821439 : } else if (*query == '{') {
145 513 : quote = '}';
146 513 : *q++ = *query;
147 45820926 : } else if (*query == '-' && query[1] == '-') {
148 2914 : if (q > r && q[-1] != '\n') {
149 998 : inline_comment = true;
150 998 : *q++ = *query; // preserve in line query comments
151 : }
152 : incomment2 = true;
153 45818012 : } else if (*query == '/' && query[1] == '*') {
154 237 : incomment1 = true;
155 237 : c = q;
156 237 : *q++ = *query;
157 45817775 : } else if (*query == '\n') {
158 : /* collapse newlines */
159 910081 : if (q > r && q[-1] != '\n') {
160 868197 : *q++ = '\n';
161 868197 : lines++;
162 : }
163 44907694 : } else if (*query == ' ' || *query == '\t') {
164 : /* collapse white space */
165 7222435 : if (q > r && q[-1] != ' ')
166 5737086 : *q++ = ' ';
167 : } else {
168 37685259 : *q++ = *query;
169 : }
170 : }
171 441582 : *q = 0;
172 441582 : return r;
173 : }
174 :
175 : int
176 358 : scanner_init_keywords(void)
177 : {
178 358 : int failed = 0;
179 :
180 358 : failed += keywords_insert("false", BOOL_FALSE);
181 358 : failed += keywords_insert("true", BOOL_TRUE);
182 358 : failed += keywords_insert("bool", sqlBOOL);
183 :
184 358 : failed += keywords_insert("ALTER", ALTER);
185 358 : failed += keywords_insert("ADD", ADD);
186 358 : failed += keywords_insert("AND", AND);
187 :
188 358 : failed += keywords_insert("RANK", RANK);
189 358 : failed += keywords_insert("DENSE_RANK", RANK);
190 358 : failed += keywords_insert("PERCENT_RANK", RANK);
191 358 : failed += keywords_insert("CUME_DIST", RANK);
192 358 : failed += keywords_insert("ROW_NUMBER", RANK);
193 358 : failed += keywords_insert("NTILE", RANK);
194 358 : failed += keywords_insert("LAG", RANK);
195 358 : failed += keywords_insert("LEAD", RANK);
196 358 : failed += keywords_insert("FETCH", FETCH);
197 358 : failed += keywords_insert("FIRST_VALUE", RANK);
198 358 : failed += keywords_insert("LAST_VALUE", RANK);
199 358 : failed += keywords_insert("NTH_VALUE", RANK);
200 :
201 358 : failed += keywords_insert("BEST", BEST);
202 358 : failed += keywords_insert("EFFORT", EFFORT);
203 :
204 358 : failed += keywords_insert("AS", AS);
205 358 : failed += keywords_insert("ASC", ASC);
206 358 : failed += keywords_insert("AUTHORIZATION", AUTHORIZATION);
207 358 : failed += keywords_insert("BETWEEN", BETWEEN);
208 358 : failed += keywords_insert("SYMMETRIC", SYMMETRIC);
209 358 : failed += keywords_insert("ASYMMETRIC", ASYMMETRIC);
210 358 : failed += keywords_insert("BY", BY);
211 358 : failed += keywords_insert("CAST", CAST);
212 358 : failed += keywords_insert("CONVERT", CONVERT);
213 358 : failed += keywords_insert("CHARACTER", CHARACTER);
214 358 : failed += keywords_insert("CHAR", CHARACTER);
215 358 : failed += keywords_insert("VARYING", VARYING);
216 358 : failed += keywords_insert("VARCHAR", VARCHAR);
217 358 : failed += keywords_insert("BINARY", BINARY);
218 358 : failed += keywords_insert("LARGE", LARGE);
219 358 : failed += keywords_insert("OBJECT", OBJECT);
220 358 : failed += keywords_insert("CLOB", CLOB);
221 358 : failed += keywords_insert("BLOB", sqlBLOB);
222 358 : failed += keywords_insert("TEXT", sqlTEXT);
223 358 : failed += keywords_insert("TINYTEXT", sqlTEXT);
224 358 : failed += keywords_insert("STRING", CLOB); /* ? */
225 358 : failed += keywords_insert("CHECK", CHECK);
226 358 : failed += keywords_insert("CLIENT", CLIENT);
227 358 : failed += keywords_insert("SERVER", SERVER);
228 358 : failed += keywords_insert("COMMENT", COMMENT);
229 358 : failed += keywords_insert("CONSTRAINT", CONSTRAINT);
230 358 : failed += keywords_insert("CREATE", CREATE);
231 358 : failed += keywords_insert("CROSS", CROSS);
232 358 : failed += keywords_insert("COPY", COPY);
233 358 : failed += keywords_insert("RECORDS", RECORDS);
234 358 : failed += keywords_insert("DELIMITERS", DELIMITERS);
235 358 : failed += keywords_insert("STDIN", STDIN);
236 358 : failed += keywords_insert("STDOUT", STDOUT);
237 :
238 358 : failed += keywords_insert("TINYINT", TINYINT);
239 358 : failed += keywords_insert("SMALLINT", SMALLINT);
240 358 : failed += keywords_insert("INTEGER", sqlINTEGER);
241 358 : failed += keywords_insert("INT", sqlINTEGER);
242 358 : failed += keywords_insert("MEDIUMINT", sqlINTEGER);
243 358 : failed += keywords_insert("BIGINT", BIGINT);
244 : #ifdef HAVE_HGE
245 358 : failed += keywords_insert("HUGEINT", HUGEINT);
246 : #endif
247 358 : failed += keywords_insert("DEC", sqlDECIMAL);
248 358 : failed += keywords_insert("DECIMAL", sqlDECIMAL);
249 358 : failed += keywords_insert("NUMERIC", sqlDECIMAL);
250 358 : failed += keywords_insert("DECLARE", DECLARE);
251 358 : failed += keywords_insert("DEFAULT", DEFAULT);
252 358 : failed += keywords_insert("DESC", DESC);
253 358 : failed += keywords_insert("DISTINCT", DISTINCT);
254 358 : failed += keywords_insert("DOUBLE", sqlDOUBLE);
255 358 : failed += keywords_insert("REAL", sqlREAL);
256 358 : failed += keywords_insert("DROP", DROP);
257 358 : failed += keywords_insert("ESCAPE", ESCAPE);
258 358 : failed += keywords_insert("EXISTS", EXISTS);
259 358 : failed += keywords_insert("UESCAPE", UESCAPE);
260 358 : failed += keywords_insert("EXTRACT", EXTRACT);
261 358 : failed += keywords_insert("FLOAT", sqlFLOAT);
262 358 : failed += keywords_insert("FOR", FOR);
263 358 : failed += keywords_insert("FOREIGN", FOREIGN);
264 358 : failed += keywords_insert("FROM", FROM);
265 358 : failed += keywords_insert("FWF", FWF);
266 :
267 358 : failed += keywords_insert("BIG", BIG);
268 358 : failed += keywords_insert("LITTLE", LITTLE);
269 358 : failed += keywords_insert("NATIVE", NATIVE);
270 358 : failed += keywords_insert("ENDIAN", ENDIAN);
271 :
272 358 : failed += keywords_insert("REFERENCES", REFERENCES);
273 :
274 358 : failed += keywords_insert("MATCH", MATCH);
275 358 : failed += keywords_insert("FULL", FULL);
276 358 : failed += keywords_insert("PARTIAL", PARTIAL);
277 358 : failed += keywords_insert("SIMPLE", SIMPLE);
278 :
279 358 : failed += keywords_insert("INSERT", INSERT);
280 358 : failed += keywords_insert("UPDATE", UPDATE);
281 358 : failed += keywords_insert("DELETE", sqlDELETE);
282 358 : failed += keywords_insert("TRUNCATE", TRUNCATE);
283 358 : failed += keywords_insert("MATCHED", MATCHED);
284 :
285 358 : failed += keywords_insert("ACTION", ACTION);
286 358 : failed += keywords_insert("CASCADE", CASCADE);
287 358 : failed += keywords_insert("RESTRICT", RESTRICT);
288 358 : failed += keywords_insert("FIRST", FIRST);
289 358 : failed += keywords_insert("GLOBAL", GLOBAL);
290 358 : failed += keywords_insert("GROUP", sqlGROUP);
291 358 : failed += keywords_insert("GROUPING", GROUPING);
292 358 : failed += keywords_insert("ROLLUP", ROLLUP);
293 358 : failed += keywords_insert("CUBE", CUBE);
294 358 : failed += keywords_insert("HAVING", HAVING);
295 358 : failed += keywords_insert("ILIKE", ILIKE);
296 358 : failed += keywords_insert("IMPRINTS", IMPRINTS);
297 358 : failed += keywords_insert("IN", sqlIN);
298 358 : failed += keywords_insert("INNER", INNER);
299 358 : failed += keywords_insert("INTO", INTO);
300 358 : failed += keywords_insert("IS", IS);
301 358 : failed += keywords_insert("JOIN", JOIN);
302 358 : failed += keywords_insert("KEY", KEY);
303 358 : failed += keywords_insert("LATERAL", LATERAL);
304 358 : failed += keywords_insert("LEFT", LEFT);
305 358 : failed += keywords_insert("LIKE", LIKE);
306 358 : failed += keywords_insert("LIMIT", LIMIT);
307 358 : failed += keywords_insert("SAMPLE", SAMPLE);
308 358 : failed += keywords_insert("SEED", SEED);
309 358 : failed += keywords_insert("LAST", LAST);
310 358 : failed += keywords_insert("LOCAL", LOCAL);
311 358 : failed += keywords_insert("NATURAL", NATURAL);
312 358 : failed += keywords_insert("NOT", NOT);
313 358 : failed += keywords_insert("NULL", sqlNULL);
314 358 : failed += keywords_insert("NULLS", NULLS);
315 358 : failed += keywords_insert("OFFSET", OFFSET);
316 358 : failed += keywords_insert("ON", ON);
317 358 : failed += keywords_insert("OPTIONS", OPTIONS);
318 358 : failed += keywords_insert("OPTION", OPTION);
319 358 : failed += keywords_insert("OR", OR);
320 358 : failed += keywords_insert("ORDER", ORDER);
321 358 : failed += keywords_insert("ORDERED", ORDERED);
322 358 : failed += keywords_insert("OUTER", OUTER);
323 358 : failed += keywords_insert("OVER", OVER);
324 358 : failed += keywords_insert("PARTITION", PARTITION);
325 358 : failed += keywords_insert("PATH", PATH);
326 358 : failed += keywords_insert("PRECISION", PRECISION);
327 358 : failed += keywords_insert("PRIMARY", PRIMARY);
328 :
329 358 : failed += keywords_insert("USER", USER);
330 358 : failed += keywords_insert("RENAME", RENAME);
331 358 : failed += keywords_insert("UNENCRYPTED", UNENCRYPTED);
332 358 : failed += keywords_insert("ENCRYPTED", ENCRYPTED);
333 358 : failed += keywords_insert("PASSWORD", PASSWORD);
334 358 : failed += keywords_insert("GRANT", GRANT);
335 358 : failed += keywords_insert("REVOKE", REVOKE);
336 358 : failed += keywords_insert("ROLE", ROLE);
337 358 : failed += keywords_insert("ADMIN", ADMIN);
338 358 : failed += keywords_insert("PRIVILEGES", PRIVILEGES);
339 358 : failed += keywords_insert("PUBLIC", PUBLIC);
340 358 : failed += keywords_insert("CURRENT_USER", CURRENT_USER);
341 358 : failed += keywords_insert("CURRENT_ROLE", CURRENT_ROLE);
342 358 : failed += keywords_insert("SESSION_USER", SESSION_USER);
343 358 : failed += keywords_insert("CURRENT_SCHEMA", CURRENT_SCHEMA);
344 358 : failed += keywords_insert("SESSION", sqlSESSION);
345 358 : failed += keywords_insert("MAX_MEMORY", MAX_MEMORY);
346 358 : failed += keywords_insert("MAX_WORKERS", MAX_WORKERS);
347 358 : failed += keywords_insert("OPTIMIZER", OPTIMIZER);
348 :
349 358 : failed += keywords_insert("RIGHT", RIGHT);
350 358 : failed += keywords_insert("SCHEMA", SCHEMA);
351 358 : failed += keywords_insert("SELECT", SELECT);
352 358 : failed += keywords_insert("SET", SET);
353 358 : failed += keywords_insert("SETS", SETS);
354 358 : failed += keywords_insert("AUTO_COMMIT", AUTO_COMMIT);
355 :
356 358 : failed += keywords_insert("ALL", ALL);
357 358 : failed += keywords_insert("ANY", ANY);
358 358 : failed += keywords_insert("SOME", SOME);
359 358 : failed += keywords_insert("EVERY", ANY);
360 : /*
361 : failed += keywords_insert("SQLCODE", SQLCODE );
362 : */
363 358 : failed += keywords_insert("COLUMN", COLUMN);
364 358 : failed += keywords_insert("TABLE", TABLE);
365 358 : failed += keywords_insert("TEMPORARY", TEMPORARY);
366 358 : failed += keywords_insert("TEMP", TEMP);
367 358 : failed += keywords_insert("REMOTE", REMOTE);
368 358 : failed += keywords_insert("MERGE", MERGE);
369 358 : failed += keywords_insert("REPLICA", REPLICA);
370 358 : failed += keywords_insert("UNLOGGED", UNLOGGED);
371 358 : failed += keywords_insert("TO", TO);
372 358 : failed += keywords_insert("UNION", UNION);
373 358 : failed += keywords_insert("EXCEPT", EXCEPT);
374 358 : failed += keywords_insert("INTERSECT", INTERSECT);
375 358 : failed += keywords_insert("CORRESPONDING", CORRESPONDING);
376 358 : failed += keywords_insert("UNIQUE", UNIQUE);
377 358 : failed += keywords_insert("USING", USING);
378 358 : failed += keywords_insert("VALUES", VALUES);
379 358 : failed += keywords_insert("VIEW", VIEW);
380 358 : failed += keywords_insert("WHERE", WHERE);
381 358 : failed += keywords_insert("WITH", WITH);
382 358 : failed += keywords_insert("WITHIN", WITHIN);
383 358 : failed += keywords_insert("WITHOUT", WITHOUT);
384 358 : failed += keywords_insert("DATA", DATA);
385 :
386 358 : failed += keywords_insert("DATE", sqlDATE);
387 358 : failed += keywords_insert("TIME", TIME);
388 358 : failed += keywords_insert("TIMESTAMP", TIMESTAMP);
389 358 : failed += keywords_insert("INTERVAL", INTERVAL);
390 358 : failed += keywords_insert("CURRENT_DATE", CURRENT_DATE);
391 358 : failed += keywords_insert("CURRENT_TIME", CURRENT_TIME);
392 358 : failed += keywords_insert("CURRENT_TIMESTAMP", CURRENT_TIMESTAMP);
393 358 : failed += keywords_insert("CURRENT_TIMEZONE", CURRENT_TIMEZONE);
394 358 : failed += keywords_insert("NOW", CURRENT_TIMESTAMP);
395 358 : failed += keywords_insert("LOCALTIME", LOCALTIME);
396 358 : failed += keywords_insert("LOCALTIMESTAMP", LOCALTIMESTAMP);
397 358 : failed += keywords_insert("ZONE", ZONE);
398 :
399 358 : failed += keywords_insert("CENTURY", CENTURY);
400 358 : failed += keywords_insert("DECADE", DECADE);
401 358 : failed += keywords_insert("YEAR", YEAR);
402 358 : failed += keywords_insert("QUARTER", QUARTER);
403 358 : failed += keywords_insert("MONTH", MONTH);
404 358 : failed += keywords_insert("WEEK", WEEK);
405 358 : failed += keywords_insert("DOW", DOW);
406 358 : failed += keywords_insert("DOY", DOY);
407 358 : failed += keywords_insert("DAY", DAY);
408 358 : failed += keywords_insert("HOUR", HOUR);
409 358 : failed += keywords_insert("MINUTE", MINUTE);
410 358 : failed += keywords_insert("SECOND", SECOND);
411 358 : failed += keywords_insert("EPOCH", EPOCH);
412 :
413 358 : failed += keywords_insert("POSITION", POSITION);
414 358 : failed += keywords_insert("SUBSTRING", SUBSTRING);
415 358 : failed += keywords_insert("SPLIT_PART", SPLIT_PART);
416 358 : failed += keywords_insert("TRIM", TRIM);
417 358 : failed += keywords_insert("LEADING", LEADING);
418 358 : failed += keywords_insert("TRAILING", TRAILING);
419 358 : failed += keywords_insert("BOTH", BOTH);
420 :
421 358 : failed += keywords_insert("CASE", CASE);
422 358 : failed += keywords_insert("WHEN", WHEN);
423 358 : failed += keywords_insert("THEN", THEN);
424 358 : failed += keywords_insert("ELSE", ELSE);
425 358 : failed += keywords_insert("END", END);
426 358 : failed += keywords_insert("NULLIF", NULLIF);
427 358 : failed += keywords_insert("COALESCE", COALESCE);
428 358 : failed += keywords_insert("ELSEIF", ELSEIF);
429 358 : failed += keywords_insert("IF", IF);
430 358 : failed += keywords_insert("WHILE", WHILE);
431 358 : failed += keywords_insert("DO", DO);
432 :
433 358 : failed += keywords_insert("COMMIT", COMMIT);
434 358 : failed += keywords_insert("ROLLBACK", ROLLBACK);
435 358 : failed += keywords_insert("SAVEPOINT", SAVEPOINT);
436 358 : failed += keywords_insert("RELEASE", RELEASE);
437 358 : failed += keywords_insert("WORK", WORK);
438 358 : failed += keywords_insert("CHAIN", CHAIN);
439 358 : failed += keywords_insert("PRESERVE", PRESERVE);
440 358 : failed += keywords_insert("ROWS", ROWS);
441 358 : failed += keywords_insert("NO", NO);
442 358 : failed += keywords_insert("START", START);
443 358 : failed += keywords_insert("TRANSACTION", TRANSACTION);
444 358 : failed += keywords_insert("READ", READ);
445 358 : failed += keywords_insert("WRITE", WRITE);
446 358 : failed += keywords_insert("ONLY", ONLY);
447 358 : failed += keywords_insert("ISOLATION", ISOLATION);
448 358 : failed += keywords_insert("LEVEL", LEVEL);
449 358 : failed += keywords_insert("UNCOMMITTED", UNCOMMITTED);
450 358 : failed += keywords_insert("COMMITTED", COMMITTED);
451 358 : failed += keywords_insert("REPEATABLE", sqlREPEATABLE);
452 358 : failed += keywords_insert("SNAPSHOT", SNAPSHOT);
453 358 : failed += keywords_insert("SERIALIZABLE", SERIALIZABLE);
454 358 : failed += keywords_insert("DIAGNOSTICS", DIAGNOSTICS);
455 358 : failed += keywords_insert("SIZE", sqlSIZE);
456 358 : failed += keywords_insert("STORAGE", STORAGE);
457 :
458 358 : failed += keywords_insert("TYPE", TYPE);
459 358 : failed += keywords_insert("PROCEDURE", PROCEDURE);
460 358 : failed += keywords_insert("FUNCTION", FUNCTION);
461 358 : failed += keywords_insert("LOADER", sqlLOADER);
462 358 : failed += keywords_insert("REPLACE", REPLACE);
463 :
464 : //failed += keywords_insert("FIELD", FIELD);
465 358 : failed += keywords_insert("FILTER", FILTER);
466 358 : failed += keywords_insert("AGGREGATE", AGGREGATE);
467 358 : failed += keywords_insert("RETURNS", RETURNS);
468 358 : failed += keywords_insert("EXTERNAL", EXTERNAL);
469 358 : failed += keywords_insert("NAME", sqlNAME);
470 358 : failed += keywords_insert("RETURN", RETURN);
471 358 : failed += keywords_insert("CALL", CALL);
472 358 : failed += keywords_insert("LANGUAGE", LANGUAGE);
473 :
474 358 : failed += keywords_insert("ANALYZE", ANALYZE);
475 358 : failed += keywords_insert("EXPLAIN", SQL_EXPLAIN);
476 358 : failed += keywords_insert("PLAN", SQL_PLAN);
477 358 : failed += keywords_insert("TRACE", SQL_TRACE);
478 358 : failed += keywords_insert("PREPARE", PREPARE);
479 358 : failed += keywords_insert("PREP", PREP);
480 358 : failed += keywords_insert("EXECUTE", EXECUTE);
481 358 : failed += keywords_insert("EXEC", EXEC);
482 358 : failed += keywords_insert("DEALLOCATE", DEALLOCATE);
483 :
484 358 : failed += keywords_insert("INDEX", INDEX);
485 :
486 358 : failed += keywords_insert("SEQUENCE", SEQUENCE);
487 358 : failed += keywords_insert("RESTART", RESTART);
488 358 : failed += keywords_insert("INCREMENT", INCREMENT);
489 358 : failed += keywords_insert("MAXVALUE", MAXVALUE);
490 358 : failed += keywords_insert("MINVALUE", MINVALUE);
491 358 : failed += keywords_insert("CYCLE", CYCLE);
492 358 : failed += keywords_insert("CACHE", CACHE);
493 358 : failed += keywords_insert("NEXT", NEXT);
494 358 : failed += keywords_insert("VALUE", VALUE);
495 358 : failed += keywords_insert("GENERATED", GENERATED);
496 358 : failed += keywords_insert("ALWAYS", ALWAYS);
497 358 : failed += keywords_insert("IDENTITY", IDENTITY);
498 358 : failed += keywords_insert("SERIAL", SERIAL);
499 358 : failed += keywords_insert("BIGSERIAL", BIGSERIAL);
500 358 : failed += keywords_insert("AUTO_INCREMENT", AUTO_INCREMENT);
501 358 : failed += keywords_insert("CONTINUE", CONTINUE);
502 :
503 358 : failed += keywords_insert("TRIGGER", TRIGGER);
504 358 : failed += keywords_insert("ATOMIC", ATOMIC);
505 358 : failed += keywords_insert("BEGIN", BEGIN);
506 358 : failed += keywords_insert("OF", OF);
507 358 : failed += keywords_insert("BEFORE", BEFORE);
508 358 : failed += keywords_insert("AFTER", AFTER);
509 358 : failed += keywords_insert("ROW", ROW);
510 358 : failed += keywords_insert("STATEMENT", STATEMENT);
511 358 : failed += keywords_insert("NEW", sqlNEW);
512 358 : failed += keywords_insert("OLD", OLD);
513 358 : failed += keywords_insert("EACH", EACH);
514 358 : failed += keywords_insert("REFERENCING", REFERENCING);
515 :
516 358 : failed += keywords_insert("RANGE", RANGE);
517 358 : failed += keywords_insert("UNBOUNDED", UNBOUNDED);
518 358 : failed += keywords_insert("PRECEDING", PRECEDING);
519 358 : failed += keywords_insert("FOLLOWING", FOLLOWING);
520 358 : failed += keywords_insert("CURRENT", CURRENT);
521 358 : failed += keywords_insert("EXCLUDE", EXCLUDE);
522 358 : failed += keywords_insert("OTHERS", OTHERS);
523 358 : failed += keywords_insert("TIES", TIES);
524 358 : failed += keywords_insert("GROUPS", GROUPS);
525 358 : failed += keywords_insert("WINDOW", WINDOW);
526 358 : failed += keywords_insert("QUALIFY", QUALIFY);
527 :
528 : /* special SQL/XML keywords */
529 358 : failed += keywords_insert("XMLCOMMENT", XMLCOMMENT);
530 358 : failed += keywords_insert("XMLCONCAT", XMLCONCAT);
531 358 : failed += keywords_insert("XMLDOCUMENT", XMLDOCUMENT);
532 358 : failed += keywords_insert("XMLELEMENT", XMLELEMENT);
533 358 : failed += keywords_insert("XMLATTRIBUTES", XMLATTRIBUTES);
534 358 : failed += keywords_insert("XMLFOREST", XMLFOREST);
535 358 : failed += keywords_insert("XMLPARSE", XMLPARSE);
536 358 : failed += keywords_insert("STRIP", STRIP);
537 358 : failed += keywords_insert("WHITESPACE", WHITESPACE);
538 358 : failed += keywords_insert("XMLPI", XMLPI);
539 358 : failed += keywords_insert("XMLQUERY", XMLQUERY);
540 358 : failed += keywords_insert("PASSING", PASSING);
541 358 : failed += keywords_insert("XMLTEXT", XMLTEXT);
542 358 : failed += keywords_insert("NIL", NIL);
543 358 : failed += keywords_insert("REF", REF);
544 358 : failed += keywords_insert("ABSENT", ABSENT);
545 358 : failed += keywords_insert("DOCUMENT", DOCUMENT);
546 358 : failed += keywords_insert("ELEMENT", ELEMENT);
547 358 : failed += keywords_insert("CONTENT", CONTENT);
548 358 : failed += keywords_insert("XMLNAMESPACES", XMLNAMESPACES);
549 358 : failed += keywords_insert("NAMESPACE", NAMESPACE);
550 358 : failed += keywords_insert("XMLVALIDATE", XMLVALIDATE);
551 358 : failed += keywords_insert("RETURNING", RETURNING);
552 358 : failed += keywords_insert("RECURSIVE", RECURSIVE);
553 358 : failed += keywords_insert("LOCATION", LOCATION);
554 358 : failed += keywords_insert("ID", ID);
555 358 : failed += keywords_insert("ACCORDING", ACCORDING);
556 358 : failed += keywords_insert("XMLSCHEMA", XMLSCHEMA);
557 358 : failed += keywords_insert("URI", URI);
558 358 : failed += keywords_insert("XMLAGG", XMLAGG);
559 :
560 : /* keywords for opengis */
561 358 : failed += keywords_insert("GEOMETRY", GEOMETRY);
562 :
563 358 : failed += keywords_insert("POINT", GEOMETRYSUBTYPE);
564 358 : failed += keywords_insert("LINESTRING", GEOMETRYSUBTYPE);
565 358 : failed += keywords_insert("POLYGON", GEOMETRYSUBTYPE);
566 358 : failed += keywords_insert("MULTIPOINT", GEOMETRYSUBTYPE);
567 358 : failed += keywords_insert("MULTILINESTRING", GEOMETRYSUBTYPE);
568 358 : failed += keywords_insert("MULTIPOLYGON", GEOMETRYSUBTYPE);
569 358 : failed += keywords_insert("GEOMETRYCOLLECTION", GEOMETRYSUBTYPE);
570 :
571 358 : failed += keywords_insert("POINTZ", GEOMETRYSUBTYPE);
572 358 : failed += keywords_insert("LINESTRINGZ", GEOMETRYSUBTYPE);
573 358 : failed += keywords_insert("POLYGONZ", GEOMETRYSUBTYPE);
574 358 : failed += keywords_insert("MULTIPOINTZ", GEOMETRYSUBTYPE);
575 358 : failed += keywords_insert("MULTILINESTRINGZ", GEOMETRYSUBTYPE);
576 358 : failed += keywords_insert("MULTIPOLYGONZ", GEOMETRYSUBTYPE);
577 358 : failed += keywords_insert("GEOMETRYCOLLECTIONZ", GEOMETRYSUBTYPE);
578 :
579 358 : failed += keywords_insert("POINTM", GEOMETRYSUBTYPE);
580 358 : failed += keywords_insert("LINESTRINGM", GEOMETRYSUBTYPE);
581 358 : failed += keywords_insert("POLYGONM", GEOMETRYSUBTYPE);
582 358 : failed += keywords_insert("MULTIPOINTM", GEOMETRYSUBTYPE);
583 358 : failed += keywords_insert("MULTILINESTRINGM", GEOMETRYSUBTYPE);
584 358 : failed += keywords_insert("MULTIPOLYGONM", GEOMETRYSUBTYPE);
585 358 : failed += keywords_insert("GEOMETRYCOLLECTIONM", GEOMETRYSUBTYPE);
586 :
587 358 : failed += keywords_insert("POINTZM", GEOMETRYSUBTYPE);
588 358 : failed += keywords_insert("LINESTRINGZM", GEOMETRYSUBTYPE);
589 358 : failed += keywords_insert("POLYGONZM", GEOMETRYSUBTYPE);
590 358 : failed += keywords_insert("MULTIPOINTZM", GEOMETRYSUBTYPE);
591 358 : failed += keywords_insert("MULTILINESTRINGZM", GEOMETRYSUBTYPE);
592 358 : failed += keywords_insert("MULTIPOLYGONZM", GEOMETRYSUBTYPE);
593 358 : failed += keywords_insert("GEOMETRYCOLLECTIONZM", GEOMETRYSUBTYPE);
594 358 : failed += keywords_insert("LOGIN", LOGIN);
595 : // odbc keywords
596 358 : failed += keywords_insert("d", ODBC_DATE_ESCAPE_PREFIX);
597 358 : failed += keywords_insert("t", ODBC_TIME_ESCAPE_PREFIX);
598 358 : failed += keywords_insert("ts", ODBC_TIMESTAMP_ESCAPE_PREFIX);
599 358 : failed += keywords_insert("guid", ODBC_GUID_ESCAPE_PREFIX);
600 358 : failed += keywords_insert("fn", ODBC_FUNC_ESCAPE_PREFIX);
601 358 : failed += keywords_insert("oj", ODBC_OJ_ESCAPE_PREFIX);
602 358 : failed += keywords_insert("DAYNAME", DAYNAME);
603 358 : failed += keywords_insert("IFNULL", IFNULL);
604 358 : failed += keywords_insert("MONTHNAME", MONTHNAME);
605 358 : failed += keywords_insert("TIMESTAMPADD", TIMESTAMPADD);
606 358 : failed += keywords_insert("TIMESTAMPDIFF", TIMESTAMPDIFF);
607 358 : failed += keywords_insert("SQL_BIGINT", SQL_BIGINT);
608 358 : failed += keywords_insert("SQL_BINARY", SQL_BINARY);
609 358 : failed += keywords_insert("SQL_BIT", SQL_BIT);
610 358 : failed += keywords_insert("SQL_CHAR", SQL_CHAR);
611 358 : failed += keywords_insert("SQL_DATE", SQL_DATE);
612 358 : failed += keywords_insert("SQL_DECIMAL", SQL_DECIMAL);
613 358 : failed += keywords_insert("SQL_DOUBLE", SQL_DOUBLE);
614 358 : failed += keywords_insert("SQL_FLOAT", SQL_FLOAT);
615 358 : failed += keywords_insert("SQL_GUID", SQL_GUID);
616 358 : failed += keywords_insert("SQL_HUGEINT", SQL_HUGEINT);
617 358 : failed += keywords_insert("SQL_INTEGER", SQL_INTEGER);
618 358 : failed += keywords_insert("SQL_INTERVAL_DAY", SQL_INTERVAL_DAY);
619 358 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_HOUR", SQL_INTERVAL_DAY_TO_HOUR);
620 358 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_MINUTE", SQL_INTERVAL_DAY_TO_MINUTE);
621 358 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_SECOND", SQL_INTERVAL_DAY_TO_SECOND);
622 358 : failed += keywords_insert("SQL_INTERVAL_HOUR", SQL_INTERVAL_HOUR);
623 358 : failed += keywords_insert("SQL_INTERVAL_HOUR_TO_MINUTE", SQL_INTERVAL_HOUR_TO_MINUTE);
624 358 : failed += keywords_insert("SQL_INTERVAL_HOUR_TO_SECOND", SQL_INTERVAL_HOUR_TO_SECOND);
625 358 : failed += keywords_insert("SQL_INTERVAL_MINUTE", SQL_INTERVAL_MINUTE);
626 358 : failed += keywords_insert("SQL_INTERVAL_MINUTE_TO_SECOND", SQL_INTERVAL_MINUTE_TO_SECOND);
627 358 : failed += keywords_insert("SQL_INTERVAL_MONTH", SQL_INTERVAL_MONTH);
628 358 : failed += keywords_insert("SQL_INTERVAL_SECOND", SQL_INTERVAL_SECOND);
629 358 : failed += keywords_insert("SQL_INTERVAL_YEAR", SQL_INTERVAL_YEAR);
630 358 : failed += keywords_insert("SQL_INTERVAL_YEAR_TO_MONTH", SQL_INTERVAL_YEAR_TO_MONTH);
631 358 : failed += keywords_insert("SQL_LONGVARBINARY", SQL_LONGVARBINARY);
632 358 : failed += keywords_insert("SQL_LONGVARCHAR", SQL_LONGVARCHAR);
633 358 : failed += keywords_insert("SQL_NUMERIC", SQL_NUMERIC);
634 358 : failed += keywords_insert("SQL_REAL", SQL_REAL);
635 358 : failed += keywords_insert("SQL_SMALLINT", SQL_SMALLINT);
636 358 : failed += keywords_insert("SQL_TIME", SQL_TIME);
637 358 : failed += keywords_insert("SQL_TIMESTAMP", SQL_TIMESTAMP);
638 358 : failed += keywords_insert("SQL_TINYINT", SQL_TINYINT);
639 358 : failed += keywords_insert("SQL_VARBINARY", SQL_VARBINARY);
640 358 : failed += keywords_insert("SQL_VARCHAR", SQL_VARCHAR);
641 358 : failed += keywords_insert("SQL_WCHAR", SQL_WCHAR);
642 358 : failed += keywords_insert("SQL_WLONGVARCHAR", SQL_WLONGVARCHAR);
643 358 : failed += keywords_insert("SQL_WVARCHAR", SQL_WVARCHAR);
644 358 : failed += keywords_insert("SQL_TSI_FRAC_SECOND", SQL_TSI_FRAC_SECOND);
645 358 : failed += keywords_insert("SQL_TSI_SECOND", SQL_TSI_SECOND);
646 358 : failed += keywords_insert("SQL_TSI_MINUTE", SQL_TSI_MINUTE);
647 358 : failed += keywords_insert("SQL_TSI_HOUR", SQL_TSI_HOUR);
648 358 : failed += keywords_insert("SQL_TSI_DAY", SQL_TSI_DAY);
649 358 : failed += keywords_insert("SQL_TSI_WEEK", SQL_TSI_WEEK);
650 358 : failed += keywords_insert("SQL_TSI_MONTH", SQL_TSI_MONTH);
651 358 : failed += keywords_insert("SQL_TSI_QUARTER", SQL_TSI_QUARTER);
652 358 : failed += keywords_insert("SQL_TSI_YEAR", SQL_TSI_YEAR);
653 :
654 358 : failed += keywords_insert("LEAST", MARGFUNC);
655 358 : failed += keywords_insert("GREATEST", MARGFUNC);
656 :
657 358 : failed += keywords_insert("SETOF", SETOF);
658 358 : failed += keywords_insert("ARRAY", ARRAY);
659 358 : return failed;
660 : }
661 :
/* Look up the keyword that starts s bytes past the current read position
 * of lc's input buffer.  The arguments are parenthesized so that
 * expression arguments (e.g. "a + b" for s) expand as intended. */
#define find_keyword_bs(lc, s) find_keyword((lc)->rs->buf + (lc)->rs->pos + (s))
663 :
664 : void
665 252745 : scanner_init(struct scanner *s, bstream *rs, stream *ws)
666 : {
667 505490 : *s = (struct scanner) {
668 : .rs = rs,
669 : .ws = ws,
670 : .mode = LINE_N,
671 252745 : .raw_string_mode = GDKgetenv_istrue("raw_strings"),
672 : .aborted = false,
673 : };
674 252745 : }
675 :
676 : void
677 1403563 : scanner_query_processed(struct scanner *s)
678 : {
679 1403563 : int cur;
680 :
681 1403563 : if (s->yybak) {
682 521021 : s->rs->buf[s->rs->pos + s->yycur] = s->yybak;
683 521021 : s->yybak = 0;
684 : }
685 1403563 : if (s->rs) {
686 1403563 : s->rs->pos += s->yycur;
687 : /* completely eat the query including white space after the ; */
688 2570023 : while (s->rs->pos < s->rs->len &&
689 2161054 : (cur = s->rs->buf[s->rs->pos], iswspace(cur))) {
690 1166460 : s->rs->pos++;
691 : }
692 : }
693 : /*assert(s->rs->pos <= s->rs->len);*/
694 1403563 : s->yycur = 0;
695 1403563 : s->started = 0;
696 1403563 : s->as = 0;
697 1403563 : s->schema = NULL;
698 1403563 : s->brackets = 0;
699 1403563 : }
700 :
701 : static int
702 33 : scanner_error(mvc *lc, int cur)
703 : {
704 33 : switch (cur) {
705 0 : case EOF:
706 0 : (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input");
707 0 : return EOF;
708 33 : default:
709 : /* on Windows at least, iswcntrl returns TRUE for
710 : * U+FEFF, but we just want consistent error
711 : * messages */
712 33 : (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected%s character (U+%04X)", iswcntrl(cur) && cur != 0xFEFF ? " control" : "", (unsigned) cur);
713 : }
714 33 : return LEX_ERROR;
715 : }
716 :
717 :
718 : /*
719 : UTF-8 encoding is as follows:
720 : U-00000000 - U-0000007F: 0xxxxxxx
721 : U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
722 : U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
723 : U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
724 : U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
725 : U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
726 : */
727 : /* To be correctly coded UTF-8, the sequence should be the shortest
728 : possible encoding of the value being encoded. This means that for
729 : an encoding of length n+1 (1 <= n <= 5), at least one of the bits in
730 : utf8chkmsk[n] should be non-zero (else the encoding could be
731 : shorter).
732 : */
/* Indexed by the number of continuation bytes: the decoded value of a
 * sequence with n continuation bytes must have a bit set in entry n. */
static const int utf8chkmsk[] = {
	[0] = 0x0000007F,	/* bits  0..6  */
	[1] = 0x00000780,	/* bits  7..10 */
	[2] = 0x0000F800,	/* bits 11..15 */
	[3] = 0x001F0000,	/* bits 16..20 */
	[4] = 0x03E00000,	/* bits 21..25 */
	[5] = 0x7C000000,	/* bits 26..30 */
};
741 :
742 : static void
743 32844368 : utf8_putchar(struct scanner *lc, int ch)
744 : {
745 32844368 : if ((ch) < 0x80) {
746 32844363 : lc->yycur--;
747 5 : } else if ((ch) < 0x800) {
748 0 : lc->yycur -= 2;
749 5 : } else if ((ch) < 0x10000) {
750 5 : lc->yycur -= 3;
751 : } else {
752 0 : lc->yycur -= 4;
753 : }
754 32844368 : }
755 :
/*
 * Make sure that at least n bytes are available in the input buffer
 * starting at the scanner's current offset (b->pos + lc->yycur), fetching
 * more input with bstream_next() when they are not.
 *
 * Returns 1 when the bytes are available.  Returns EOF when the scanner
 * was already aborted, when the mode is LINE_1 or no statement has been
 * started, when reading fails or yields no further data, or when the
 * newly read data is the two-byte sequence '\200' '\n' (reported through
 * lc->errstr as "Query aborted").
 */
756 : static inline int
757 142615254 : scanner_read_more(struct scanner *lc, size_t n)
758 : {
759 142615254 : bstream *b = lc->rs;
760 142615254 : bool more = false;
761 :
762 :
763 142615254 : if (lc->aborted)
764 : return EOF;
765 142618847 : while (b->len < b->pos + lc->yycur + n) {
766 :
/* outside a statement (or in single-line mode) never wait for more input */
767 126291 : if (lc->mode == LINE_1 || !lc->started)
768 : return EOF;
769 :
770 : /* query is not finished ask for more */
771 0 : if (b->eof || !isa_block_stream(b->s)) {
/* NOTE(review): bstream_getoob() presumably reports an out-of-band
 * abort request from the client -- confirm against stream.h */
772 0 : if (bstream_getoob(b)) {
773 0 : lc->aborted = true;
774 0 : return EOF;
775 : }
/* send the continuation prompt and flush it only if it was written */
776 1800 : if (mnstr_write(lc->ws, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1)
777 1800 : mnstr_flush(lc->ws, MNSTR_FLUSH_DATA);
778 1800 : b->eof = false;
779 1800 : more = true;
780 : }
781 : /* we need more query text */
782 3600 : if (bstream_next(b) < 0) {
783 0 : if (mnstr_errnr(b->s) == MNSTR_INTERRUPT) {
784 : // now what?
785 0 : lc->errstr = "Query aborted";
786 0 : lc->aborted = true;
787 0 : mnstr_clearerr(b->s);
788 : }
789 0 : return EOF;
790 3600 : } else if (/* we asked for more data but didn't get any */
791 1800 : (more && b->eof && b->len < b->pos + lc->yycur + n))
792 : return EOF;
/* the newly read data consists of exactly '\200' '\n': drop those two
 * bytes from the buffer and report the query as aborted */
793 3593 : if (more && b->pos + lc->yycur + 2 == b->len && b->buf[b->pos + lc->yycur] == '\200' && b->buf[b->pos + lc->yycur + 1] == '\n') {
794 0 : lc->errstr = "Query aborted";
795 0 : b->len -= 2;
796 0 : b->buf[b->len] = 0;
797 0 : return EOF;
798 : }
799 : }
800 : return 1;
801 : }
802 :
/*
 * Read the next character from the input and advance lc->yycur past it.
 * A byte below 0x80 is returned as is; otherwise the UTF-8 sequence that
 * starts with that byte is decoded and the resulting code point returned.
 *
 * Returns EOF when scanner_read_more() cannot provide the bytes or when
 * the sequence is malformed (bad lead byte, bad continuation byte, or not
 * the shortest encoding).  In the malformed cases lc->errstr is set to a
 * message describing the problem; lc->errstr is cleared once the first
 * byte has been obtained.
 */
803 : static inline int
804 141326244 : scanner_getc(struct scanner *lc)
805 : {
806 141326244 : bstream *b = lc->rs;
807 141326244 : unsigned char *s = NULL;
808 141326244 : int c, m, n, mask;
809 :
810 141326244 : if (scanner_read_more(lc, 1) == EOF) {
811 : //lc->errstr = SQLSTATE(42000) "end of input stream";
812 : return EOF;
813 : }
814 141213127 : lc->errstr = NULL;
815 :
816 141213127 : s = (unsigned char *) b->buf + b->pos + lc->yycur++;
817 141213127 : if (((c = *s) & 0x80) == 0) {
818 : /* 7-bit char */
819 : return c;
820 : }
/* count the 1-bits that follow the top bit of the lead byte */
821 88250 : for (n = 0, m = 0x40; c & m; n++, m >>= 1)
822 : ;
823 : /* n now is number of 10xxxxxx bytes that should follow */
/* NOTE(review): the bound below uses b->pos + n without lc->yycur, unlike
 * the test in scanner_read_more() -- confirm that this is intended */
824 29443 : if (n == 0 || n >= 6 || (b->pos + n) > b->len) {
825 : /* incorrect UTF-8 sequence */
826 : /* n==0: c == 10xxxxxx */
827 : /* n>=6: c == 1111111x */
828 0 : lc->errstr = SQLSTATE(42000) "invalid start of UTF-8 sequence";
829 0 : goto error;
830 : }
831 :
832 29443 : if (scanner_read_more(lc, (size_t) n) == EOF)
833 : return EOF;
/* recompute the pointer from b->buf after the call above */
834 29443 : s = (unsigned char *) b->buf + b->pos + lc->yycur;
835 :
836 29443 : mask = utf8chkmsk[n];
837 29443 : c &= ~(0xFFC0 >> n); /* remove non-x bits */
838 88249 : while (--n >= 0) {
839 58807 : c <<= 6;
840 58807 : lc->yycur++;
841 58807 : if (((m = *s++) & 0xC0) != 0x80) {
842 : /* incorrect UTF-8 sequence: byte is not 10xxxxxx */
843 : /* this includes end-of-string (m == 0) */
844 1 : lc->errstr = SQLSTATE(42000) "invalid continuation in UTF-8 sequence";
845 1 : goto error;
846 : }
847 58806 : c |= m & 0x3F;
848 : }
849 29442 : if ((c & mask) == 0) {
850 : /* incorrect UTF-8 sequence: not shortest possible */
851 0 : lc->errstr = SQLSTATE(42000) "not shortest possible UTF-8 sequence";
852 0 : goto error;
853 : }
854 :
855 : return c;
856 :
857 1 : error:
858 1 : if (b->pos + lc->yycur < b->len) /* skip bogus char */
859 0 : lc->yycur++;
860 : return EOF;
861 : }
862 :
863 : static int
864 29324200 : scanner_token(struct scanner *lc, int token)
865 : {
866 29324200 : lc->yybak = lc->rs->buf[lc->rs->pos + lc->yycur];
867 29324200 : lc->rs->buf[lc->rs->pos + lc->yycur] = 0;
868 29324200 : lc->yyval = token;
869 29324200 : return lc->yyval;
870 : }
871 :
872 : static int
873 2162976 : scanner_string(mvc *c, int quote, bool escapes)
874 : {
875 2162976 : struct scanner *lc = &c->scanner;
876 2162976 : bstream *rs = lc->rs;
877 2162976 : int cur = quote;
878 2162976 : bool escape = false;
879 2162976 : const size_t limit = quote == '"' ? 1 << 11 : 1 << 30;
880 :
881 2162976 : lc->started = 1;
882 2201125 : while (cur != EOF) {
883 2201110 : size_t pos = 0;
884 2201110 : const size_t yycur = rs->pos + lc->yycur;
885 :
886 35769415 : while (cur != EOF && (quote != '"' || cur != 0xFEFF) && pos < limit &&
887 33568305 : (((cur = rs->buf[yycur + pos++]) & 0x80) == 0) &&
888 67107156 : cur && (cur != quote || escape)) {
889 31367196 : if (escapes && cur == '\\')
890 6840 : escape = !escape;
891 : else
892 : escape = false;
893 : }
894 2201110 : if (pos == limit) {
895 0 : (void) sql_error(c, 2, SQLSTATE(42000) "string too long");
896 0 : return LEX_ERROR;
897 : }
898 : /* BOM character not allowed as an identifier */
899 2201110 : if (cur == EOF || (quote == '"' && cur == 0xFEFF))
900 1 : return scanner_error(c, cur);
901 2201109 : lc->yycur += pos;
902 : /* check for quote escaped quote: Obscure SQL Rule */
903 2201109 : if (cur == quote && rs->buf[yycur + pos] == quote) {
904 8708 : lc->yycur++;
905 8708 : continue;
906 : }
907 2192401 : assert(yycur + pos <= rs->len + 1);
908 2192401 : if (cur == quote && !escape) {
909 2162946 : return scanner_token(lc, STRING);
910 : }
911 29455 : lc->yycur--; /* go back to current (possibly invalid) char */
912 : /* long utf8, if correct isn't the quote */
913 29455 : if (!cur) {
914 30 : if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
915 14 : (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
916 14 : return LEX_ERROR;
917 : }
918 16 : cur = scanner_read_more(lc, 1);
919 : } else {
920 29425 : cur = scanner_getc(lc);
921 : }
922 : }
923 15 : (void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "Unexpected end of input");
924 15 : return EOF;
925 : }
926 :
927 : /* scan a structure {blah} into a string. We only count the matching {}
928 : * unless escaped. We do not consider embeddings in string literals yet
929 : */
930 :
/*
 * Scan a brace-delimited body whose opening '{' has just been consumed
 * (asserted below).  Braces are counted in blk, starting at 1; scanning
 * stops when blk drops to 0 outside a backslash escape.  The matching '}'
 * itself is not part of the token: lc->yycur is moved back onto it before
 * scanner_token() is called.
 *
 * Returns X_BODY on success, LEX_ERROR when a NUL byte occurs inside the
 * buffered data, and EOF when the input ends first.
 */
931 : static int
932 234 : scanner_body(mvc *c)
933 : {
934 234 : struct scanner *lc = &c->scanner;
935 234 : bstream *rs = lc->rs;
936 234 : int cur = (int) 'x';
937 234 : int blk = 1;
938 234 : bool escape = false;
939 :
940 234 : lc->started = 1;
941 234 : assert(rs->buf[rs->pos + lc->yycur-1] == '{');
942 290 : while (cur != EOF) {
943 290 : size_t pos = rs->pos + lc->yycur;
944 :
/* scan ASCII bytes, toggling the escape flag on backslashes and
 * counting braces */
945 32350 : while ((((cur = rs->buf[pos++]) & 0x80) == 0) && cur && (blk || escape)) {
946 32060 : if (cur != '\\')
947 : escape = false;
948 : else
949 12 : escape = !escape;
950 32060 : blk += cur =='{';
951 32060 : blk -= cur =='}';
952 : }
953 290 : lc->yycur = pos - rs->pos;
954 290 : assert(pos <= rs->len + 1);
955 290 : if (blk == 0 && !escape){
956 234 : lc->yycur--; /* go back to current (possibly invalid) char */
957 234 : return scanner_token(lc, X_BODY);
958 : }
959 56 : lc->yycur--; /* go back to current (possibly invalid) char */
960 56 : if (!cur) {
/* a NUL inside the buffered data is an error; at the end of the data
 * it means more input has to be read */
961 56 : if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
962 0 : (void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
963 0 : return LEX_ERROR;
964 : }
965 56 : cur = scanner_read_more(lc, 1);
966 : } else {
/* byte with the high bit set: decode it as UTF-8 and continue */
967 0 : cur = scanner_getc(lc);
968 : }
969 : }
970 0 : (void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input");
971 0 : return EOF;
972 : }
973 :
974 : static int
975 14069388 : keyword_or_ident(mvc * c, int cur)
976 : {
977 14069388 : struct scanner *lc = &c->scanner;
978 14069388 : keyword *k = NULL;
979 14069388 : size_t s;
980 :
981 14069388 : lc->started = 1;
982 14069388 : utf8_putchar(lc, cur);
983 14069386 : s = lc->yycur;
984 14069386 : lc->yyval = IDENT;
985 84476417 : while ((cur = scanner_getc(lc)) != EOF) {
986 84476155 : if (!iswalnum(cur) && cur != '_') {
987 14069124 : utf8_putchar(lc, cur);
988 14069131 : (void)scanner_token(lc, IDENT);
989 14069131 : if ((k = find_keyword_bs(lc,s)))
990 8620159 : lc->yyval = k->token;
991 14069582 : return lc->yyval;
992 : }
993 : }
994 : if (cur < 0)
995 : return cur;
996 : (void)scanner_token(lc, IDENT);
997 : if ((k = find_keyword_bs(lc,s)))
998 : lc->yyval = k->token;
999 : return lc->yyval;
1000 : }
1001 :
1002 : static int
1003 14658214 : skip_white_space(struct scanner * lc)
1004 : {
1005 18287317 : int cur;
1006 :
1007 18287317 : do {
1008 18287317 : lc->yysval = lc->yycur;
1009 18287317 : } while ((cur = scanner_getc(lc)) != EOF && iswspace(cur));
1010 14656962 : return cur;
1011 : }
1012 :
1013 : static int
1014 71266 : skip_c_comment(struct scanner * lc)
1015 : {
1016 71266 : int cur;
1017 71266 : int prev = 0;
1018 71266 : int started = lc->started;
1019 71266 : int depth = 1;
1020 :
1021 71266 : lc->started = 1;
1022 1437108 : while (depth > 0 && (cur = scanner_getc(lc)) != EOF) {
1023 1365842 : if (prev == '*' && cur == '/')
1024 71266 : depth--;
1025 1294576 : else if (prev == '/' && cur == '*') {
1026 : /* block comments can nest */
1027 0 : cur = 0; /* prevent slash-star-slash from matching */
1028 0 : depth++;
1029 : }
1030 : prev = cur;
1031 : }
1032 71266 : lc->yysval = lc->yycur;
1033 71266 : lc->started = started;
1034 : /* a comment is equivalent to a newline */
1035 71266 : return cur == EOF ? cur : '\n';
1036 : }
1037 :
1038 : static int
1039 3273 : skip_sql_comment(struct scanner * lc)
1040 : {
1041 3273 : int cur;
1042 3273 : int started = lc->started;
1043 :
1044 3273 : lc->started = 1;
1045 833837 : while ((cur = scanner_getc(lc)) != EOF && (cur != '\n'))
1046 : ;
1047 3273 : lc->yysval = lc->yycur;
1048 3273 : lc->started = started;
1049 : /* a comment is equivalent to a newline */
1050 3273 : return cur;
1051 : }
1052 :
1053 : static int tokenize(mvc * lc, int cur);
1054 :
/* Digit predicates for the number bases the scanner understands. */

/* true for a decimal digit */
static inline bool
is_valid_decimal_digit(int cur)
{
	return iswdigit(cur) != 0;
}

/* true for '0' and '1' */
static inline bool
is_valid_binary_digit(int cur)
{
	if (cur >= '2')
		return false;
	return iswdigit(cur) != 0;
}

/* true for '0' through '7' */
static inline bool
is_valid_octal_digit(int cur)
{
	if (cur >= '8')
		return false;
	return iswdigit(cur) != 0;
}

/* true for a hexadecimal digit */
static inline bool
is_valid_hexadecimal_digit(int cur)
{
	return iswxdigit(cur) != 0;
}
1059 :
/*
 * Scan a run of digits of one number form, where single underscores may
 * separate digits.
 *
 * pcur    - first character of the run, already read by the caller
 * initial_underscore_allowed - whether pcur may be '_'
 * token   - out: set to `type` once scanning starts, or to 0 on error
 * type    - BINARYNUM, OCTALNUM or HEXADECIMALNUM select the matching digit
 *           test; any other value selects decimal digits
 *
 * Returns the character that ended the run (already consumed through
 * scanner_getc(); it may be EOF).  On error *token is 0 and the returned
 * character is the offending one: '_' for a disallowed leading, doubled
 * or trailing underscore, otherwise the invalid character itself.
 */
1060 1999177 : static inline int check_validity_number(mvc* c, int pcur, bool initial_underscore_allowed, int *token, int type) {
1061 1999177 : struct scanner *lc = &c->scanner;
1062 1999177 : bool (*is_valid_n_ary_digit)(int);
1063 :
1064 1999177 : if (pcur == '_' && !initial_underscore_allowed) /* ERROR: initial underscore not allowed */ {
1065 0 : *token = 0;
1066 0 : return '_';
1067 : }
1068 :
/* select the digit test that belongs to the requested number form */
1069 1999177 : switch (type) {
1070 : case BINARYNUM:
1071 : is_valid_n_ary_digit = &is_valid_binary_digit;
1072 : break;
1073 3 : case OCTALNUM:
1074 3 : is_valid_n_ary_digit = &is_valid_octal_digit;
1075 3 : break;
1076 280 : case HEXADECIMALNUM:
1077 280 : is_valid_n_ary_digit = &is_valid_hexadecimal_digit;
1078 280 : break;
1079 1998892 : default:
1080 1998892 : is_valid_n_ary_digit = &is_valid_decimal_digit;
1081 1998892 : break;
1082 : }
1083 :
1084 1999177 : if ( !(pcur == '_' || is_valid_n_ary_digit(pcur)) ) /* ERROR: first digit is not valid */ {
1085 18 : *token = 0;
1086 18 : return pcur;
1087 : }
1088 :
1089 1999297 : int cur = scanner_getc(lc);
1090 1999472 : *token = type;
/* pcur always holds the previously accepted character */
1091 4008388 : while (cur != EOF) {
1092 4008190 : if (cur == '_') {
1093 25 : if (pcur == '_') /* ERROR: multiple consecutive underscores */ {
1094 2 : *token = 0;
1095 2 : return '_';
1096 : }
1097 : }
1098 4008165 : else if (!is_valid_n_ary_digit(cur))
1099 : break;
1100 2009261 : pcur = cur;
1101 2009261 : cur = scanner_getc(lc);
1102 : }
1103 :
/* the run may not end in an underscore */
1104 1998806 : if (pcur == '_') {
1105 3 : *token = 0;
1106 3 : if (iswalnum(cur)) /* ERROR: not a valid digit */
1107 : return cur;
1108 : else /* ERROR: number ends with underscore */
1109 : return '_';
1110 : }
1111 :
1112 : return cur;
1113 : }
1114 :
/*
 * Scan a numeric literal whose first character cur (a digit, or '.' when
 * called for a number starting with a decimal point) has already been
 * read.
 *
 * Returns the numeric token via scanner_token() (sqlINT, BINARYNUM,
 * OCTALNUM, HEXADECIMALNUM, OIDNUM, INTNUM or APPROXNUM), EOF when the
 * input ends while scanning, or LEX_ERROR after reporting
 * "Unexpected symbol" for a malformed number.
 */
1115 : static int
1116 1985410 : number(mvc * c, int cur)
1117 : {
1118 1985410 : struct scanner *lc = &c->scanner;
1119 1985410 : int token = sqlINT;
1120 :
1121 : /* a number has one of these forms (expressed in regular expressions):
1122 : * 0x[0-9A-Fa-f]+ -- (hexadecimal) INTEGER
1123 : * \.[0-9]+ -- DECIMAL
1124 : * [0-9]+\.[0-9]* -- DECIMAL
1125 : * [0-9]+@0 -- OID
1126 : * [0-9]*\.[0-9]+[eE][-+]?[0-9]+ -- REAL
1127 : * [0-9]+(\.[0-9]*)?[eE][-+]?[0-9]+ -- REAL
1128 : * [0-9]+ -- (decimal) INTEGER
1129 : */
1130 1985410 : lc->started = 1;
/* a leading 0 may introduce a binary (0b), octal (0o) or hexadecimal (0x)
 * literal; otherwise the character after it is pushed back */
1131 1985410 : if (cur == '0') {
1132 349174 : switch ((cur = scanner_getc(lc))) {
1133 2 : case 'b':
1134 2 : cur = scanner_getc(lc);
1135 2 : if ((cur = check_validity_number(c, cur, true, &token, BINARYNUM)) == EOF) return cur;
1136 : break;
1137 3 : case 'o':
1138 3 : cur = scanner_getc(lc);
1139 3 : if ((cur = check_validity_number(c, cur, true, &token, OCTALNUM)) == EOF) return cur;
1140 : break;
1141 280 : case 'x':
1142 280 : cur = scanner_getc(lc);
1143 280 : if ((cur = check_validity_number(c, cur, true, &token, HEXADECIMALNUM)) == EOF) return cur;
1144 : break;
1145 348894 : default:
1146 348894 : utf8_putchar(lc, cur);
1147 348894 : cur = '0';
1148 : }
1149 : }
/* token is still sqlINT only when no base prefix was recognised above */
1150 1985413 : if (token == sqlINT) {
1151 1985309 : if ((cur = check_validity_number(c, cur, false, &token, sqlINT)) == EOF) return cur;
1152 1985125 : if (cur == '@') {
1153 0 : if (token == sqlINT) {
1154 0 : cur = scanner_getc(lc);
1155 0 : if (cur == EOF)
1156 : return cur;
1157 0 : if (cur == '0') {
1158 0 : cur = scanner_getc(lc);
1159 0 : if (cur == EOF)
1160 : return cur;
1161 0 : token = OIDNUM;
1162 : } else {
1163 : /* number + '@' not followed by 0: show '@' as erroneous */
1164 0 : utf8_putchar(lc, cur);
1165 0 : cur = '@';
1166 0 : token = 0;
1167 : }
1168 : }
1169 : } else {
/* optional fraction, then optional exponent */
1170 1985125 : if (cur == '.') {
1171 11186 : cur = scanner_getc(lc);
1172 11186 : if (iswalnum(cur)) /* early exit for numerical forms with final . e.g. 10. */
1173 11179 : if ((cur = check_validity_number(c, cur, false, &token, INTNUM)) == EOF) return cur;
1174 : }
1175 1985125 : if (token != 0)
1176 1984785 : if (cur == 'e' || cur == 'E') {
1177 2231 : cur = scanner_getc(lc);
1178 2231 : if (cur == '+' || cur == '-')
1179 2111 : cur = scanner_getc(lc);
1180 2231 : if ((cur = check_validity_number(c, cur, false, &token, APPROXNUM)) == EOF) return cur;
1181 : }
1182 : }
1183 : }
1184 :
1185 1982998 : assert(cur != EOF);
1186 :
/* a letter or digit directly after the number makes it invalid */
1187 1985229 : if (iswalnum(cur)) /* ERROR: not a valid digit */
1188 6 : token = 0;
1189 :
/* cur is not part of the number: push it back */
1190 1985229 : utf8_putchar(lc, cur);
1191 :
1192 1985207 : if (token) {
1193 1985197 : return scanner_token(lc, token);
1194 : } else {
1195 10 : (void)sql_error( c, 2, SQLSTATE(42000) "Unexpected symbol %lc", (wint_t) cur);
1196 10 : return LEX_ERROR;
1197 : }
1198 : }
1199 :
1200 : static
1201 13337237 : int scanner_symbol(mvc * c, int cur)
1202 : {
1203 13337237 : struct scanner *lc = &c->scanner;
1204 13337237 : int next = 0;
1205 13337237 : int started = lc->started;
1206 :
1207 13337237 : switch (cur) {
1208 73932 : case '/':
1209 73932 : lc->started = 1;
1210 73932 : next = scanner_getc(lc);
1211 73932 : if (next < 0)
1212 : return EOF;
1213 73932 : if (next == '*') {
1214 71266 : lc->started = started;
1215 71266 : cur = skip_c_comment(lc);
1216 71266 : if (cur < 0)
1217 : return EOF;
1218 71266 : return tokenize(c, cur);
1219 : } else {
1220 2666 : utf8_putchar(lc, next);
1221 2666 : return scanner_token(lc, cur);
1222 : }
1223 0 : case '0':
1224 : case '1':
1225 : case '2':
1226 : case '3':
1227 : case '4':
1228 : case '5':
1229 : case '6':
1230 : case '7':
1231 : case '8':
1232 : case '9':
1233 0 : return number(c, cur);
1234 8 : case '#':
1235 8 : if ((cur = skip_sql_comment(lc)) == EOF)
1236 : return cur;
1237 8 : return tokenize(c, cur);
1238 825110 : case '\'':
1239 825110 : if (lc->raw_string_mode || lc->next_string_is_raw)
1240 50 : return scanner_string(c, cur, false);
1241 825060 : return scanner_string(c, cur, true);
1242 1330639 : case '"':
1243 1330639 : return scanner_string(c, cur, false);
1244 500 : case '{':
1245 : // if previous tokens like LANGUAGE IDENT
1246 : // TODO checking on IDENT only may not be enough
1247 500 : if (lc->yylast == IDENT)
1248 234 : return scanner_body(c);
1249 266 : lc->started = 1;
1250 266 : return scanner_token(lc, cur);
1251 266 : case '}':
1252 266 : lc->started = 1;
1253 266 : return scanner_token(lc, cur);
1254 30595 : case '-':
1255 30595 : lc->started = 1;
1256 30595 : next = scanner_getc(lc);
1257 30595 : if (next < 0)
1258 : return EOF;
1259 30594 : if (next == '-') {
1260 3265 : lc->started = started;
1261 3265 : if ((cur = skip_sql_comment(lc)) == EOF)
1262 : return cur;
1263 3265 : return tokenize(c, cur);
1264 : }
1265 27329 : lc->started = 1;
1266 27329 : utf8_putchar(lc, next);
1267 27329 : return scanner_token(lc, cur);
1268 12 : case '~': /* binary not */
1269 12 : lc->started = 1;
1270 12 : next = scanner_getc(lc);
1271 12 : if (next < 0)
1272 : return EOF;
1273 12 : if (next == '=')
1274 5 : return scanner_token(lc, GEOM_MBR_EQUAL);
1275 7 : utf8_putchar(lc, next);
1276 7 : return scanner_token(lc, cur);
1277 7450536 : case '^': /* binary xor */
1278 : case '*':
1279 : case ':':
1280 : case '%':
1281 : case '+':
1282 : case '(':
1283 : case ')':
1284 : case ',':
1285 : case '=':
1286 : case '[':
1287 : case ']':
1288 7450536 : lc->started = 1;
1289 7450536 : return scanner_token(lc, cur);
1290 1615 : case '?':
1291 1615 : lc->started = 1;
1292 1615 : return scanner_token(lc, PARAM);
1293 6397 : case '&':
1294 6397 : lc->started = 1;
1295 6397 : cur = scanner_getc(lc);
1296 6397 : if (cur < 0)
1297 : return EOF;
1298 6397 : if (cur < 0)
1299 : return EOF;
1300 6397 : if(cur == '<') {
1301 3 : next = scanner_getc(lc);
1302 3 : if (next < 0)
1303 : return EOF;
1304 3 : if(next == '|') {
1305 0 : return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);
1306 : } else {
1307 3 : utf8_putchar(lc, next); //put the char back
1308 3 : return scanner_token(lc, GEOM_OVERLAP_OR_LEFT);
1309 : }
1310 6394 : } else if(cur == '>')
1311 3 : return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT);
1312 6391 : else if(cur == '&')
1313 3 : return scanner_token(lc, GEOM_OVERLAP);
1314 : else {/* binary and */
1315 6388 : utf8_putchar(lc, cur); //put the char back
1316 6388 : return scanner_token(lc, '&');
1317 : }
1318 19 : case '@':
1319 19 : lc->started = 1;
1320 19 : return scanner_token(lc, AT);
1321 1046546 : case ';':
1322 1046546 : lc->started = 0;
1323 1046546 : return scanner_token(lc, SCOLON);
1324 35 : case '!':
1325 35 : lc->started = 1;
1326 35 : cur = scanner_getc(lc);
1327 35 : if (cur < 0)
1328 : return EOF;
1329 35 : else if (cur == '=') {
1330 29 : lc->rs->buf[lc->rs->pos + lc->yycur - 2] = '<';
1331 29 : lc->rs->buf[lc->rs->pos + lc->yycur - 1] = '>';
1332 29 : return scanner_token( lc, COMPARISON);
1333 : } else {
1334 6 : utf8_putchar(lc, cur); //put the char back
1335 : }
1336 6 : return scanner_token(lc, '!');
1337 52814 : case '<':
1338 52814 : lc->started = 1;
1339 52814 : cur = scanner_getc(lc);
1340 52814 : if (cur < 0)
1341 : return EOF;
1342 52814 : if (cur == '=') {
1343 3140 : return scanner_token( lc, COMPARISON);
1344 49674 : } else if (cur == '>') {
1345 35951 : return scanner_token( lc, COMPARISON);
1346 13723 : } else if (cur == '<') {
1347 44 : next = scanner_getc(lc);
1348 44 : if (next < 0)
1349 : return EOF;
1350 44 : if (next == '=') {
1351 4 : return scanner_token( lc, LEFT_SHIFT_ASSIGN);
1352 40 : } else if (next == '|') {
1353 1 : return scanner_token(lc, GEOM_BELOW);
1354 : } else {
1355 39 : utf8_putchar(lc, next); //put the char back
1356 39 : return scanner_token( lc, LEFT_SHIFT);
1357 : }
1358 13679 : } else if(cur == '-') {
1359 19 : next = scanner_getc(lc);
1360 19 : if (next < 0)
1361 : return EOF;
1362 19 : if(next == '>') {
1363 7 : return scanner_token(lc, GEOM_DIST);
1364 : } else {
1365 : //put the characters back and fall in the next possible case
1366 12 : utf8_putchar(lc, next);
1367 12 : utf8_putchar(lc, cur);
1368 12 : return scanner_token( lc, COMPARISON);
1369 : }
1370 : } else {
1371 13660 : utf8_putchar(lc, cur);
1372 13660 : return scanner_token( lc, COMPARISON);
1373 : }
1374 47904 : case '>':
1375 47904 : lc->started = 1;
1376 47904 : cur = scanner_getc(lc);
1377 47904 : if (cur < 0)
1378 : return EOF;
1379 47904 : if (cur == '>') {
1380 2713 : cur = scanner_getc(lc);
1381 2713 : if (cur < 0)
1382 : return EOF;
1383 2713 : if (cur == '=')
1384 3 : return scanner_token( lc, RIGHT_SHIFT_ASSIGN);
1385 2710 : utf8_putchar(lc, cur);
1386 2710 : return scanner_token( lc, RIGHT_SHIFT);
1387 45191 : } else if (cur != '=') {
1388 42921 : utf8_putchar(lc, cur);
1389 42921 : return scanner_token( lc, COMPARISON);
1390 : } else {
1391 2270 : return scanner_token( lc, COMPARISON);
1392 : }
1393 2281360 : case '.':
1394 2281360 : lc->started = 1;
1395 2281360 : cur = scanner_getc(lc);
1396 2281360 : if (cur < 0)
1397 : return EOF;
1398 2281359 : if (!iswdigit(cur)) {
1399 2281345 : utf8_putchar(lc, cur);
1400 2281348 : return scanner_token( lc, '.');
1401 : } else {
1402 14 : utf8_putchar(lc, cur);
1403 14 : cur = '.';
1404 14 : return number(c, cur);
1405 : }
1406 188939 : case '|': /* binary or or string concat */
1407 188939 : lc->started = 1;
1408 188939 : cur = scanner_getc(lc);
1409 188939 : if (cur < 0)
1410 : return EOF;
1411 188939 : if (cur == '|') {
1412 188914 : return scanner_token(lc, CONCATSTRING);
1413 25 : } else if (cur == '&') {
1414 0 : next = scanner_getc(lc);
1415 0 : if (next < 0)
1416 : return EOF;
1417 0 : if(next == '>') {
1418 0 : return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);
1419 : } else {
1420 0 : utf8_putchar(lc, next); //put the char back
1421 0 : utf8_putchar(lc, cur); //put the char back
1422 0 : return scanner_token(lc, '|');
1423 : }
1424 25 : } else if (cur == '>') {
1425 1 : next = scanner_getc(lc);
1426 1 : if (next < 0)
1427 : return EOF;
1428 1 : if(next == '>') {
1429 1 : return scanner_token(lc, GEOM_ABOVE);
1430 : } else {
1431 0 : utf8_putchar(lc, next); //put the char back
1432 0 : utf8_putchar(lc, cur); //put the char back
1433 0 : return scanner_token(lc, '|');
1434 : }
1435 : } else {
1436 24 : utf8_putchar(lc, cur);
1437 24 : return scanner_token(lc, '|');
1438 : }
1439 : }
1440 10 : (void)sql_error( c, 3, SQLSTATE(42000) "Unexpected symbol (%lc)", (wint_t) cur);
1441 10 : return LEX_ERROR;
1442 : }
1443 :
/*
 * Classify the character cur, which has already been read, and scan the
 * token that it starts:
 *   - white space is skipped and classification restarts;
 *   - a digit starts a number();
 *   - a letter or '_' starts a keyword or identifier, unless it is one of
 *     the prefixes e/E, x/X, r/R (directly followed by a single quote) or
 *     u/U (followed by '&' and a quote), which start a string literal;
 *   - punctuation is handled by scanner_symbol().
 * U+FEFF and anything else not matched is reported via scanner_error().
 * EOF is returned unchanged when the mode is LINE_1 or no statement has
 * been started, and reported via scanner_error() otherwise.
 */
1444 : static int
1445 29415909 : tokenize(mvc * c, int cur)
1446 : {
1447 29415909 : struct scanner *lc = &c->scanner;
1448 58687271 : while (1) {
1449 44051590 : if (cur == 0xFEFF) {
1450 : /* on Linux at least, iswpunct returns TRUE
1451 : * for U+FEFF, but we don't want that, we just
1452 : * want to go to the scanner_error case
1453 : * below */
1454 : ;
1455 44052010 : } else if (iswspace(cur)) {
1456 14652729 : if ((cur = skip_white_space(lc)) == EOF)
1457 : return cur;
1458 14635681 : continue; /* try again */
1459 29399281 : } else if (iswdigit(cur)) {
1460 1985883 : return number(c, cur);
1461 27413398 : } else if (iswalpha(cur) || cur == '_') {
/* peek at the following byte(s) to detect a prefixed string literal;
 * the quote is then consumed with scanner_getc() and passed on */
1462 14039541 : switch (cur) {
1463 668188 : case 'e': /* string with escapes */
1464 : case 'E':
1465 668188 : if (scanner_read_more(lc, 1) != EOF &&
1466 668188 : lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1467 3915 : return scanner_string(c, scanner_getc(lc), true);
1468 : }
1469 : break;
1470 428340 : case 'x': /* blob */
1471 : case 'X':
1472 : case 'r': /* raw string */
1473 : case 'R':
1474 428340 : if (scanner_read_more(lc, 1) != EOF &&
1475 428340 : lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1476 3295 : return scanner_string(c, scanner_getc(lc), false);
1477 : }
1478 : break;
1479 162207 : case 'u': /* unicode string */
1480 : case 'U':
1481 162207 : if (scanner_read_more(lc, 1) != EOF &&
1482 162224 : lc->rs->buf[lc->rs->pos + lc->yycur] == '&' &&
1483 17 : scanner_read_more(lc, 2) != EOF &&
1484 17 : (lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '\'' ||
1485 : lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '"')) {
1486 17 : cur = scanner_getc(lc); /* '&' */
1487 17 : return scanner_string(c, scanner_getc(lc), false);
1488 : }
1489 : break;
1490 : default:
1491 : break;
1492 : }
/* not a string prefix after all: an ordinary word */
1493 14069395 : return keyword_or_ident(c, cur);
1494 13336776 : } else if (iswpunct(cur)) {
1495 13336324 : return scanner_symbol(c, cur);
1496 : }
1497 32 : if (cur == EOF) {
1498 0 : if (lc->mode == LINE_1 || !lc->started )
1499 : return cur;
1500 0 : return scanner_error(c, cur);
1501 : }
1502 : /* none of the above: error */
1503 32 : return scanner_error(c, cur);
1504 : }
1505 : }
1506 :
1507 : /* SQL 'quoted' idents consist of a set of any character of
1508 : * the source language character set other than a 'quote'
1509 : *
1510 : * MonetDB has 3 restrictions:
1511 : * 1 we disallow '%' as the first character.
1512 : * 2 the length is limited to 1024 characters
1513 : * 3 the identifier 'TID%' is not allowed
1514 : */
1515 : static bool
1516 1330628 : valid_ident(bool admin, const char *restrict s, char *restrict dst)
1517 : {
1518 1330628 : int p = 0;
1519 :
1520 1330628 : if (!admin && *s == '%')
1521 : return false;
1522 :
1523 9848093 : while (*s) {
1524 8517465 : if ((dst[p++] = *s++) == '"' && *s == '"')
1525 66 : s++;
1526 8517465 : if (p >= 1024)
1527 : return false;
1528 : }
1529 1330628 : dst[p] = '\0';
1530 1330628 : if (strcmp(dst, TID + 1) == 0) /* an index named 'TID%' could interfere with '%TID%' */
1531 : return false;
1532 : return true;
1533 : }
1534 :
/*
 * Produce the next token for the parser and store its text in
 * yylval->sval.
 *
 * A token saved earlier in lc->yynext is returned first.  Otherwise the
 * byte that the previous scanner_token() call replaced by a NUL is
 * restored, one character is read and tokenize() scans the token.
 *
 * For IDENT, COMPARISON, RANK, aTYPE, MARGFUNC and OPERATORS the token
 * text is duplicated with sa_strndup().  For STRING the text is copied
 * into memory from sa_alloc() and processed according to its first
 * character: a quoted identifier is checked with valid_ident() and
 * becomes IDENT; e/E and plain strings are unescaped by GDKstrFromStr();
 * x/X, r/R and raw strings only have doubled single quotes collapsed;
 * u/U strings are copied verbatim and returned as -STRING or -IDENT.
 *
 * Returns the token, EOF when there is no buffer or the input ends, or
 * LEX_ERROR for an invalid identifier or a string that cannot be
 * unescaped.
 */
1535 : static inline int
1536 29805488 : sql_get_next_token(YYSTYPE *yylval, void *parm)
1537 : {
1538 29805488 : mvc *c = (mvc*)parm;
1539 29805488 : struct scanner *lc = &c->scanner;
1540 29805488 : int token = 0, cur = 0;
1541 :
1542 29805488 : if (lc->rs->buf == NULL) /* malloc failure */
1543 : return EOF;
1544 :
/* hand out a token that was read ahead earlier */
1545 29805488 : if (lc->yynext) {
1546 358570 : int next = lc->yynext;
1547 :
1548 358570 : lc->yynext = 0;
1549 358570 : return(next);
1550 : }
1551 :
/* undo the NUL written by the previous scanner_token() call */
1552 29446918 : if (lc->yybak) {
1553 28369692 : lc->rs->buf[lc->rs->pos + lc->yycur] = lc->yybak;
1554 28369692 : lc->yybak = 0;
1555 : }
1556 :
1557 29446918 : lc->yysval = lc->yycur;
1558 29446918 : lc->yylast = lc->yyval;
1559 29446918 : cur = scanner_getc(lc);
1560 29451656 : if (cur < 0)
1561 : return EOF;
1562 29340845 : token = tokenize(c, cur);
1563 :
/* the token text starts at the offset recorded in lc->yysval */
1564 29333307 : yylval->sval = (lc->rs->buf + lc->rs->pos + lc->yysval);
1565 :
1566 29333307 : if (token == KW_TYPE)
1567 : token = aTYPE;
1568 29280694 : if (token == KW_OPERATORS)
1569 11249 : token = OPERATORS;
1570 :
1571 29333307 : if (token == IDENT || token == COMPARISON ||
1572 23785891 : token == RANK || token == aTYPE || token == MARGFUNC || token == OPERATORS) {
1573 5618886 : yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval);
1574 5619231 : lc->next_string_is_raw = false;
1575 23714421 : } else if (token == STRING) {
/* the first character of the text is the opening quote or the prefix
 * letter of the literal */
1576 2162946 : char quote = *yylval->sval;
1577 2162946 : char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 );
1578 2162946 : char *dst;
1579 :
1580 2162946 : assert(quote == '"' || quote == '\'' || quote == 'E' || quote == 'e' || quote == 'U' || quote == 'u' || quote == 'X' || quote == 'x' || quote == 'R' || quote == 'r');
1581 :
/* temporarily overwrite the closing quote with a NUL; it is put back
 * at the end of this branch */
1582 2162946 : lc->rs->buf[lc->rs->pos + lc->yycur - 1] = 0;
1583 2162946 : switch (quote) {
1584 1330628 : case '"':
1585 1330628 : if (valid_ident(c->user_id == USER_MONETDB || c->user_id == ROLE_SYSADMIN, yylval->sval+1,str)) {
1586 : token = IDENT;
1587 : } else {
1588 0 : sql_error(c, 1, SQLSTATE(42000) "Invalid identifier '%s'", yylval->sval+1);
1589 0 : return LEX_ERROR;
1590 : }
1591 : break;
1592 3914 : case 'e':
1593 : case 'E':
1594 3914 : assert(yylval->sval[1] == '\'');
1595 3914 : if (GDKstrFromStr((unsigned char *) str,
1596 : (unsigned char *) yylval->sval + 2,
1597 3914 : lc->yycur-lc->yysval - 2, '\'') < 0) {
/* strip the GDKERROR prefix or a leading '!' from the message */
1598 1 : char *err = GDKerrbuf;
1599 1 : if (strncmp(err, GDKERROR, strlen(GDKERROR)) == 0)
1600 1 : err += strlen(GDKERROR);
1601 0 : else if (*err == '!')
1602 0 : err++;
1603 1 : sql_error(c, 1, SQLSTATE(42000) "%s", err);
1604 1 : return LEX_ERROR;
1605 : }
1606 : quote = '\'';
1607 : break;
1608 17 : case 'u':
1609 : case 'U':
1610 17 : assert(yylval->sval[1] == '&');
1611 17 : assert(yylval->sval[2] == '\'' || yylval->sval[2] == '"');
1612 17 : strcpy(str, yylval->sval + 3);
1613 17 : token = yylval->sval[2] == '\'' ? -STRING : -IDENT; /* Passing unicode string/ident as - numbers, handled
1614 : later in scanner */
1615 17 : quote = yylval->sval[2];
1616 17 : lc->next_string_is_raw = true;
1617 17 : break;
1618 1 : case 'x':
1619 : case 'X':
1620 1 : assert(yylval->sval[1] == '\'');
1621 1 : dst = str;
/* copy, collapsing each doubled single quote into one */
1622 5 : for (char *src = yylval->sval + 2; *src; dst++)
1623 4 : if ((*dst = *src++) == '\'' && *src == '\'')
1624 0 : src++;
1625 1 : *dst = 0;
1626 1 : quote = '\'';
1627 1 : token = XSTRING;
1628 1 : lc->next_string_is_raw = true;
1629 1 : break;
1630 3287 : case 'r':
1631 : case 'R':
1632 3287 : assert(yylval->sval[1] == '\'');
1633 3287 : dst = str;
/* copy, collapsing each doubled single quote into one */
1634 450623 : for (char *src = yylval->sval + 2; *src; dst++)
1635 447336 : if ((*dst = *src++) == '\'' && *src == '\'')
1636 2780 : src++;
1637 3287 : quote = '\'';
1638 3287 : *dst = 0;
1639 3287 : break;
1640 825099 : default:
1641 825099 : if (lc->raw_string_mode || lc->next_string_is_raw) {
1642 50 : dst = str;
1643 479 : for (char *src = yylval->sval + 1; *src; dst++)
1644 429 : if ((*dst = *src++) == '\'' && *src == '\'')
1645 3 : src++;
1646 50 : *dst = 0;
1647 : } else {
1648 825049 : if (GDKstrFromStr((unsigned char *)str,
1649 825049 : (unsigned char *)yylval->sval + 1,
1650 825049 : lc->yycur - lc->yysval - 1,
1651 : '\'') < 0) {
1652 1 : sql_error(c, 1, SQLSTATE(42000) "%s", GDKerrbuf);
1653 1 : return LEX_ERROR;
1654 : }
1655 : }
1656 : break;
1657 : }
1658 2162944 : yylval->sval = str;
1659 :
1660 : /* reset original */
1661 2162944 : lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote;
1662 : } else {
1663 21551475 : lc->next_string_is_raw = false;
1664 : }
1665 :
1666 : return(token);
1667 : }
1668 :
1669 : static int scanner( YYSTYPE *yylval, void *m, bool log);
1670 :
1671 : static int
1672 29300928 : scanner(YYSTYPE * yylval, void *parm, bool log)
1673 : {
1674 29300928 : int token;
1675 29300928 : mvc *c = (mvc *) parm;
1676 29300928 : struct scanner *lc = &c->scanner;
1677 29300928 : size_t pos;
1678 29300928 : int last = lc->yyval;
1679 :
1680 : /* store position for when view's query ends */
1681 29300928 : pos = lc->rs->pos + lc->yycur;
1682 :
1683 29300928 : token = sql_get_next_token(yylval, parm);
1684 29295399 : if (token == '[')
1685 292 : lc->brackets++;
1686 29295399 : if (token == ']')
1687 295 : lc->brackets--;
1688 :
1689 : /* TODO make hash out of the possible complex tokens and add with current tokens hash */
1690 29295399 : if (token == -IDENT || token == -STRING) {
1691 17 : char *sval = yylval->sval;
1692 17 : int next = sql_get_next_token(yylval, parm);
1693 :
1694 17 : if (token == -STRING && next == STRING) {
1695 2 : sval = sa_strconcat(c->sa, sval, yylval->sval);
1696 6 : while((next = sql_get_next_token(yylval, parm)) == STRING)
1697 4 : sval = sa_strconcat(c->sa, sval, yylval->sval);
1698 : }
1699 :
1700 17 : char *uescape = "\\";
1701 17 : if (next == UESCAPE) {
1702 14 : int nxt = sql_get_next_token(yylval, parm);
1703 14 : if (nxt == STRING) {
1704 14 : next = 0;
1705 14 : uescape = yylval->sval;
1706 14 : if (strlen(uescape) != 1 || strchr("\"'0123456789abcdefABCDEF+ \t\n\r\f", *uescape) != NULL) {
1707 0 : sqlformaterror(c, SQLSTATE(22019) "%s", "UESCAPE must be one character");
1708 0 : return LEX_ERROR;
1709 : }
1710 : } else {
1711 0 : sqlformaterror(c, SQLSTATE(22019) "%s", "UESCAPE character missing");
1712 0 : return LEX_ERROR;
1713 : }
1714 : }
1715 17 : yylval->sval = uescape_xform(sval, uescape);
1716 17 : if (yylval->sval == NULL && token == -STRING) {
1717 0 : sqlformaterror(c, SQLSTATE(22019) "%s", "Bad Unicode string");
1718 0 : return LEX_ERROR;
1719 : }
1720 :
1721 17 : if (next)
1722 3 : lc->yynext = next;
1723 17 : return (token == -IDENT)?IDENT:STRING;
1724 29295382 : } else if (token == WITH) { /* check for TIME WITH ... */
1725 15572 : int next = sql_get_next_token(yylval, parm);
1726 15572 : if (next == TIME)
1727 5416 : token = WITH_LA;
1728 15572 : lc->yynext = next;
1729 29279810 : } else if (token == INTO) { /* check for INTO followed by STRING / (BIG/LITTLE/NATIVE) for copy into file vs copy select into var */
1730 140215 : int next = sql_get_next_token(yylval, parm);
1731 140156 : if (next == STRING || next == BIG || next == LITTLE || next == NATIVE ||
1732 : next == BINARY || next == STDOUT)
1733 77 : token = INTO_LA;
1734 140156 : lc->yynext = next;
1735 29139595 : } else if (last == ODBC_FUNC_ESCAPE_PREFIX && token == TIMESTAMPADD) {
1736 : token = ODBC_TIMESTAMPADD;
1737 29139860 : } else if (last == ODBC_FUNC_ESCAPE_PREFIX && token == TIMESTAMPDIFF) {
1738 : token = ODBC_TIMESTAMPDIFF;
1739 29139975 : } else if (last == INTERVAL && (token == '-' || token == '+')) { /* backward compatibility: INTERVAL +- 'string' -> interval '+-string'*/
1740 12 : int next = sql_get_next_token(yylval, parm);
1741 12 : if (next == STRING) {
1742 12 : if (token != '+') {
1743 8 : char *sval = yylval->sval;
1744 8 : if (sval[0] == '+')
1745 0 : sval[0] = '-';
1746 8 : else if (sval[0] == '-')
1747 1 : yylval->sval++;
1748 : else
1749 7 : yylval->sval = sa_strconcat(c->sa, token=='-'?"-":"+", sval);
1750 : }
1751 : token = next;
1752 : next = 0;
1753 : }
1754 12 : lc->yynext = next;
1755 29139963 : } else if (token == OUTER) { /* check for OUTER UNION */
1756 18457 : int next = sql_get_next_token(yylval, parm);
1757 18457 : if (next == UNION)
1758 : token = OUTER_UNION;
1759 : else
1760 18448 : lc->yynext = next;
1761 29121506 : } else if (token == TO) { /* check for end_field (of interval spec) TO (MONTH etc) */
1762 121026 : int next = sql_get_next_token(yylval, parm);
1763 121026 : if (next == YEAR || next == MONTH || next == DAY || next == HOUR || next == MINUTE || next == SECOND)
1764 273 : token = TO_LA;
1765 121026 : lc->yynext = next;
1766 29000480 : } else if (token == NOT) {
1767 78228 : int next = sql_get_next_token(yylval, parm);
1768 :
1769 78228 : if (next == NOT) {
1770 2 : return scanner(yylval, parm, false);
1771 : } else if (next == EXISTS) {
1772 : token = NOT_EXISTS;
1773 : } else if (next == BETWEEN) {
1774 : token = NOT_BETWEEN;
1775 : } else if (next == sqlIN) {
1776 : token = NOT_IN;
1777 : } else if (next == LIKE) {
1778 : token = NOT_LIKE;
1779 : } else if (next == ILIKE) {
1780 : token = NOT_ILIKE;
1781 : } else {
1782 63281 : lc->yynext = next;
1783 : }
1784 28922252 : } else if (token == ':' && !lc->brackets) {
1785 80 : int next = sql_get_next_token(yylval, parm);
1786 80 : if (next == IDENT)
1787 : token = PARAM;
1788 : else
1789 66 : lc->yynext = next;
1790 28922172 : } else if (token == SCOLON) {
1791 : /* ignore semi-colon(s) following a semi-colon */
1792 1046562 : if (lc->yylast == SCOLON) {
1793 131730 : size_t prev = lc->yycur;
1794 131731 : while ((token = sql_get_next_token(yylval, parm)) == SCOLON)
1795 1 : prev = lc->yycur;
1796 :
1797 : /* skip the skipped stuff also in the buffer */
1798 131736 : lc->rs->pos += prev;
1799 131736 : lc->yycur -= prev;
1800 : }
1801 : }
1802 :
1803 29295327 : if (lc->log && log)
1804 0 : mnstr_write(lc->log, lc->rs->buf+pos, lc->rs->pos + lc->yycur - pos, 1);
1805 :
1806 29295327 : lc->started += (token != EOF);
1807 29295327 : return token;
1808 : }
1809 :
1810 : /* also see sql_parser.y */
1811 : extern int sqllex(YYSTYPE * yylval, void *parm);
1812 :
1813 : int
1814 29301800 : sqllex(YYSTYPE * yylval, void *parm)
1815 : {
1816 29301800 : return scanner(yylval, parm, true);
1817 : }
|