Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include <wctype.h>
15 : #include "sql_mem.h"
16 : #include "sql_scan.h"
17 : #include "sql_types.h"
18 : #include "sql_symbol.h"
19 : #include "sql_mvc.h"
20 : #include "sql_parser.tab.h"
21 : #include "sql_semantic.h"
22 : #include "sql_parser.h" /* for sql_error() */
23 :
24 : #include "stream.h"
25 : #include "mapi_prompt.h"
26 : #include <unistd.h>
27 : #include <string.h>
28 : #include <ctype.h>
29 : #include "sql_keyword.h"
30 :
31 : /**
32 : * Removes all comments before the query. In query comments are kept.
33 : */
34 : char *
35 389593 : query_cleaned(sql_allocator *sa, const char *query)
36 : {
37 389593 : char *q, *r, *c = NULL;
38 389593 : int lines = 0;
39 389593 : int quote = 0; /* inside quotes ('..', "..", {..}) */
40 389593 : bool bs = false; /* seen a backslash in a quoted string */
41 389593 : bool incomment1 = false; /* inside traditional C style comment */
42 389593 : bool incomment2 = false; /* inside comment starting with -- */
43 389593 : bool inline_comment = false;
44 :
45 389593 : r = SA_NEW_ARRAY(sa, char, strlen(query) + 1);
46 389598 : if(!r)
47 : return NULL;
48 :
49 63212704 : for (q = r; *query; query++) {
50 62823106 : if (incomment1) {
51 15976 : if (*query == '/' && query[-1] == '*') {
52 231 : incomment1 = false;
53 231 : if (c == r && lines > 0) {
54 223 : q = r; // reset to beginning
55 223 : lines = 0;
56 223 : continue;
57 : }
58 : }
59 15753 : if (*query == '\n') lines++;
60 15753 : *q++ = *query;
61 62807130 : } else if (incomment2) {
62 825093 : if (*query == '\n') {
63 2770 : incomment2 = false;
64 2770 : inline_comment = false;
65 : /* add newline only if comment doesn't
66 : * occupy whole line */
67 2770 : if (q > r && q[-1] != '\n'){
68 887 : *q++ = '\n';
69 887 : lines++;
70 : }
71 822323 : } else if (inline_comment){
72 18166 : *q++ = *query; // preserve in line query comments
73 : }
74 61982037 : } else if (quote) {
75 18362623 : if (bs) {
76 : bs = false;
77 18359427 : } else if (*query == '\\') {
78 : bs = true;
79 18356231 : } else if (*query == quote) {
80 677699 : quote = 0;
81 : }
82 18362623 : *q++ = *query;
83 43619414 : } else if (*query == '"' || *query == '\'') {
84 677251 : quote = *query;
85 677251 : *q++ = *query;
86 42942163 : } else if (*query == '{') {
87 493 : quote = '}';
88 493 : *q++ = *query;
89 42941670 : } else if (*query == '-' && query[1] == '-') {
90 2770 : if (q > r && q[-1] != '\n') {
91 887 : inline_comment = true;
92 887 : *q++ = *query; // preserve in line query comments
93 : }
94 : incomment2 = true;
95 42938900 : } else if (*query == '/' && query[1] == '*') {
96 231 : incomment1 = true;
97 231 : c = q;
98 231 : *q++ = *query;
99 42938669 : } else if (*query == '\n') {
100 : /* collapse newlines */
101 877280 : if (q > r && q[-1] != '\n') {
102 835307 : *q++ = '\n';
103 835307 : lines++;
104 : }
105 42061389 : } else if (*query == ' ' || *query == '\t') {
106 : /* collapse white space */
107 7135912 : if (q > r && q[-1] != ' ')
108 5508521 : *q++ = ' ';
109 : } else {
110 34925477 : *q++ = *query;
111 : }
112 : }
113 389598 : *q = 0;
114 389598 : return r;
115 : }
116 :
117 : int
118 336 : scanner_init_keywords(void)
119 : {
120 336 : int failed = 0;
121 :
122 336 : failed += keywords_insert("false", BOOL_FALSE);
123 336 : failed += keywords_insert("true", BOOL_TRUE);
124 :
125 336 : failed += keywords_insert("ALTER", ALTER);
126 336 : failed += keywords_insert("ADD", ADD);
127 336 : failed += keywords_insert("AND", AND);
128 :
129 336 : failed += keywords_insert("RANK", RANK);
130 336 : failed += keywords_insert("DENSE_RANK", RANK);
131 336 : failed += keywords_insert("PERCENT_RANK", RANK);
132 336 : failed += keywords_insert("CUME_DIST", RANK);
133 336 : failed += keywords_insert("ROW_NUMBER", RANK);
134 336 : failed += keywords_insert("NTILE", RANK);
135 336 : failed += keywords_insert("LAG", RANK);
136 336 : failed += keywords_insert("LEAD", RANK);
137 336 : failed += keywords_insert("FETCH", FETCH);
138 336 : failed += keywords_insert("FIRST_VALUE", RANK);
139 336 : failed += keywords_insert("LAST_VALUE", RANK);
140 336 : failed += keywords_insert("NTH_VALUE", RANK);
141 :
142 336 : failed += keywords_insert("BEST", BEST);
143 336 : failed += keywords_insert("EFFORT", EFFORT);
144 :
145 336 : failed += keywords_insert("AS", AS);
146 336 : failed += keywords_insert("ASC", ASC);
147 336 : failed += keywords_insert("AUTHORIZATION", AUTHORIZATION);
148 336 : failed += keywords_insert("BETWEEN", BETWEEN);
149 336 : failed += keywords_insert("SYMMETRIC", SYMMETRIC);
150 336 : failed += keywords_insert("ASYMMETRIC", ASYMMETRIC);
151 336 : failed += keywords_insert("BY", BY);
152 336 : failed += keywords_insert("CAST", CAST);
153 336 : failed += keywords_insert("CONVERT", CONVERT);
154 336 : failed += keywords_insert("CHARACTER", CHARACTER);
155 336 : failed += keywords_insert("CHAR", CHARACTER);
156 336 : failed += keywords_insert("VARYING", VARYING);
157 336 : failed += keywords_insert("VARCHAR", VARCHAR);
158 336 : failed += keywords_insert("BINARY", BINARY);
159 336 : failed += keywords_insert("LARGE", LARGE);
160 336 : failed += keywords_insert("OBJECT", OBJECT);
161 336 : failed += keywords_insert("CLOB", CLOB);
162 336 : failed += keywords_insert("BLOB", sqlBLOB);
163 336 : failed += keywords_insert("TEXT", sqlTEXT);
164 336 : failed += keywords_insert("TINYTEXT", sqlTEXT);
165 336 : failed += keywords_insert("STRING", CLOB); /* ? */
166 336 : failed += keywords_insert("CHECK", CHECK);
167 336 : failed += keywords_insert("CLIENT", CLIENT);
168 336 : failed += keywords_insert("SERVER", SERVER);
169 336 : failed += keywords_insert("COMMENT", COMMENT);
170 336 : failed += keywords_insert("CONSTRAINT", CONSTRAINT);
171 336 : failed += keywords_insert("CREATE", CREATE);
172 336 : failed += keywords_insert("CROSS", CROSS);
173 336 : failed += keywords_insert("COPY", COPY);
174 336 : failed += keywords_insert("RECORDS", RECORDS);
175 336 : failed += keywords_insert("DELIMITERS", DELIMITERS);
176 336 : failed += keywords_insert("STDIN", STDIN);
177 336 : failed += keywords_insert("STDOUT", STDOUT);
178 :
179 336 : failed += keywords_insert("TINYINT", TINYINT);
180 336 : failed += keywords_insert("SMALLINT", SMALLINT);
181 336 : failed += keywords_insert("INTEGER", sqlINTEGER);
182 336 : failed += keywords_insert("INT", sqlINTEGER);
183 336 : failed += keywords_insert("MEDIUMINT", sqlINTEGER);
184 336 : failed += keywords_insert("BIGINT", BIGINT);
185 : #ifdef HAVE_HGE
186 336 : failed += keywords_insert("HUGEINT", HUGEINT);
187 : #endif
188 336 : failed += keywords_insert("DEC", sqlDECIMAL);
189 336 : failed += keywords_insert("DECIMAL", sqlDECIMAL);
190 336 : failed += keywords_insert("NUMERIC", sqlDECIMAL);
191 336 : failed += keywords_insert("DECLARE", DECLARE);
192 336 : failed += keywords_insert("DEFAULT", DEFAULT);
193 336 : failed += keywords_insert("DESC", DESC);
194 336 : failed += keywords_insert("DISTINCT", DISTINCT);
195 336 : failed += keywords_insert("DOUBLE", sqlDOUBLE);
196 336 : failed += keywords_insert("REAL", sqlREAL);
197 336 : failed += keywords_insert("DROP", DROP);
198 336 : failed += keywords_insert("ESCAPE", ESCAPE);
199 336 : failed += keywords_insert("EXISTS", EXISTS);
200 336 : failed += keywords_insert("UESCAPE", UESCAPE);
201 336 : failed += keywords_insert("EXTRACT", EXTRACT);
202 336 : failed += keywords_insert("FLOAT", sqlFLOAT);
203 336 : failed += keywords_insert("FOR", FOR);
204 336 : failed += keywords_insert("FOREIGN", FOREIGN);
205 336 : failed += keywords_insert("FROM", FROM);
206 336 : failed += keywords_insert("FWF", FWF);
207 :
208 336 : failed += keywords_insert("BIG", BIG);
209 336 : failed += keywords_insert("LITTLE", LITTLE);
210 336 : failed += keywords_insert("NATIVE", NATIVE);
211 336 : failed += keywords_insert("ENDIAN", ENDIAN);
212 :
213 336 : failed += keywords_insert("REFERENCES", REFERENCES);
214 :
215 336 : failed += keywords_insert("MATCH", MATCH);
216 336 : failed += keywords_insert("FULL", FULL);
217 336 : failed += keywords_insert("PARTIAL", PARTIAL);
218 336 : failed += keywords_insert("SIMPLE", SIMPLE);
219 :
220 336 : failed += keywords_insert("INSERT", INSERT);
221 336 : failed += keywords_insert("UPDATE", UPDATE);
222 336 : failed += keywords_insert("DELETE", sqlDELETE);
223 336 : failed += keywords_insert("TRUNCATE", TRUNCATE);
224 336 : failed += keywords_insert("MATCHED", MATCHED);
225 :
226 336 : failed += keywords_insert("ACTION", ACTION);
227 336 : failed += keywords_insert("CASCADE", CASCADE);
228 336 : failed += keywords_insert("RESTRICT", RESTRICT);
229 336 : failed += keywords_insert("FIRST", FIRST);
230 336 : failed += keywords_insert("GLOBAL", GLOBAL);
231 336 : failed += keywords_insert("GROUP", sqlGROUP);
232 336 : failed += keywords_insert("GROUPING", GROUPING);
233 336 : failed += keywords_insert("ROLLUP", ROLLUP);
234 336 : failed += keywords_insert("CUBE", CUBE);
235 336 : failed += keywords_insert("HAVING", HAVING);
236 336 : failed += keywords_insert("ILIKE", ILIKE);
237 336 : failed += keywords_insert("IMPRINTS", IMPRINTS);
238 336 : failed += keywords_insert("IN", sqlIN);
239 336 : failed += keywords_insert("INNER", INNER);
240 336 : failed += keywords_insert("INTO", INTO);
241 336 : failed += keywords_insert("IS", IS);
242 336 : failed += keywords_insert("JOIN", JOIN);
243 336 : failed += keywords_insert("KEY", KEY);
244 336 : failed += keywords_insert("LATERAL", LATERAL);
245 336 : failed += keywords_insert("LEFT", LEFT);
246 336 : failed += keywords_insert("LIKE", LIKE);
247 336 : failed += keywords_insert("LIMIT", LIMIT);
248 336 : failed += keywords_insert("SAMPLE", SAMPLE);
249 336 : failed += keywords_insert("SEED", SEED);
250 336 : failed += keywords_insert("LAST", LAST);
251 336 : failed += keywords_insert("LOCAL", LOCAL);
252 336 : failed += keywords_insert("NATURAL", NATURAL);
253 336 : failed += keywords_insert("NOT", NOT);
254 336 : failed += keywords_insert("NULL", sqlNULL);
255 336 : failed += keywords_insert("NULLS", NULLS);
256 336 : failed += keywords_insert("OFFSET", OFFSET);
257 336 : failed += keywords_insert("ON", ON);
258 336 : failed += keywords_insert("OPTIONS", OPTIONS);
259 336 : failed += keywords_insert("OPTION", OPTION);
260 336 : failed += keywords_insert("OR", OR);
261 336 : failed += keywords_insert("ORDER", ORDER);
262 336 : failed += keywords_insert("ORDERED", ORDERED);
263 336 : failed += keywords_insert("OUTER", OUTER);
264 336 : failed += keywords_insert("OVER", OVER);
265 336 : failed += keywords_insert("PARTITION", PARTITION);
266 336 : failed += keywords_insert("PATH", PATH);
267 336 : failed += keywords_insert("PRECISION", PRECISION);
268 336 : failed += keywords_insert("PRIMARY", PRIMARY);
269 :
270 336 : failed += keywords_insert("USER", USER);
271 336 : failed += keywords_insert("RENAME", RENAME);
272 336 : failed += keywords_insert("UNENCRYPTED", UNENCRYPTED);
273 336 : failed += keywords_insert("ENCRYPTED", ENCRYPTED);
274 336 : failed += keywords_insert("PASSWORD", PASSWORD);
275 336 : failed += keywords_insert("GRANT", GRANT);
276 336 : failed += keywords_insert("REVOKE", REVOKE);
277 336 : failed += keywords_insert("ROLE", ROLE);
278 336 : failed += keywords_insert("ADMIN", ADMIN);
279 336 : failed += keywords_insert("PRIVILEGES", PRIVILEGES);
280 336 : failed += keywords_insert("PUBLIC", PUBLIC);
281 336 : failed += keywords_insert("CURRENT_USER", CURRENT_USER);
282 336 : failed += keywords_insert("CURRENT_ROLE", CURRENT_ROLE);
283 336 : failed += keywords_insert("SESSION_USER", SESSION_USER);
284 336 : failed += keywords_insert("CURRENT_SCHEMA", CURRENT_SCHEMA);
285 336 : failed += keywords_insert("SESSION", sqlSESSION);
286 336 : failed += keywords_insert("MAX_MEMORY", MAX_MEMORY);
287 336 : failed += keywords_insert("MAX_WORKERS", MAX_WORKERS);
288 336 : failed += keywords_insert("OPTIMIZER", OPTIMIZER);
289 :
290 336 : failed += keywords_insert("RIGHT", RIGHT);
291 336 : failed += keywords_insert("SCHEMA", SCHEMA);
292 336 : failed += keywords_insert("SELECT", SELECT);
293 336 : failed += keywords_insert("SET", SET);
294 336 : failed += keywords_insert("SETS", SETS);
295 336 : failed += keywords_insert("AUTO_COMMIT", AUTO_COMMIT);
296 :
297 336 : failed += keywords_insert("ALL", ALL);
298 336 : failed += keywords_insert("ANY", ANY);
299 336 : failed += keywords_insert("SOME", SOME);
300 336 : failed += keywords_insert("EVERY", ANY);
301 : /*
302 : failed += keywords_insert("SQLCODE", SQLCODE );
303 : */
304 336 : failed += keywords_insert("COLUMN", COLUMN);
305 336 : failed += keywords_insert("TABLE", TABLE);
306 336 : failed += keywords_insert("TEMPORARY", TEMPORARY);
307 336 : failed += keywords_insert("TEMP", TEMP);
308 336 : failed += keywords_insert("REMOTE", REMOTE);
309 336 : failed += keywords_insert("MERGE", MERGE);
310 336 : failed += keywords_insert("REPLICA", REPLICA);
311 336 : failed += keywords_insert("UNLOGGED", UNLOGGED);
312 336 : failed += keywords_insert("TO", TO);
313 336 : failed += keywords_insert("UNION", UNION);
314 336 : failed += keywords_insert("EXCEPT", EXCEPT);
315 336 : failed += keywords_insert("INTERSECT", INTERSECT);
316 336 : failed += keywords_insert("CORRESPONDING", CORRESPONDING);
317 336 : failed += keywords_insert("UNIQUE", UNIQUE);
318 336 : failed += keywords_insert("USING", USING);
319 336 : failed += keywords_insert("VALUES", VALUES);
320 336 : failed += keywords_insert("VIEW", VIEW);
321 336 : failed += keywords_insert("WHERE", WHERE);
322 336 : failed += keywords_insert("WITH", WITH);
323 336 : failed += keywords_insert("WITHOUT", WITHOUT);
324 336 : failed += keywords_insert("DATA", DATA);
325 :
326 336 : failed += keywords_insert("DATE", sqlDATE);
327 336 : failed += keywords_insert("TIME", TIME);
328 336 : failed += keywords_insert("TIMESTAMP", TIMESTAMP);
329 336 : failed += keywords_insert("INTERVAL", INTERVAL);
330 336 : failed += keywords_insert("CURRENT_DATE", CURRENT_DATE);
331 336 : failed += keywords_insert("CURRENT_TIME", CURRENT_TIME);
332 336 : failed += keywords_insert("CURRENT_TIMESTAMP", CURRENT_TIMESTAMP);
333 336 : failed += keywords_insert("CURRENT_TIMEZONE", CURRENT_TIMEZONE);
334 336 : failed += keywords_insert("NOW", CURRENT_TIMESTAMP);
335 336 : failed += keywords_insert("LOCALTIME", LOCALTIME);
336 336 : failed += keywords_insert("LOCALTIMESTAMP", LOCALTIMESTAMP);
337 336 : failed += keywords_insert("ZONE", ZONE);
338 :
339 336 : failed += keywords_insert("CENTURY", CENTURY);
340 336 : failed += keywords_insert("DECADE", DECADE);
341 336 : failed += keywords_insert("YEAR", YEAR);
342 336 : failed += keywords_insert("QUARTER", QUARTER);
343 336 : failed += keywords_insert("MONTH", MONTH);
344 336 : failed += keywords_insert("WEEK", WEEK);
345 336 : failed += keywords_insert("DOW", DOW);
346 336 : failed += keywords_insert("DOY", DOY);
347 336 : failed += keywords_insert("DAY", DAY);
348 336 : failed += keywords_insert("HOUR", HOUR);
349 336 : failed += keywords_insert("MINUTE", MINUTE);
350 336 : failed += keywords_insert("SECOND", SECOND);
351 336 : failed += keywords_insert("EPOCH", EPOCH);
352 :
353 336 : failed += keywords_insert("POSITION", POSITION);
354 336 : failed += keywords_insert("SUBSTRING", SUBSTRING);
355 336 : failed += keywords_insert("SPLIT_PART", SPLIT_PART);
356 336 : failed += keywords_insert("TRIM", TRIM);
357 336 : failed += keywords_insert("LEADING", LEADING);
358 336 : failed += keywords_insert("TRAILING", TRAILING);
359 336 : failed += keywords_insert("BOTH", BOTH);
360 :
361 336 : failed += keywords_insert("CASE", CASE);
362 336 : failed += keywords_insert("WHEN", WHEN);
363 336 : failed += keywords_insert("THEN", THEN);
364 336 : failed += keywords_insert("ELSE", ELSE);
365 336 : failed += keywords_insert("END", END);
366 336 : failed += keywords_insert("NULLIF", NULLIF);
367 336 : failed += keywords_insert("COALESCE", COALESCE);
368 336 : failed += keywords_insert("ELSEIF", ELSEIF);
369 336 : failed += keywords_insert("IF", IF);
370 336 : failed += keywords_insert("WHILE", WHILE);
371 336 : failed += keywords_insert("DO", DO);
372 :
373 336 : failed += keywords_insert("COMMIT", COMMIT);
374 336 : failed += keywords_insert("ROLLBACK", ROLLBACK);
375 336 : failed += keywords_insert("SAVEPOINT", SAVEPOINT);
376 336 : failed += keywords_insert("RELEASE", RELEASE);
377 336 : failed += keywords_insert("WORK", WORK);
378 336 : failed += keywords_insert("CHAIN", CHAIN);
379 336 : failed += keywords_insert("PRESERVE", PRESERVE);
380 336 : failed += keywords_insert("ROWS", ROWS);
381 336 : failed += keywords_insert("NO", NO);
382 336 : failed += keywords_insert("START", START);
383 336 : failed += keywords_insert("TRANSACTION", TRANSACTION);
384 336 : failed += keywords_insert("READ", READ);
385 336 : failed += keywords_insert("WRITE", WRITE);
386 336 : failed += keywords_insert("ONLY", ONLY);
387 336 : failed += keywords_insert("ISOLATION", ISOLATION);
388 336 : failed += keywords_insert("LEVEL", LEVEL);
389 336 : failed += keywords_insert("UNCOMMITTED", UNCOMMITTED);
390 336 : failed += keywords_insert("COMMITTED", COMMITTED);
391 336 : failed += keywords_insert("REPEATABLE", sqlREPEATABLE);
392 336 : failed += keywords_insert("SNAPSHOT", SNAPSHOT);
393 336 : failed += keywords_insert("SERIALIZABLE", SERIALIZABLE);
394 336 : failed += keywords_insert("DIAGNOSTICS", DIAGNOSTICS);
395 336 : failed += keywords_insert("SIZE", sqlSIZE);
396 336 : failed += keywords_insert("STORAGE", STORAGE);
397 :
398 336 : failed += keywords_insert("TYPE", TYPE);
399 336 : failed += keywords_insert("PROCEDURE", PROCEDURE);
400 336 : failed += keywords_insert("FUNCTION", FUNCTION);
401 336 : failed += keywords_insert("LOADER", sqlLOADER);
402 336 : failed += keywords_insert("REPLACE", REPLACE);
403 :
404 336 : failed += keywords_insert("FIELD", FIELD);
405 336 : failed += keywords_insert("FILTER", FILTER);
406 336 : failed += keywords_insert("AGGREGATE", AGGREGATE);
407 336 : failed += keywords_insert("RETURNS", RETURNS);
408 336 : failed += keywords_insert("EXTERNAL", EXTERNAL);
409 336 : failed += keywords_insert("NAME", sqlNAME);
410 336 : failed += keywords_insert("RETURN", RETURN);
411 336 : failed += keywords_insert("CALL", CALL);
412 336 : failed += keywords_insert("LANGUAGE", LANGUAGE);
413 :
414 336 : failed += keywords_insert("ANALYZE", ANALYZE);
415 336 : failed += keywords_insert("MINMAX", MINMAX);
416 336 : failed += keywords_insert("EXPLAIN", SQL_EXPLAIN);
417 336 : failed += keywords_insert("PLAN", SQL_PLAN);
418 336 : failed += keywords_insert("TRACE", SQL_TRACE);
419 336 : failed += keywords_insert("PREPARE", PREPARE);
420 336 : failed += keywords_insert("PREP", PREP);
421 336 : failed += keywords_insert("EXECUTE", EXECUTE);
422 336 : failed += keywords_insert("EXEC", EXEC);
423 336 : failed += keywords_insert("DEALLOCATE", DEALLOCATE);
424 :
425 336 : failed += keywords_insert("INDEX", INDEX);
426 :
427 336 : failed += keywords_insert("SEQUENCE", SEQUENCE);
428 336 : failed += keywords_insert("RESTART", RESTART);
429 336 : failed += keywords_insert("INCREMENT", INCREMENT);
430 336 : failed += keywords_insert("MAXVALUE", MAXVALUE);
431 336 : failed += keywords_insert("MINVALUE", MINVALUE);
432 336 : failed += keywords_insert("CYCLE", CYCLE);
433 336 : failed += keywords_insert("CACHE", CACHE);
434 336 : failed += keywords_insert("NEXT", NEXT);
435 336 : failed += keywords_insert("VALUE", VALUE);
436 336 : failed += keywords_insert("GENERATED", GENERATED);
437 336 : failed += keywords_insert("ALWAYS", ALWAYS);
438 336 : failed += keywords_insert("IDENTITY", IDENTITY);
439 336 : failed += keywords_insert("SERIAL", SERIAL);
440 336 : failed += keywords_insert("BIGSERIAL", BIGSERIAL);
441 336 : failed += keywords_insert("AUTO_INCREMENT", AUTO_INCREMENT);
442 336 : failed += keywords_insert("CONTINUE", CONTINUE);
443 :
444 336 : failed += keywords_insert("TRIGGER", TRIGGER);
445 336 : failed += keywords_insert("ATOMIC", ATOMIC);
446 336 : failed += keywords_insert("BEGIN", BEGIN);
447 336 : failed += keywords_insert("OF", OF);
448 336 : failed += keywords_insert("BEFORE", BEFORE);
449 336 : failed += keywords_insert("AFTER", AFTER);
450 336 : failed += keywords_insert("ROW", ROW);
451 336 : failed += keywords_insert("STATEMENT", STATEMENT);
452 336 : failed += keywords_insert("NEW", sqlNEW);
453 336 : failed += keywords_insert("OLD", OLD);
454 336 : failed += keywords_insert("EACH", EACH);
455 336 : failed += keywords_insert("REFERENCING", REFERENCING);
456 :
457 336 : failed += keywords_insert("RANGE", RANGE);
458 336 : failed += keywords_insert("UNBOUNDED", UNBOUNDED);
459 336 : failed += keywords_insert("PRECEDING", PRECEDING);
460 336 : failed += keywords_insert("FOLLOWING", FOLLOWING);
461 336 : failed += keywords_insert("CURRENT", CURRENT);
462 336 : failed += keywords_insert("EXCLUDE", EXCLUDE);
463 336 : failed += keywords_insert("OTHERS", OTHERS);
464 336 : failed += keywords_insert("TIES", TIES);
465 336 : failed += keywords_insert("GROUPS", GROUPS);
466 336 : failed += keywords_insert("WINDOW", WINDOW);
467 :
468 : /* special SQL/XML keywords */
469 336 : failed += keywords_insert("XMLCOMMENT", XMLCOMMENT);
470 336 : failed += keywords_insert("XMLCONCAT", XMLCONCAT);
471 336 : failed += keywords_insert("XMLDOCUMENT", XMLDOCUMENT);
472 336 : failed += keywords_insert("XMLELEMENT", XMLELEMENT);
473 336 : failed += keywords_insert("XMLATTRIBUTES", XMLATTRIBUTES);
474 336 : failed += keywords_insert("XMLFOREST", XMLFOREST);
475 336 : failed += keywords_insert("XMLPARSE", XMLPARSE);
476 336 : failed += keywords_insert("STRIP", STRIP);
477 336 : failed += keywords_insert("WHITESPACE", WHITESPACE);
478 336 : failed += keywords_insert("XMLPI", XMLPI);
479 336 : failed += keywords_insert("XMLQUERY", XMLQUERY);
480 336 : failed += keywords_insert("PASSING", PASSING);
481 336 : failed += keywords_insert("XMLTEXT", XMLTEXT);
482 336 : failed += keywords_insert("NIL", NIL);
483 336 : failed += keywords_insert("REF", REF);
484 336 : failed += keywords_insert("ABSENT", ABSENT);
485 336 : failed += keywords_insert("DOCUMENT", DOCUMENT);
486 336 : failed += keywords_insert("ELEMENT", ELEMENT);
487 336 : failed += keywords_insert("CONTENT", CONTENT);
488 336 : failed += keywords_insert("XMLNAMESPACES", XMLNAMESPACES);
489 336 : failed += keywords_insert("NAMESPACE", NAMESPACE);
490 336 : failed += keywords_insert("XMLVALIDATE", XMLVALIDATE);
491 336 : failed += keywords_insert("RETURNING", RETURNING);
492 336 : failed += keywords_insert("LOCATION", LOCATION);
493 336 : failed += keywords_insert("ID", ID);
494 336 : failed += keywords_insert("ACCORDING", ACCORDING);
495 336 : failed += keywords_insert("XMLSCHEMA", XMLSCHEMA);
496 336 : failed += keywords_insert("URI", URI);
497 336 : failed += keywords_insert("XMLAGG", XMLAGG);
498 :
499 : /* keywords for opengis */
500 336 : failed += keywords_insert("GEOMETRY", GEOMETRY);
501 :
502 336 : failed += keywords_insert("POINT", GEOMETRYSUBTYPE);
503 336 : failed += keywords_insert("LINESTRING", GEOMETRYSUBTYPE);
504 336 : failed += keywords_insert("POLYGON", GEOMETRYSUBTYPE);
505 336 : failed += keywords_insert("MULTIPOINT", GEOMETRYSUBTYPE);
506 336 : failed += keywords_insert("MULTILINESTRING", GEOMETRYSUBTYPE);
507 336 : failed += keywords_insert("MULTIPOLYGON", GEOMETRYSUBTYPE);
508 336 : failed += keywords_insert("GEOMETRYCOLLECTION", GEOMETRYSUBTYPE);
509 :
510 336 : failed += keywords_insert("POINTZ", GEOMETRYSUBTYPE);
511 336 : failed += keywords_insert("LINESTRINGZ", GEOMETRYSUBTYPE);
512 336 : failed += keywords_insert("POLYGONZ", GEOMETRYSUBTYPE);
513 336 : failed += keywords_insert("MULTIPOINTZ", GEOMETRYSUBTYPE);
514 336 : failed += keywords_insert("MULTILINESTRINGZ", GEOMETRYSUBTYPE);
515 336 : failed += keywords_insert("MULTIPOLYGONZ", GEOMETRYSUBTYPE);
516 336 : failed += keywords_insert("GEOMETRYCOLLECTIONZ", GEOMETRYSUBTYPE);
517 :
518 336 : failed += keywords_insert("POINTM", GEOMETRYSUBTYPE);
519 336 : failed += keywords_insert("LINESTRINGM", GEOMETRYSUBTYPE);
520 336 : failed += keywords_insert("POLYGONM", GEOMETRYSUBTYPE);
521 336 : failed += keywords_insert("MULTIPOINTM", GEOMETRYSUBTYPE);
522 336 : failed += keywords_insert("MULTILINESTRINGM", GEOMETRYSUBTYPE);
523 336 : failed += keywords_insert("MULTIPOLYGONM", GEOMETRYSUBTYPE);
524 336 : failed += keywords_insert("GEOMETRYCOLLECTIONM", GEOMETRYSUBTYPE);
525 :
526 336 : failed += keywords_insert("POINTZM", GEOMETRYSUBTYPE);
527 336 : failed += keywords_insert("LINESTRINGZM", GEOMETRYSUBTYPE);
528 336 : failed += keywords_insert("POLYGONZM", GEOMETRYSUBTYPE);
529 336 : failed += keywords_insert("MULTIPOINTZM", GEOMETRYSUBTYPE);
530 336 : failed += keywords_insert("MULTILINESTRINGZM", GEOMETRYSUBTYPE);
531 336 : failed += keywords_insert("MULTIPOLYGONZM", GEOMETRYSUBTYPE);
532 336 : failed += keywords_insert("GEOMETRYCOLLECTIONZM", GEOMETRYSUBTYPE);
533 336 : failed += keywords_insert("LOGIN", LOGIN);
534 : // odbc keywords
535 336 : failed += keywords_insert("d", ODBC_DATE_ESCAPE_PREFIX);
536 336 : failed += keywords_insert("t", ODBC_TIME_ESCAPE_PREFIX);
537 336 : failed += keywords_insert("ts", ODBC_TIMESTAMP_ESCAPE_PREFIX);
538 336 : failed += keywords_insert("guid", ODBC_GUID_ESCAPE_PREFIX);
539 336 : failed += keywords_insert("fn", ODBC_FUNC_ESCAPE_PREFIX);
540 336 : failed += keywords_insert("oj", ODBC_OJ_ESCAPE_PREFIX);
541 336 : failed += keywords_insert("DAYNAME", DAYNAME);
542 336 : failed += keywords_insert("IFNULL", IFNULL);
543 336 : failed += keywords_insert("MONTHNAME", MONTHNAME);
544 336 : failed += keywords_insert("TIMESTAMPADD", TIMESTAMPADD);
545 336 : failed += keywords_insert("TIMESTAMPDIFF", TIMESTAMPDIFF);
546 336 : failed += keywords_insert("SQL_BIGINT", SQL_BIGINT);
547 336 : failed += keywords_insert("SQL_BINARY", SQL_BINARY);
548 336 : failed += keywords_insert("SQL_BIT", SQL_BIT);
549 336 : failed += keywords_insert("SQL_CHAR", SQL_CHAR);
550 336 : failed += keywords_insert("SQL_DATE", SQL_DATE);
551 336 : failed += keywords_insert("SQL_DECIMAL", SQL_DECIMAL);
552 336 : failed += keywords_insert("SQL_DOUBLE", SQL_DOUBLE);
553 336 : failed += keywords_insert("SQL_FLOAT", SQL_FLOAT);
554 336 : failed += keywords_insert("SQL_GUID", SQL_GUID);
555 336 : failed += keywords_insert("SQL_HUGEINT", SQL_HUGEINT);
556 336 : failed += keywords_insert("SQL_INTEGER", SQL_INTEGER);
557 336 : failed += keywords_insert("SQL_INTERVAL_DAY", SQL_INTERVAL_DAY);
558 336 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_HOUR", SQL_INTERVAL_DAY_TO_HOUR);
559 336 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_MINUTE", SQL_INTERVAL_DAY_TO_MINUTE);
560 336 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_SECOND", SQL_INTERVAL_DAY_TO_SECOND);
561 336 : failed += keywords_insert("SQL_INTERVAL_HOUR", SQL_INTERVAL_HOUR);
562 336 : failed += keywords_insert("SQL_INTERVAL_HOUR_TO_MINUTE", SQL_INTERVAL_HOUR_TO_MINUTE);
563 336 : failed += keywords_insert("SQL_INTERVAL_HOUR_TO_SECOND", SQL_INTERVAL_HOUR_TO_SECOND);
564 336 : failed += keywords_insert("SQL_INTERVAL_MINUTE", SQL_INTERVAL_MINUTE);
565 336 : failed += keywords_insert("SQL_INTERVAL_MINUTE_TO_SECOND", SQL_INTERVAL_MINUTE_TO_SECOND);
566 336 : failed += keywords_insert("SQL_INTERVAL_MONTH", SQL_INTERVAL_MONTH);
567 336 : failed += keywords_insert("SQL_INTERVAL_SECOND", SQL_INTERVAL_SECOND);
568 336 : failed += keywords_insert("SQL_INTERVAL_YEAR", SQL_INTERVAL_YEAR);
569 336 : failed += keywords_insert("SQL_INTERVAL_YEAR_TO_MONTH", SQL_INTERVAL_YEAR_TO_MONTH);
570 336 : failed += keywords_insert("SQL_LONGVARBINARY", SQL_LONGVARBINARY);
571 336 : failed += keywords_insert("SQL_LONGVARCHAR", SQL_LONGVARCHAR);
572 336 : failed += keywords_insert("SQL_NUMERIC", SQL_NUMERIC);
573 336 : failed += keywords_insert("SQL_REAL", SQL_REAL);
574 336 : failed += keywords_insert("SQL_SMALLINT", SQL_SMALLINT);
575 336 : failed += keywords_insert("SQL_TIME", SQL_TIME);
576 336 : failed += keywords_insert("SQL_TIMESTAMP", SQL_TIMESTAMP);
577 336 : failed += keywords_insert("SQL_TINYINT", SQL_TINYINT);
578 336 : failed += keywords_insert("SQL_VARBINARY", SQL_VARBINARY);
579 336 : failed += keywords_insert("SQL_VARCHAR", SQL_VARCHAR);
580 336 : failed += keywords_insert("SQL_WCHAR", SQL_WCHAR);
581 336 : failed += keywords_insert("SQL_WLONGVARCHAR", SQL_WLONGVARCHAR);
582 336 : failed += keywords_insert("SQL_WVARCHAR", SQL_WVARCHAR);
583 336 : failed += keywords_insert("SQL_TSI_FRAC_SECOND", SQL_TSI_FRAC_SECOND);
584 336 : failed += keywords_insert("SQL_TSI_SECOND", SQL_TSI_SECOND);
585 336 : failed += keywords_insert("SQL_TSI_MINUTE", SQL_TSI_MINUTE);
586 336 : failed += keywords_insert("SQL_TSI_HOUR", SQL_TSI_HOUR);
587 336 : failed += keywords_insert("SQL_TSI_DAY", SQL_TSI_DAY);
588 336 : failed += keywords_insert("SQL_TSI_WEEK", SQL_TSI_WEEK);
589 336 : failed += keywords_insert("SQL_TSI_MONTH", SQL_TSI_MONTH);
590 336 : failed += keywords_insert("SQL_TSI_QUARTER", SQL_TSI_QUARTER);
591 336 : failed += keywords_insert("SQL_TSI_YEAR", SQL_TSI_YEAR);
592 :
593 336 : failed += keywords_insert("LEAST", MARGFUNC);
594 336 : failed += keywords_insert("GREATEST", MARGFUNC);
595 336 : return failed;
596 : }
597 :
/* Call find_keyword() on the text that lies s bytes past the current
 * read position of lc's input buffer.  Both parameters are
 * parenthesised so that any expression can be passed safely. */
#define find_keyword_bs(lc, s) find_keyword((lc)->rs->buf + (lc)->rs->pos + (s))
599 :
600 : void
601 238296 : scanner_init(struct scanner *s, bstream *rs, stream *ws)
602 : {
603 476592 : *s = (struct scanner) {
604 : .rs = rs,
605 : .ws = ws,
606 : .mode = LINE_N,
607 238296 : .raw_string_mode = GDKgetenv_istrue("raw_strings"),
608 : };
609 238296 : }
610 :
611 : void
612 1260415 : scanner_query_processed(struct scanner *s)
613 : {
614 1260415 : int cur;
615 :
616 1260415 : if (s->yybak) {
617 488213 : s->rs->buf[s->rs->pos + s->yycur] = s->yybak;
618 488213 : s->yybak = 0;
619 : }
620 1260415 : if (s->rs) {
621 1260415 : s->rs->pos += s->yycur;
622 : /* completely eat the query including white space after the ; */
623 2394044 : while (s->rs->pos < s->rs->len &&
624 2069664 : (cur = s->rs->buf[s->rs->pos], iswspace(cur))) {
625 1133629 : s->rs->pos++;
626 : }
627 : }
628 : /*assert(s->rs->pos <= s->rs->len);*/
629 1260415 : s->yycur = 0;
630 1260415 : s->started = 0;
631 1260415 : s->as = 0;
632 1260415 : s->schema = NULL;
633 1260415 : }
634 :
635 : static int
636 33 : scanner_error(mvc *lc, int cur)
637 : {
638 33 : switch (cur) {
639 0 : case EOF:
640 0 : (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input");
641 0 : return EOF;
642 33 : default:
643 : /* on Windows at least, iswcntrl returns TRUE for
644 : * U+FEFF, but we just want consistent error
645 : * messages */
646 33 : (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected%s character (U+%04X)", iswcntrl(cur) && cur != 0xFEFF ? " control" : "", (unsigned) cur);
647 : }
648 33 : return LEX_ERROR;
649 : }
650 :
651 :
652 : /*
653 : UTF-8 encoding is as follows:
654 : U-00000000 - U-0000007F: 0xxxxxxx
655 : U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
656 : U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
657 : U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
658 : U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
659 : U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
660 : */
/* Well-formed UTF-8 always uses the shortest sequence that can hold
   the value.  A sequence of n+1 bytes (1 <= n <= 5) is therefore only
   acceptable when the decoded value has at least one of the bits in
   utf8chkmsk[n] set; when none is set, the value would have fitted in
   a shorter sequence.
 */
static const int utf8chkmsk[] = {
	[0] = 0x7f,		/* bits  0 -  6 */
	[1] = 0x780,		/* bits  7 - 10 */
	[2] = 0xf800,		/* bits 11 - 15 */
	[3] = 0x1f0000,		/* bits 16 - 20 */
	[4] = 0x3e00000,	/* bits 21 - 25 */
	[5] = 0x7c000000,	/* bits 26 - 30 */
};
675 :
676 : static void
677 30633609 : utf8_putchar(struct scanner *lc, int ch)
678 : {
679 30633609 : if ((ch) < 0x80) {
680 30633604 : lc->yycur--;
681 5 : } else if ((ch) < 0x800) {
682 0 : lc->yycur -= 2;
683 5 : } else if ((ch) < 0x10000) {
684 5 : lc->yycur -= 3;
685 : } else {
686 0 : lc->yycur -= 4;
687 : }
688 30633609 : }
689 :
690 : static inline int
691 133757971 : scanner_read_more(struct scanner *lc, size_t n)
692 : {
693 133757971 : bstream *b = lc->rs;
694 133757971 : bool more = false;
695 :
696 :
697 133761788 : while (b->len < b->pos + lc->yycur + n) {
698 :
699 138819 : if (lc->mode == LINE_1 || !lc->started)
700 : return EOF;
701 :
702 : /* query is not finished ask for more */
703 6272 : if (b->eof || !isa_block_stream(b->s)) {
704 4360 : if (mnstr_write(lc->ws, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1)
705 1912 : mnstr_flush(lc->ws, MNSTR_FLUSH_DATA);
706 1912 : b->eof = false;
707 1912 : more = true;
708 : }
709 : /* we need more query text */
710 3824 : if (bstream_next(b) < 0 ||
711 : /* we asked for more data but didn't get any */
712 1912 : (more && b->eof && b->len < b->pos + lc->yycur + n))
713 : return EOF;
714 : }
715 : return 1;
716 : }
717 :
718 : static inline int
719 132529438 : scanner_getc(struct scanner *lc)
720 : {
721 132529438 : bstream *b = lc->rs;
722 132529438 : unsigned char *s = NULL;
723 132529438 : int c, m, n, mask;
724 :
725 132529438 : if (scanner_read_more(lc, 1) == EOF) {
726 : //lc->errstr = SQLSTATE(42000) "end of input stream";
727 : return EOF;
728 : }
729 132396649 : lc->errstr = NULL;
730 :
731 132396649 : s = (unsigned char *) b->buf + b->pos + lc->yycur++;
732 132396649 : if (((c = *s) & 0x80) == 0) {
733 : /* 7-bit char */
734 : return c;
735 : }
736 88230 : for (n = 0, m = 0x40; c & m; n++, m >>= 1)
737 : ;
738 : /* n now is number of 10xxxxxx bytes that should follow */
739 29435 : if (n == 0 || n >= 6 || (b->pos + n) > b->len) {
740 : /* incorrect UTF-8 sequence */
741 : /* n==0: c == 10xxxxxx */
742 : /* n>=6: c == 1111111x */
743 0 : lc->errstr = SQLSTATE(42000) "invalid start of UTF-8 sequence";
744 0 : goto error;
745 : }
746 :
747 29435 : if (scanner_read_more(lc, (size_t) n) == EOF)
748 : return EOF;
749 29435 : s = (unsigned char *) b->buf + b->pos + lc->yycur;
750 :
751 29435 : mask = utf8chkmsk[n];
752 29435 : c &= ~(0xFFC0 >> n); /* remove non-x bits */
753 88229 : while (--n >= 0) {
754 58795 : c <<= 6;
755 58795 : lc->yycur++;
756 58795 : if (((m = *s++) & 0xC0) != 0x80) {
757 : /* incorrect UTF-8 sequence: byte is not 10xxxxxx */
758 : /* this includes end-of-string (m == 0) */
759 1 : lc->errstr = SQLSTATE(42000) "invalid continuation in UTF-8 sequence";
760 1 : goto error;
761 : }
762 58794 : c |= m & 0x3F;
763 : }
764 29434 : if ((c & mask) == 0) {
765 : /* incorrect UTF-8 sequence: not shortest possible */
766 0 : lc->errstr = SQLSTATE(42000) "not shortest possible UTF-8 sequence";
767 0 : goto error;
768 : }
769 :
770 : return c;
771 :
772 1 : error:
773 1 : if (b->pos + lc->yycur < b->len) /* skip bogus char */
774 0 : lc->yycur++;
775 : return EOF;
776 : }
777 :
778 : static int
779 27441207 : scanner_token(struct scanner *lc, int token)
780 : {
781 27441207 : lc->yybak = lc->rs->buf[lc->rs->pos + lc->yycur];
782 27441207 : lc->rs->buf[lc->rs->pos + lc->yycur] = 0;
783 27441207 : lc->yyval = token;
784 27441207 : return lc->yyval;
785 : }
786 :
/*
 * Scan the remainder of a quoted string or quoted identifier, starting at
 * the current scan position, up to and including the closing `quote`.
 *
 * quote    the quote character that terminates the text (' or ")
 * escapes  when true, a backslash escapes the character that follows it,
 *          so a backslash-escaped quote does not end the string
 *
 * A doubled quote character inside the text does not end it either.
 *
 * Returns STRING (via scanner_token) on success; LEX_ERROR when a single
 * scan pass reaches `limit` bytes, when a NUL byte is found inside the
 * buffered data, or when U+FEFF appears in a double-quoted text; EOF when
 * the input ends first.
 */
static int
scanner_string(mvc *c, int quote, bool escapes)
{
	struct scanner *lc = &c->scanner;
	bstream *rs = lc->rs;
	int cur = quote;
	bool escape = false;	/* previous char was an unescaped backslash */
	/* maximum number of bytes examined by one pass of the inner loop */
	const size_t limit = quote == '"' ? 1 << 11 : 1 << 30;

	lc->started = 1;
	while (cur != EOF) {
		size_t pos = 0;
		/* absolute buffer offset where this pass starts */
		const size_t yycur = rs->pos + lc->yycur;

		/* fast path: walk plain 7-bit bytes directly in the buffer
		 * until a NUL, a byte with the high bit set, the limit, or
		 * an unescaped quote is found */
		while (cur != EOF && (quote != '"' || cur != 0xFEFF) && pos < limit &&
		       (((cur = rs->buf[yycur + pos++]) & 0x80) == 0) &&
		       cur && (cur != quote || escape)) {
			if (escapes && cur == '\\')
				escape = !escape;
			else
				escape = false;
		}
		if (pos == limit) {
			(void) sql_error(c, 2, SQLSTATE(42000) "string too long");
			return LEX_ERROR;
		}
		/* BOM character not allowed as an identifier */
		if (cur == EOF || (quote == '"' && cur == 0xFEFF))
			return scanner_error(c, cur);
		lc->yycur += pos;
		/* check for quote escaped quote: Obscure SQL Rule */
		if (cur == quote && rs->buf[yycur + pos] == quote) {
			lc->yycur++;
			continue;
		}
		assert(yycur + pos <= rs->len + 1);
		if (cur == quote && !escape) {
			return scanner_token(lc, STRING);
		}
		lc->yycur--;	/* go back to current (possibly invalid) char */
		/* long utf8, if correct isn't the quote */
		if (!cur) {
			/* a NUL inside the buffered data is an error; a NUL at
			 * the end of the data means more input is needed */
			if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
				(void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
				return LEX_ERROR;
			}
			cur = scanner_read_more(lc, 1);
		} else {
			/* byte with the high bit set: let scanner_getc decode
			 * and validate the whole sequence */
			cur = scanner_getc(lc);
		}
	}
	(void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "Unexpected end of input");
	return EOF;
}
841 :
842 : /* scan a structure {blah} into a string. We only count the matching {}
843 : * unless escaped. We do not consider embeddings in string literals yet
844 : */
845 :
/*
 * Scan a brace-delimited body; the opening '{' must be the byte just before
 * the current scan position (asserted below).
 *
 * Returns X_BODY (via scanner_token) once the brace count drops to zero,
 * LEX_ERROR when a NUL byte is found inside the buffered data, and EOF when
 * the input ends first.
 */
static int
scanner_body(mvc *c)
{
	struct scanner *lc = &c->scanner;
	bstream *rs = lc->rs;
	int cur = (int) 'x';	/* any value other than EOF, to enter the loop */
	int blk = 1;		/* number of currently open braces */
	bool escape = false;	/* previous char was an unescaped backslash */

	lc->started = 1;
	assert(rs->buf[rs->pos + lc->yycur-1] == '{');
	while (cur != EOF) {
		size_t pos = rs->pos + lc->yycur;

		/* walk plain 7-bit bytes until a NUL, a byte with the high
		 * bit set, or the matching close brace is seen.
		 * NOTE(review): braces are counted even directly after a
		 * backslash; `escape` only keeps the loop going */
		while ((((cur = rs->buf[pos++]) & 0x80) == 0) && cur && (blk || escape)) {
			if (cur != '\\')
				escape = false;
			else
				escape = !escape;
			blk += cur =='{';
			blk -= cur =='}';
		}
		lc->yycur = pos - rs->pos;
		assert(pos <= rs->len + 1);
		if (blk == 0 && !escape){
			lc->yycur--;	/* go back to current (possibly invalid) char */
			return scanner_token(lc, X_BODY);
		}
		lc->yycur--;	/* go back to current (possibly invalid) char */
		if (!cur) {
			/* a NUL inside the buffered data is an error; a NUL at
			 * the end of the data means more input is needed */
			if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
				(void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
				return LEX_ERROR;
			}
			cur = scanner_read_more(lc, 1);
		} else {
			/* byte with the high bit set: decode it with scanner_getc */
			cur = scanner_getc(lc);
		}
	}
	(void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input");
	return EOF;
}
888 :
889 : static int
890 13156751 : keyword_or_ident(mvc * c, int cur)
891 : {
892 13156751 : struct scanner *lc = &c->scanner;
893 13156751 : keyword *k = NULL;
894 13156751 : size_t s;
895 :
896 13156751 : lc->started = 1;
897 13156751 : utf8_putchar(lc, cur);
898 13156731 : s = lc->yycur;
899 13156731 : lc->yyval = IDENT;
900 78560386 : while ((cur = scanner_getc(lc)) != EOF) {
901 78560359 : if (!iswalnum(cur) && cur != '_') {
902 13156704 : utf8_putchar(lc, cur);
903 13156728 : (void)scanner_token(lc, IDENT);
904 13156728 : if ((k = find_keyword_bs(lc,s)))
905 8073866 : lc->yyval = k->token;
906 13156863 : return lc->yyval;
907 : }
908 : }
909 : if (cur < 0)
910 : return cur;
911 : (void)scanner_token(lc, IDENT);
912 : if ((k = find_keyword_bs(lc,s)))
913 : lc->yyval = k->token;
914 : return lc->yyval;
915 : }
916 :
917 : static int
918 13903400 : skip_white_space(struct scanner * lc)
919 : {
920 17698662 : int cur;
921 :
922 17698662 : do {
923 17698662 : lc->yysval = lc->yycur;
924 17698662 : } while ((cur = scanner_getc(lc)) != EOF && iswspace(cur));
925 13903542 : return cur;
926 : }
927 :
928 : static int
929 67564 : skip_c_comment(struct scanner * lc)
930 : {
931 67564 : int cur;
932 67564 : int prev = 0;
933 67564 : int started = lc->started;
934 67564 : int depth = 1;
935 :
936 67564 : lc->started = 1;
937 1362768 : while (depth > 0 && (cur = scanner_getc(lc)) != EOF) {
938 1295204 : if (prev == '*' && cur == '/')
939 67564 : depth--;
940 1227640 : else if (prev == '/' && cur == '*') {
941 : /* block comments can nest */
942 0 : cur = 0; /* prevent slash-star-slash from matching */
943 0 : depth++;
944 : }
945 : prev = cur;
946 : }
947 67564 : lc->yysval = lc->yycur;
948 67564 : lc->started = started;
949 : /* a comment is equivalent to a newline */
950 67564 : return cur == EOF ? cur : '\n';
951 : }
952 :
953 : static int
954 2919 : skip_sql_comment(struct scanner * lc)
955 : {
956 2919 : int cur;
957 2919 : int started = lc->started;
958 :
959 2919 : lc->started = 1;
960 820924 : while ((cur = scanner_getc(lc)) != EOF && (cur != '\n'))
961 : ;
962 2919 : lc->yysval = lc->yycur;
963 2919 : lc->started = started;
964 : /* a comment is equivalent to a newline */
965 2919 : return cur;
966 : }
967 :
968 : static int tokenize(mvc * lc, int cur);
969 :
/* True when `cur` is a decimal digit according to iswdigit(). */
static inline bool is_valid_decimal_digit(int cur)
{
	return iswdigit((wint_t) cur) != 0;
}
/* True when `cur` is '0' or '1'. */
static inline bool is_valid_binary_digit(int cur)
{
	return cur == '0' || cur == '1';
}
/* True when `cur` is one of '0' .. '7'. */
static inline bool is_valid_octal_digit(int cur)
{
	return cur >= '0' && cur <= '7';
}
/* True when `cur` is a hexadecimal digit according to iswxdigit(). */
static inline bool is_valid_hexadecimal_digit(int cur)
{
	return iswxdigit((wint_t) cur) != 0;
}
974 :
975 1874015 : static inline int check_validity_number(mvc* c, int pcur, bool initial_underscore_allowed, int *token, int type) {
976 1874015 : struct scanner *lc = &c->scanner;
977 1874015 : bool (*is_valid_n_ary_digit)(int);
978 :
979 1874015 : if (pcur == '_' && !initial_underscore_allowed) /* ERROR: initial underscore not allowed */ {
980 0 : *token = 0;
981 0 : return '_';
982 : }
983 :
984 1874015 : switch (type) {
985 : case BINARYNUM:
986 : is_valid_n_ary_digit = &is_valid_binary_digit;
987 : break;
988 3 : case OCTALNUM:
989 3 : is_valid_n_ary_digit = &is_valid_octal_digit;
990 3 : break;
991 280 : case HEXADECIMALNUM:
992 280 : is_valid_n_ary_digit = &is_valid_hexadecimal_digit;
993 280 : break;
994 1873730 : default:
995 1873730 : is_valid_n_ary_digit = &is_valid_decimal_digit;
996 1873730 : break;
997 : }
998 :
999 1874015 : if ( !(pcur == '_' || is_valid_n_ary_digit(pcur)) ) /* ERROR: first digit is not valid */ {
1000 17 : *token = 0;
1001 17 : return pcur;
1002 : }
1003 :
1004 1873909 : int cur = scanner_getc(lc);
1005 1873861 : *token = type;
1006 3779058 : while (cur != EOF) {
1007 3779054 : if (cur == '_') {
1008 25 : if (pcur == '_') /* ERROR: multiple consecutive underscores */ {
1009 2 : *token = 0;
1010 2 : return '_';
1011 : }
1012 : }
1013 3779029 : else if (!is_valid_n_ary_digit(cur))
1014 : break;
1015 1905315 : pcur = cur;
1016 1905315 : cur = scanner_getc(lc);
1017 : }
1018 :
1019 1873566 : if (pcur == '_') {
1020 3 : *token = 0;
1021 3 : if (iswalnum(cur)) /* ERROR: not a valid digit */
1022 : return cur;
1023 : else /* ERROR: number ends with underscore */
1024 : return '_';
1025 : }
1026 :
1027 : return cur;
1028 : }
1029 :
1030 : static int
1031 1860701 : number(mvc * c, int cur)
1032 : {
1033 1860701 : struct scanner *lc = &c->scanner;
1034 1860701 : int token = sqlINT;
1035 :
1036 : /* a number has one of these forms (expressed in regular expressions):
1037 : * 0x[0-9A-Fa-f]+ -- (hexadecimal) INTEGER
1038 : * \.[0-9]+ -- DECIMAL
1039 : * [0-9]+\.[0-9]* -- DECIMAL
1040 : * [0-9]+@0 -- OID
1041 : * [0-9]*\.[0-9]+[eE][-+]?[0-9]+ -- REAL
1042 : * [0-9]+(\.[0-9]*)?[eE][-+]?[0-9]+ -- REAL
1043 : * [0-9]+ -- (decimal) INTEGER
1044 : */
1045 1860701 : lc->started = 1;
1046 1860701 : if (cur == '0') {
1047 303697 : switch ((cur = scanner_getc(lc))) {
1048 2 : case 'b':
1049 2 : cur = scanner_getc(lc);
1050 2 : if ((cur = check_validity_number(c, cur, true, &token, BINARYNUM)) == EOF) return cur;
1051 : break;
1052 3 : case 'o':
1053 3 : cur = scanner_getc(lc);
1054 3 : if ((cur = check_validity_number(c, cur, true, &token, OCTALNUM)) == EOF) return cur;
1055 : break;
1056 280 : case 'x':
1057 280 : cur = scanner_getc(lc);
1058 280 : if ((cur = check_validity_number(c, cur, true, &token, HEXADECIMALNUM)) == EOF) return cur;
1059 : break;
1060 303413 : default:
1061 303413 : utf8_putchar(lc, cur);
1062 303413 : cur = '0';
1063 : }
1064 : }
1065 1860702 : if (token == sqlINT) {
1066 1860426 : if ((cur = check_validity_number(c, cur, false, &token, sqlINT)) == EOF) return cur;
1067 1860032 : if (cur == '@') {
1068 0 : if (token == sqlINT) {
1069 0 : cur = scanner_getc(lc);
1070 0 : if (cur == EOF)
1071 : return cur;
1072 0 : if (cur == '0') {
1073 0 : cur = scanner_getc(lc);
1074 0 : if (cur == EOF)
1075 : return cur;
1076 0 : token = OIDNUM;
1077 : } else {
1078 : /* number + '@' not followed by 0: show '@' as erroneous */
1079 0 : utf8_putchar(lc, cur);
1080 0 : cur = '@';
1081 0 : token = 0;
1082 : }
1083 : }
1084 : } else {
1085 1860032 : if (cur == '.') {
1086 11060 : cur = scanner_getc(lc);
1087 11060 : if (iswalnum(cur)) /* early exit for numerical forms with final . e.g. 10. */
1088 11054 : if ((cur = check_validity_number(c, cur, false, &token, INTNUM)) == EOF) return cur;
1089 : }
1090 1860032 : if (token != 0)
1091 1860040 : if (cur == 'e' || cur == 'E') {
1092 2229 : cur = scanner_getc(lc);
1093 2229 : if (cur == '+' || cur == '-')
1094 2111 : cur = scanner_getc(lc);
1095 2229 : if ((cur = check_validity_number(c, cur, false, &token, APPROXNUM)) == EOF) return cur;
1096 : }
1097 : }
1098 : }
1099 :
1100 1858079 : assert(cur != EOF);
1101 :
1102 1860308 : if (iswalnum(cur)) /* ERROR: not a valid digit */
1103 6 : token = 0;
1104 :
1105 1860308 : utf8_putchar(lc, cur);
1106 :
1107 1860247 : if (token) {
1108 1860237 : return scanner_token(lc, token);
1109 : } else {
1110 10 : (void)sql_error( c, 2, SQLSTATE(42000) "Unexpected symbol %lc", (wint_t) cur);
1111 10 : return LEX_ERROR;
1112 : }
1113 : }
1114 :
1115 : static
1116 12487831 : int scanner_symbol(mvc * c, int cur)
1117 : {
1118 12487831 : struct scanner *lc = &c->scanner;
1119 12487831 : int next = 0;
1120 12487831 : int started = lc->started;
1121 :
1122 12487831 : switch (cur) {
1123 70194 : case '/':
1124 70194 : lc->started = 1;
1125 70194 : next = scanner_getc(lc);
1126 70194 : if (next < 0)
1127 : return EOF;
1128 70194 : if (next == '*') {
1129 67564 : lc->started = started;
1130 67564 : cur = skip_c_comment(lc);
1131 67564 : if (cur < 0)
1132 : return EOF;
1133 67564 : return tokenize(c, cur);
1134 : } else {
1135 2630 : utf8_putchar(lc, next);
1136 2630 : return scanner_token(lc, cur);
1137 : }
1138 0 : case '0':
1139 : case '1':
1140 : case '2':
1141 : case '3':
1142 : case '4':
1143 : case '5':
1144 : case '6':
1145 : case '7':
1146 : case '8':
1147 : case '9':
1148 0 : return number(c, cur);
1149 5 : case '#':
1150 5 : if ((cur = skip_sql_comment(lc)) == EOF)
1151 : return cur;
1152 5 : return tokenize(c, cur);
1153 832785 : case '\'':
1154 832785 : if (lc->raw_string_mode || lc->next_string_is_raw)
1155 57 : return scanner_string(c, cur, false);
1156 832728 : return scanner_string(c, cur, true);
1157 1241075 : case '"':
1158 1241075 : return scanner_string(c, cur, false);
1159 481 : case '{':
1160 : // if previous tokens like LANGUAGE IDENT
1161 : // TODO checking on IDENT only may not be enough
1162 481 : if (lc->yylast == IDENT)
1163 215 : return scanner_body(c);
1164 266 : lc->started = 1;
1165 266 : return scanner_token(lc, cur);
1166 266 : case '}':
1167 266 : lc->started = 1;
1168 266 : return scanner_token(lc, cur);
1169 31417 : case '-':
1170 31417 : lc->started = 1;
1171 31417 : next = scanner_getc(lc);
1172 31417 : if (next < 0)
1173 : return EOF;
1174 31416 : if (next == '-') {
1175 2914 : lc->started = started;
1176 2914 : if ((cur = skip_sql_comment(lc)) == EOF)
1177 : return cur;
1178 2914 : return tokenize(c, cur);
1179 : }
1180 28502 : lc->started = 1;
1181 28502 : utf8_putchar(lc, next);
1182 28502 : return scanner_token(lc, cur);
1183 12 : case '~': /* binary not */
1184 12 : lc->started = 1;
1185 12 : next = scanner_getc(lc);
1186 12 : if (next < 0)
1187 : return EOF;
1188 12 : if (next == '=')
1189 5 : return scanner_token(lc, GEOM_MBR_EQUAL);
1190 7 : utf8_putchar(lc, next);
1191 7 : return scanner_token(lc, cur);
1192 6976019 : case '^': /* binary xor */
1193 : case '*':
1194 : case '?':
1195 : case ':':
1196 : case '%':
1197 : case '+':
1198 : case '(':
1199 : case ')':
1200 : case ',':
1201 : case '=':
1202 : case '[':
1203 : case ']':
1204 6976019 : lc->started = 1;
1205 6976019 : return scanner_token(lc, cur);
1206 5989 : case '&':
1207 5989 : lc->started = 1;
1208 5989 : cur = scanner_getc(lc);
1209 5989 : if (cur < 0)
1210 : return EOF;
1211 5989 : if (cur < 0)
1212 : return EOF;
1213 5989 : if(cur == '<') {
1214 3 : next = scanner_getc(lc);
1215 3 : if (next < 0)
1216 : return EOF;
1217 3 : if(next == '|') {
1218 0 : return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);
1219 : } else {
1220 3 : utf8_putchar(lc, next); //put the char back
1221 3 : return scanner_token(lc, GEOM_OVERLAP_OR_LEFT);
1222 : }
1223 5986 : } else if(cur == '>')
1224 3 : return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT);
1225 5983 : else if(cur == '&')
1226 3 : return scanner_token(lc, GEOM_OVERLAP);
1227 : else {/* binary and */
1228 5980 : utf8_putchar(lc, cur); //put the char back
1229 5980 : return scanner_token(lc, '&');
1230 : }
1231 19 : case '@':
1232 19 : lc->started = 1;
1233 19 : return scanner_token(lc, AT);
1234 957712 : case ';':
1235 957712 : lc->started = 0;
1236 957712 : return scanner_token(lc, SCOLON);
1237 50465 : case '<':
1238 50465 : lc->started = 1;
1239 50465 : cur = scanner_getc(lc);
1240 50465 : if (cur < 0)
1241 : return EOF;
1242 50465 : if (cur == '=') {
1243 3120 : return scanner_token( lc, COMPARISON);
1244 47345 : } else if (cur == '>') {
1245 33991 : return scanner_token( lc, COMPARISON);
1246 13354 : } else if (cur == '<') {
1247 46 : next = scanner_getc(lc);
1248 46 : if (next < 0)
1249 : return EOF;
1250 46 : if (next == '=') {
1251 4 : return scanner_token( lc, LEFT_SHIFT_ASSIGN);
1252 42 : } else if (next == '|') {
1253 1 : return scanner_token(lc, GEOM_BELOW);
1254 : } else {
1255 41 : utf8_putchar(lc, next); //put the char back
1256 41 : return scanner_token( lc, LEFT_SHIFT);
1257 : }
1258 13308 : } else if(cur == '-') {
1259 19 : next = scanner_getc(lc);
1260 19 : if (next < 0)
1261 : return EOF;
1262 19 : if(next == '>') {
1263 7 : return scanner_token(lc, GEOM_DIST);
1264 : } else {
1265 : //put the characters back and fall in the next possible case
1266 12 : utf8_putchar(lc, next);
1267 12 : utf8_putchar(lc, cur);
1268 12 : return scanner_token( lc, COMPARISON);
1269 : }
1270 : } else {
1271 13289 : utf8_putchar(lc, cur);
1272 13289 : return scanner_token( lc, COMPARISON);
1273 : }
1274 46980 : case '>':
1275 46980 : lc->started = 1;
1276 46980 : cur = scanner_getc(lc);
1277 46980 : if (cur < 0)
1278 : return EOF;
1279 46980 : if (cur == '>') {
1280 2531 : cur = scanner_getc(lc);
1281 2531 : if (cur < 0)
1282 : return EOF;
1283 2531 : if (cur == '=')
1284 3 : return scanner_token( lc, RIGHT_SHIFT_ASSIGN);
1285 2528 : utf8_putchar(lc, cur);
1286 2528 : return scanner_token( lc, RIGHT_SHIFT);
1287 44449 : } else if (cur != '=') {
1288 42221 : utf8_putchar(lc, cur);
1289 42221 : return scanner_token( lc, COMPARISON);
1290 : } else {
1291 2228 : return scanner_token( lc, COMPARISON);
1292 : }
1293 2062802 : case '.':
1294 2062802 : lc->started = 1;
1295 2062802 : cur = scanner_getc(lc);
1296 2062802 : if (cur < 0)
1297 : return EOF;
1298 2062801 : if (!iswdigit(cur)) {
1299 2062788 : utf8_putchar(lc, cur);
1300 2062788 : return scanner_token( lc, '.');
1301 : } else {
1302 13 : utf8_putchar(lc, cur);
1303 13 : cur = '.';
1304 13 : return number(c, cur);
1305 : }
1306 211576 : case '|': /* binary or or string concat */
1307 211576 : lc->started = 1;
1308 211576 : cur = scanner_getc(lc);
1309 211576 : if (cur < 0)
1310 : return EOF;
1311 211576 : if (cur == '|') {
1312 211547 : return scanner_token(lc, CONCATSTRING);
1313 29 : } else if (cur == '&') {
1314 0 : next = scanner_getc(lc);
1315 0 : if (next < 0)
1316 : return EOF;
1317 0 : if(next == '>') {
1318 0 : return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);
1319 : } else {
1320 0 : utf8_putchar(lc, next); //put the char back
1321 0 : utf8_putchar(lc, cur); //put the char back
1322 0 : return scanner_token(lc, '|');
1323 : }
1324 29 : } else if (cur == '>') {
1325 1 : next = scanner_getc(lc);
1326 1 : if (next < 0)
1327 : return EOF;
1328 1 : if(next == '>') {
1329 1 : return scanner_token(lc, GEOM_ABOVE);
1330 : } else {
1331 0 : utf8_putchar(lc, next); //put the char back
1332 0 : utf8_putchar(lc, cur); //put the char back
1333 0 : return scanner_token(lc, '|');
1334 : }
1335 : } else {
1336 28 : utf8_putchar(lc, cur);
1337 28 : return scanner_token(lc, '|');
1338 : }
1339 : }
1340 34 : (void)sql_error( c, 3, SQLSTATE(42000) "Unexpected symbol (%lc)", (wint_t) cur);
1341 34 : return LEX_ERROR;
1342 : }
1343 :
1344 : static int
1345 27529008 : tokenize(mvc * c, int cur)
1346 : {
1347 27529008 : struct scanner *lc = &c->scanner;
1348 55293526 : while (1) {
1349 41411267 : if (cur == 0xFEFF) {
1350 : /* on Linux at least, iswpunct returns TRUE
1351 : * for U+FEFF, but we don't want that, we just
1352 : * want to go to the scanner_error case
1353 : * below */
1354 : ;
1355 41411366 : } else if (iswspace(cur)) {
1356 13899155 : if ((cur = skip_white_space(lc)) == EOF)
1357 : return cur;
1358 13882259 : continue; /* try again */
1359 27512211 : } else if (iswdigit(cur)) {
1360 1860719 : return number(c, cur);
1361 25651492 : } else if (iswalpha(cur) || cur == '_') {
1362 13130700 : switch (cur) {
1363 646966 : case 'e': /* string with escapes */
1364 : case 'E':
1365 646966 : if (scanner_read_more(lc, 1) != EOF &&
1366 646966 : lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1367 3694 : return scanner_string(c, scanner_getc(lc), true);
1368 : }
1369 : break;
1370 401272 : case 'x': /* blob */
1371 : case 'X':
1372 : case 'r': /* raw string */
1373 : case 'R':
1374 401272 : if (scanner_read_more(lc, 1) != EOF &&
1375 401272 : lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1376 3262 : return scanner_string(c, scanner_getc(lc), false);
1377 : }
1378 : break;
1379 150390 : case 'u': /* unicode string */
1380 : case 'U':
1381 150390 : if (scanner_read_more(lc, 1) != EOF &&
1382 150407 : lc->rs->buf[lc->rs->pos + lc->yycur] == '&' &&
1383 17 : scanner_read_more(lc, 2) != EOF &&
1384 17 : (lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '\'' ||
1385 : lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '"')) {
1386 17 : cur = scanner_getc(lc); /* '&' */
1387 17 : return scanner_string(c, scanner_getc(lc), false);
1388 : }
1389 : break;
1390 : default:
1391 : break;
1392 : }
1393 13156762 : return keyword_or_ident(c, cur);
1394 12487757 : } else if (iswpunct(cur)) {
1395 12487626 : return scanner_symbol(c, cur);
1396 : }
1397 32 : if (cur == EOF) {
1398 0 : if (lc->mode == LINE_1 || !lc->started )
1399 : return cur;
1400 0 : return scanner_error(c, cur);
1401 : }
1402 : /* none of the above: error */
1403 32 : return scanner_error(c, cur);
1404 : }
1405 : }
1406 :
1407 : /* SQL 'quoted' idents consist of a set of any character of
1408 : * the source language character set other than a 'quote'
1409 : *
1410 : * MonetDB has 3 restrictions:
1411 : * 1 we disallow '%' as the first character.
1412 : * 2 the length is limited to 1024 characters
1413 : * 3 the identifier 'TID%' is not allowed
1414 : */
1415 : static bool
1416 1241064 : valid_ident(const char *restrict s, char *restrict dst)
1417 : {
1418 1241064 : int p = 0;
1419 :
1420 1241064 : if (*s == '%')
1421 : return false;
1422 :
1423 9204015 : while (*s) {
1424 7962951 : if ((dst[p++] = *s++) == '"' && *s == '"')
1425 62 : s++;
1426 7962951 : if (p >= 1024)
1427 : return false;
1428 : }
1429 1241064 : dst[p] = '\0';
1430 1241064 : if (strcmp(dst, TID + 1) == 0) /* an index named 'TID%' could interfere with '%TID%' */
1431 : return false;
1432 : return true;
1433 : }
1434 :
/*
 * Produce the next token and its semantic value.
 *
 * yylval  receives the token text in yylval->sval
 * parm    the mvc context (cast from void *)
 *
 * A token parked in lc->yynext is returned first.  Otherwise the byte that
 * the previous scanner_token() call replaced with a NUL is restored, one
 * code point is read and tokenize() is run on it.  For identifier-like
 * tokens the text is duplicated with sa_strndup(); for STRING tokens the
 * text is decoded into a buffer from sa_alloc() according to its prefix.
 *
 * Returns the token, EOF at end of input (or when the input buffer is
 * NULL), or LEX_ERROR after reporting an invalid identifier or a string
 * that GDKstrFromStr() rejects.
 */
static inline int
sql_get_next_token(YYSTYPE *yylval, void *parm)
{
	mvc *c = (mvc*)parm;
	struct scanner *lc = &c->scanner;
	int token = 0, cur = 0;

	if (lc->rs->buf == NULL) /* malloc failure */
		return EOF;

	/* hand out a token that was read ahead earlier */
	if (lc->yynext) {
		int next = lc->yynext;

		lc->yynext = 0;
		return(next);
	}

	/* undo the NUL written by scanner_token() for the previous token */
	if (lc->yybak) {
		lc->rs->buf[lc->rs->pos + lc->yycur] = lc->yybak;
		lc->yybak = 0;
	}

	lc->yysval = lc->yycur;
	lc->yylast = lc->yyval;
	cur = scanner_getc(lc);
	if (cur < 0)
		return EOF;
	token = tokenize(c, cur);

	/* token text starts at offset yysval of the current query */
	yylval->sval = (lc->rs->buf + lc->rs->pos + lc->yysval);

	/* This is needed as ALIAS and aTYPE get defined too late, see
	   sql_keyword.h */
	if (token == KW_ALIAS)
		token = ALIAS;

	if (token == KW_TYPE)
		token = aTYPE;

	if (token == IDENT || token == COMPARISON ||
	    token == RANK || token == aTYPE || token == ALIAS || token == MARGFUNC) {
		yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval);
		lc->next_string_is_raw = false;
	} else if (token == STRING) {
		/* first character of the token text: a quote or a prefix letter */
		char quote = *yylval->sval;
		char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 );
		char *dst;

		assert(quote == '"' || quote == '\'' || quote == 'E' || quote == 'e' || quote == 'U' || quote == 'u' || quote == 'X' || quote == 'x' || quote == 'R' || quote == 'r');

		/* temporarily cut off the closing quote; it is put back
		 * after the switch (not on the LEX_ERROR returns) */
		lc->rs->buf[lc->rs->pos + lc->yycur - 1] = 0;
		switch (quote) {
		case '"':
			/* quoted identifier */
			if (valid_ident(yylval->sval+1,str)) {
				token = IDENT;
			} else {
				sql_error(c, 1, SQLSTATE(42000) "Invalid identifier '%s'", yylval->sval+1);
				return LEX_ERROR;
			}
			break;
		case 'e':
		case 'E':
			/* e'..': decoded by GDKstrFromStr */
			assert(yylval->sval[1] == '\'');
			if (GDKstrFromStr((unsigned char *) str,
					  (unsigned char *) yylval->sval + 2,
					  lc->yycur-lc->yysval - 2, '\'') < 0) {
				/* strip the GDKERROR prefix or a leading '!'
				 * from the message before reporting it */
				char *err = GDKerrbuf;
				if (strncmp(err, GDKERROR, strlen(GDKERROR)) == 0)
					err += strlen(GDKERROR);
				else if (*err == '!')
					err++;
				sql_error(c, 1, SQLSTATE(42000) "%s", err);
				return LEX_ERROR;
			}
			quote = '\'';
			break;
		case 'u':
		case 'U':
			/* u&'..' or u&"..": copied verbatim */
			assert(yylval->sval[1] == '&');
			assert(yylval->sval[2] == '\'' || yylval->sval[2] == '"');
			strcpy(str, yylval->sval + 3);
			token = yylval->sval[2] == '\'' ? USTRING : UIDENT;
			quote = yylval->sval[2];
			lc->next_string_is_raw = true;
			break;
		case 'x':
		case 'X':
			/* x'..': copy, collapsing '' into ' */
			assert(yylval->sval[1] == '\'');
			dst = str;
			for (char *src = yylval->sval + 2; *src; dst++)
				if ((*dst = *src++) == '\'' && *src == '\'')
					src++;
			*dst = 0;
			quote = '\'';
			token = XSTRING;
			lc->next_string_is_raw = true;
			break;
		case 'r':
		case 'R':
			/* r'..': copy, collapsing '' into ' */
			assert(yylval->sval[1] == '\'');
			dst = str;
			for (char *src = yylval->sval + 2; *src; dst++)
				if ((*dst = *src++) == '\'' && *src == '\'')
					src++;
			quote = '\'';
			*dst = 0;
			break;
		default:
			/* plain '..' string */
			if (lc->raw_string_mode || lc->next_string_is_raw) {
				/* raw: copy, collapsing '' into ' */
				dst = str;
				for (char *src = yylval->sval + 1; *src; dst++)
					if ((*dst = *src++) == '\'' && *src == '\'')
						src++;
				*dst = 0;
			} else {
				if (GDKstrFromStr((unsigned char *)str,
						  (unsigned char *)yylval->sval + 1,
						  lc->yycur - lc->yysval - 1,
						  '\'') < 0) {
					sql_error(c, 1, SQLSTATE(42000) "%s", GDKerrbuf);
					return LEX_ERROR;
				}
			}
			break;
		}
		yylval->sval = str;

		/* reset original */
		lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote;
	} else {
		lc->next_string_is_raw = false;
	}

	return(token);
}
1570 :
1571 : static int scanner( YYSTYPE *yylval, void *m, bool log);
1572 :
1573 : static int
1574 27500666 : scanner(YYSTYPE * yylval, void *parm, bool log)
1575 : {
1576 27500666 : int token;
1577 27500666 : mvc *c = (mvc *) parm;
1578 27500666 : struct scanner *lc = &c->scanner;
1579 27500666 : size_t pos;
1580 :
1581 : /* store position for when view's query ends */
1582 27500666 : pos = lc->rs->pos + lc->yycur;
1583 :
1584 27500666 : token = sql_get_next_token(yylval, parm);
1585 :
1586 27497695 : if (token == NOT) {
1587 72543 : int next = scanner(yylval, parm, false);
1588 :
1589 72543 : if (next == NOT) {
1590 2 : return scanner(yylval, parm, false);
1591 : } else if (next == EXISTS) {
1592 : token = NOT_EXISTS;
1593 : } else if (next == BETWEEN) {
1594 : token = NOT_BETWEEN;
1595 : } else if (next == sqlIN) {
1596 : token = NOT_IN;
1597 : } else if (next == LIKE) {
1598 : token = NOT_LIKE;
1599 : } else if (next == ILIKE) {
1600 : token = NOT_ILIKE;
1601 : } else {
1602 59996 : lc->yynext = next;
1603 : }
1604 27425152 : } else if (token == SCOLON) {
1605 : /* ignore semi-colon(s) following a semi-colon */
1606 957681 : if (lc->yylast == SCOLON) {
1607 131961 : size_t prev = lc->yycur;
1608 131962 : while ((token = sql_get_next_token(yylval, parm)) == SCOLON)
1609 1 : prev = lc->yycur;
1610 :
1611 : /* skip the skipped stuff also in the buffer */
1612 131961 : lc->rs->pos += prev;
1613 131961 : lc->yycur -= prev;
1614 : }
1615 : }
1616 :
1617 27497693 : if (lc->log && log)
1618 0 : mnstr_write(lc->log, lc->rs->buf+pos, lc->rs->pos + lc->yycur - pos, 1);
1619 :
1620 27497693 : lc->started += (token != EOF);
1621 27497693 : return token;
1622 : }
1623 :
1624 : /* also see sql_parser.y */
1625 : extern int sqllex(YYSTYPE * yylval, void *parm);
1626 :
1627 : int
1628 27428436 : sqllex(YYSTYPE * yylval, void *parm)
1629 : {
1630 27428436 : return scanner(yylval, parm, true);
1631 : }
|