Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include <wctype.h>
15 : #include "sql_mem.h"
16 : #include "sql_scan.h"
17 : #include "sql_types.h"
18 : #include "sql_symbol.h"
19 : #include "sql_mvc.h"
20 : #include "sql_parser.tab.h"
21 : #include "sql_semantic.h"
22 : #include "sql_parser.h" /* for sql_error() */
23 :
24 : #include "stream.h"
25 : #include "mapi_prompt.h"
26 : #include <unistd.h>
27 : #include <string.h>
28 : #include <ctype.h>
29 : #include "sql_keyword.h"
30 :
31 : /**
32 : * Removes all comments before the query. In query comments are kept.
33 : */
34 : char *
35 402717 : query_cleaned(allocator *sa, const char *query)
36 : {
37 402717 : char *q, *r, *c = NULL;
38 402717 : int lines = 0;
39 402717 : int quote = 0; /* inside quotes ('..', "..", {..}) */
40 402717 : bool bs = false; /* seen a backslash in a quoted string */
41 402717 : bool incomment1 = false; /* inside traditional C style comment */
42 402717 : bool incomment2 = false; /* inside comment starting with -- */
43 402717 : bool inline_comment = false;
44 :
45 402717 : r = SA_NEW_ARRAY(sa, char, strlen(query) + 1);
46 402930 : if(!r)
47 : return NULL;
48 :
49 68287171 : for (q = r; *query; query++) {
50 67884241 : if (incomment1) {
51 16186 : if (*query == '/' && query[-1] == '*') {
52 234 : incomment1 = false;
53 234 : if (c == r && lines > 0) {
54 226 : q = r; // reset to beginning
55 226 : lines = 0;
56 226 : continue;
57 : }
58 : }
59 15960 : if (*query == '\n') lines++;
60 15960 : *q++ = *query;
61 67868055 : } else if (incomment2) {
62 831143 : if (*query == '\n') {
63 2906 : incomment2 = false;
64 2906 : inline_comment = false;
65 : /* add newline only if comment doesn't
66 : * occupy whole line */
67 2906 : if (q > r && q[-1] != '\n'){
68 1000 : *q++ = '\n';
69 1000 : lines++;
70 : }
71 828237 : } else if (inline_comment){
72 24007 : *q++ = *query; // preserve in line query comments
73 : }
74 67036912 : } else if (quote) {
75 22309373 : if (bs) {
76 : bs = false;
77 22306082 : } else if (*query == '\\') {
78 : bs = true;
79 22302778 : } else if (*query == quote) {
80 680589 : quote = 0;
81 : }
82 22309373 : *q++ = *query;
83 44727539 : } else if (*query == '"' || *query == '\'') {
84 680121 : quote = *query;
85 680121 : *q++ = *query;
86 44047418 : } else if (*query == '{') {
87 513 : quote = '}';
88 513 : *q++ = *query;
89 44046905 : } else if (*query == '-' && query[1] == '-') {
90 2906 : if (q > r && q[-1] != '\n') {
91 1000 : inline_comment = true;
92 1000 : *q++ = *query; // preserve in line query comments
93 : }
94 : incomment2 = true;
95 44043999 : } else if (*query == '/' && query[1] == '*') {
96 234 : incomment1 = true;
97 234 : c = q;
98 234 : *q++ = *query;
99 44043765 : } else if (*query == '\n') {
100 : /* collapse newlines */
101 866883 : if (q > r && q[-1] != '\n') {
102 825069 : *q++ = '\n';
103 825069 : lines++;
104 : }
105 43176882 : } else if (*query == ' ' || *query == '\t') {
106 : /* collapse white space */
107 7096009 : if (q > r && q[-1] != ' ')
108 5616045 : *q++ = ' ';
109 : } else {
110 36080873 : *q++ = *query;
111 : }
112 : }
113 402930 : *q = 0;
114 402930 : return r;
115 : }
116 :
117 : int
118 352 : scanner_init_keywords(void)
119 : {
120 352 : int failed = 0;
121 :
122 352 : failed += keywords_insert("false", BOOL_FALSE);
123 352 : failed += keywords_insert("true", BOOL_TRUE);
124 352 : failed += keywords_insert("bool", sqlBOOL);
125 :
126 352 : failed += keywords_insert("ALTER", ALTER);
127 352 : failed += keywords_insert("ADD", ADD);
128 352 : failed += keywords_insert("AND", AND);
129 :
130 352 : failed += keywords_insert("RANK", RANK);
131 352 : failed += keywords_insert("DENSE_RANK", RANK);
132 352 : failed += keywords_insert("PERCENT_RANK", RANK);
133 352 : failed += keywords_insert("CUME_DIST", RANK);
134 352 : failed += keywords_insert("ROW_NUMBER", RANK);
135 352 : failed += keywords_insert("NTILE", RANK);
136 352 : failed += keywords_insert("LAG", RANK);
137 352 : failed += keywords_insert("LEAD", RANK);
138 352 : failed += keywords_insert("FETCH", FETCH);
139 352 : failed += keywords_insert("FIRST_VALUE", RANK);
140 352 : failed += keywords_insert("LAST_VALUE", RANK);
141 352 : failed += keywords_insert("NTH_VALUE", RANK);
142 :
143 352 : failed += keywords_insert("BEST", BEST);
144 352 : failed += keywords_insert("EFFORT", EFFORT);
145 :
146 352 : failed += keywords_insert("AS", AS);
147 352 : failed += keywords_insert("ASC", ASC);
148 352 : failed += keywords_insert("AUTHORIZATION", AUTHORIZATION);
149 352 : failed += keywords_insert("BETWEEN", BETWEEN);
150 352 : failed += keywords_insert("SYMMETRIC", SYMMETRIC);
151 352 : failed += keywords_insert("ASYMMETRIC", ASYMMETRIC);
152 352 : failed += keywords_insert("BY", BY);
153 352 : failed += keywords_insert("CAST", CAST);
154 352 : failed += keywords_insert("CONVERT", CONVERT);
155 352 : failed += keywords_insert("CHARACTER", CHARACTER);
156 352 : failed += keywords_insert("CHAR", CHARACTER);
157 352 : failed += keywords_insert("VARYING", VARYING);
158 352 : failed += keywords_insert("VARCHAR", VARCHAR);
159 352 : failed += keywords_insert("BINARY", BINARY);
160 352 : failed += keywords_insert("LARGE", LARGE);
161 352 : failed += keywords_insert("OBJECT", OBJECT);
162 352 : failed += keywords_insert("CLOB", CLOB);
163 352 : failed += keywords_insert("BLOB", sqlBLOB);
164 352 : failed += keywords_insert("TEXT", sqlTEXT);
165 352 : failed += keywords_insert("TINYTEXT", sqlTEXT);
166 352 : failed += keywords_insert("STRING", CLOB); /* ? */
167 352 : failed += keywords_insert("CHECK", CHECK);
168 352 : failed += keywords_insert("CLIENT", CLIENT);
169 352 : failed += keywords_insert("SERVER", SERVER);
170 352 : failed += keywords_insert("COMMENT", COMMENT);
171 352 : failed += keywords_insert("CONSTRAINT", CONSTRAINT);
172 352 : failed += keywords_insert("CREATE", CREATE);
173 352 : failed += keywords_insert("CROSS", CROSS);
174 352 : failed += keywords_insert("COPY", COPY);
175 352 : failed += keywords_insert("RECORDS", RECORDS);
176 352 : failed += keywords_insert("DELIMITERS", DELIMITERS);
177 352 : failed += keywords_insert("STDIN", STDIN);
178 352 : failed += keywords_insert("STDOUT", STDOUT);
179 :
180 352 : failed += keywords_insert("TINYINT", TINYINT);
181 352 : failed += keywords_insert("SMALLINT", SMALLINT);
182 352 : failed += keywords_insert("INTEGER", sqlINTEGER);
183 352 : failed += keywords_insert("INT", sqlINTEGER);
184 352 : failed += keywords_insert("MEDIUMINT", sqlINTEGER);
185 352 : failed += keywords_insert("BIGINT", BIGINT);
186 : #ifdef HAVE_HGE
187 352 : failed += keywords_insert("HUGEINT", HUGEINT);
188 : #endif
189 352 : failed += keywords_insert("DEC", sqlDECIMAL);
190 352 : failed += keywords_insert("DECIMAL", sqlDECIMAL);
191 352 : failed += keywords_insert("NUMERIC", sqlDECIMAL);
192 352 : failed += keywords_insert("DECLARE", DECLARE);
193 352 : failed += keywords_insert("DEFAULT", DEFAULT);
194 352 : failed += keywords_insert("DESC", DESC);
195 352 : failed += keywords_insert("DISTINCT", DISTINCT);
196 352 : failed += keywords_insert("DOUBLE", sqlDOUBLE);
197 352 : failed += keywords_insert("REAL", sqlREAL);
198 352 : failed += keywords_insert("DROP", DROP);
199 352 : failed += keywords_insert("ESCAPE", ESCAPE);
200 352 : failed += keywords_insert("EXISTS", EXISTS);
201 352 : failed += keywords_insert("UESCAPE", UESCAPE);
202 352 : failed += keywords_insert("EXTRACT", EXTRACT);
203 352 : failed += keywords_insert("FLOAT", sqlFLOAT);
204 352 : failed += keywords_insert("FOR", FOR);
205 352 : failed += keywords_insert("FOREIGN", FOREIGN);
206 352 : failed += keywords_insert("FROM", FROM);
207 352 : failed += keywords_insert("FWF", FWF);
208 :
209 352 : failed += keywords_insert("BIG", BIG);
210 352 : failed += keywords_insert("LITTLE", LITTLE);
211 352 : failed += keywords_insert("NATIVE", NATIVE);
212 352 : failed += keywords_insert("ENDIAN", ENDIAN);
213 :
214 352 : failed += keywords_insert("REFERENCES", REFERENCES);
215 :
216 352 : failed += keywords_insert("MATCH", MATCH);
217 352 : failed += keywords_insert("FULL", FULL);
218 352 : failed += keywords_insert("PARTIAL", PARTIAL);
219 352 : failed += keywords_insert("SIMPLE", SIMPLE);
220 :
221 352 : failed += keywords_insert("INSERT", INSERT);
222 352 : failed += keywords_insert("UPDATE", UPDATE);
223 352 : failed += keywords_insert("DELETE", sqlDELETE);
224 352 : failed += keywords_insert("TRUNCATE", TRUNCATE);
225 352 : failed += keywords_insert("MATCHED", MATCHED);
226 :
227 352 : failed += keywords_insert("ACTION", ACTION);
228 352 : failed += keywords_insert("CASCADE", CASCADE);
229 352 : failed += keywords_insert("RESTRICT", RESTRICT);
230 352 : failed += keywords_insert("FIRST", FIRST);
231 352 : failed += keywords_insert("GLOBAL", GLOBAL);
232 352 : failed += keywords_insert("GROUP", sqlGROUP);
233 352 : failed += keywords_insert("GROUPING", GROUPING);
234 352 : failed += keywords_insert("ROLLUP", ROLLUP);
235 352 : failed += keywords_insert("CUBE", CUBE);
236 352 : failed += keywords_insert("HAVING", HAVING);
237 352 : failed += keywords_insert("ILIKE", ILIKE);
238 352 : failed += keywords_insert("IMPRINTS", IMPRINTS);
239 352 : failed += keywords_insert("IN", sqlIN);
240 352 : failed += keywords_insert("INNER", INNER);
241 352 : failed += keywords_insert("INTO", INTO);
242 352 : failed += keywords_insert("IS", IS);
243 352 : failed += keywords_insert("JOIN", JOIN);
244 352 : failed += keywords_insert("KEY", KEY);
245 352 : failed += keywords_insert("LATERAL", LATERAL);
246 352 : failed += keywords_insert("LEFT", LEFT);
247 352 : failed += keywords_insert("LIKE", LIKE);
248 352 : failed += keywords_insert("LIMIT", LIMIT);
249 352 : failed += keywords_insert("SAMPLE", SAMPLE);
250 352 : failed += keywords_insert("SEED", SEED);
251 352 : failed += keywords_insert("LAST", LAST);
252 352 : failed += keywords_insert("LOCAL", LOCAL);
253 352 : failed += keywords_insert("NATURAL", NATURAL);
254 352 : failed += keywords_insert("NOT", NOT);
255 352 : failed += keywords_insert("NULL", sqlNULL);
256 352 : failed += keywords_insert("NULLS", NULLS);
257 352 : failed += keywords_insert("OFFSET", OFFSET);
258 352 : failed += keywords_insert("ON", ON);
259 352 : failed += keywords_insert("OPTIONS", OPTIONS);
260 352 : failed += keywords_insert("OPTION", OPTION);
261 352 : failed += keywords_insert("OR", OR);
262 352 : failed += keywords_insert("ORDER", ORDER);
263 352 : failed += keywords_insert("ORDERED", ORDERED);
264 352 : failed += keywords_insert("OUTER", OUTER);
265 352 : failed += keywords_insert("OVER", OVER);
266 352 : failed += keywords_insert("PARTITION", PARTITION);
267 352 : failed += keywords_insert("PATH", PATH);
268 352 : failed += keywords_insert("PRECISION", PRECISION);
269 352 : failed += keywords_insert("PRIMARY", PRIMARY);
270 :
271 352 : failed += keywords_insert("USER", USER);
272 352 : failed += keywords_insert("RENAME", RENAME);
273 352 : failed += keywords_insert("UNENCRYPTED", UNENCRYPTED);
274 352 : failed += keywords_insert("ENCRYPTED", ENCRYPTED);
275 352 : failed += keywords_insert("PASSWORD", PASSWORD);
276 352 : failed += keywords_insert("GRANT", GRANT);
277 352 : failed += keywords_insert("REVOKE", REVOKE);
278 352 : failed += keywords_insert("ROLE", ROLE);
279 352 : failed += keywords_insert("ADMIN", ADMIN);
280 352 : failed += keywords_insert("PRIVILEGES", PRIVILEGES);
281 352 : failed += keywords_insert("PUBLIC", PUBLIC);
282 352 : failed += keywords_insert("CURRENT_USER", CURRENT_USER);
283 352 : failed += keywords_insert("CURRENT_ROLE", CURRENT_ROLE);
284 352 : failed += keywords_insert("SESSION_USER", SESSION_USER);
285 352 : failed += keywords_insert("CURRENT_SCHEMA", CURRENT_SCHEMA);
286 352 : failed += keywords_insert("SESSION", sqlSESSION);
287 352 : failed += keywords_insert("MAX_MEMORY", MAX_MEMORY);
288 352 : failed += keywords_insert("MAX_WORKERS", MAX_WORKERS);
289 352 : failed += keywords_insert("OPTIMIZER", OPTIMIZER);
290 :
291 352 : failed += keywords_insert("RIGHT", RIGHT);
292 352 : failed += keywords_insert("SCHEMA", SCHEMA);
293 352 : failed += keywords_insert("SELECT", SELECT);
294 352 : failed += keywords_insert("SET", SET);
295 352 : failed += keywords_insert("SETS", SETS);
296 352 : failed += keywords_insert("AUTO_COMMIT", AUTO_COMMIT);
297 :
298 352 : failed += keywords_insert("ALL", ALL);
299 352 : failed += keywords_insert("ANY", ANY);
300 352 : failed += keywords_insert("SOME", SOME);
301 352 : failed += keywords_insert("EVERY", ANY);
302 : /*
303 : failed += keywords_insert("SQLCODE", SQLCODE );
304 : */
305 352 : failed += keywords_insert("COLUMN", COLUMN);
306 352 : failed += keywords_insert("TABLE", TABLE);
307 352 : failed += keywords_insert("TEMPORARY", TEMPORARY);
308 352 : failed += keywords_insert("TEMP", TEMP);
309 352 : failed += keywords_insert("REMOTE", REMOTE);
310 352 : failed += keywords_insert("MERGE", MERGE);
311 352 : failed += keywords_insert("REPLICA", REPLICA);
312 352 : failed += keywords_insert("UNLOGGED", UNLOGGED);
313 352 : failed += keywords_insert("TO", TO);
314 352 : failed += keywords_insert("UNION", UNION);
315 352 : failed += keywords_insert("EXCEPT", EXCEPT);
316 352 : failed += keywords_insert("INTERSECT", INTERSECT);
317 352 : failed += keywords_insert("CORRESPONDING", CORRESPONDING);
318 352 : failed += keywords_insert("UNIQUE", UNIQUE);
319 352 : failed += keywords_insert("USING", USING);
320 352 : failed += keywords_insert("VALUES", VALUES);
321 352 : failed += keywords_insert("VIEW", VIEW);
322 352 : failed += keywords_insert("WHERE", WHERE);
323 352 : failed += keywords_insert("WITH", WITH);
324 352 : failed += keywords_insert("WITHIN", WITHIN);
325 352 : failed += keywords_insert("WITHOUT", WITHOUT);
326 352 : failed += keywords_insert("DATA", DATA);
327 :
328 352 : failed += keywords_insert("DATE", sqlDATE);
329 352 : failed += keywords_insert("TIME", TIME);
330 352 : failed += keywords_insert("TIMESTAMP", TIMESTAMP);
331 352 : failed += keywords_insert("INTERVAL", INTERVAL);
332 352 : failed += keywords_insert("CURRENT_DATE", CURRENT_DATE);
333 352 : failed += keywords_insert("CURRENT_TIME", CURRENT_TIME);
334 352 : failed += keywords_insert("CURRENT_TIMESTAMP", CURRENT_TIMESTAMP);
335 352 : failed += keywords_insert("CURRENT_TIMEZONE", CURRENT_TIMEZONE);
336 352 : failed += keywords_insert("NOW", CURRENT_TIMESTAMP);
337 352 : failed += keywords_insert("LOCALTIME", LOCALTIME);
338 352 : failed += keywords_insert("LOCALTIMESTAMP", LOCALTIMESTAMP);
339 352 : failed += keywords_insert("ZONE", ZONE);
340 :
341 352 : failed += keywords_insert("CENTURY", CENTURY);
342 352 : failed += keywords_insert("DECADE", DECADE);
343 352 : failed += keywords_insert("YEAR", YEAR);
344 352 : failed += keywords_insert("QUARTER", QUARTER);
345 352 : failed += keywords_insert("MONTH", MONTH);
346 352 : failed += keywords_insert("WEEK", WEEK);
347 352 : failed += keywords_insert("DOW", DOW);
348 352 : failed += keywords_insert("DOY", DOY);
349 352 : failed += keywords_insert("DAY", DAY);
350 352 : failed += keywords_insert("HOUR", HOUR);
351 352 : failed += keywords_insert("MINUTE", MINUTE);
352 352 : failed += keywords_insert("SECOND", SECOND);
353 352 : failed += keywords_insert("EPOCH", EPOCH);
354 :
355 352 : failed += keywords_insert("POSITION", POSITION);
356 352 : failed += keywords_insert("SUBSTRING", SUBSTRING);
357 352 : failed += keywords_insert("SPLIT_PART", SPLIT_PART);
358 352 : failed += keywords_insert("TRIM", TRIM);
359 352 : failed += keywords_insert("LEADING", LEADING);
360 352 : failed += keywords_insert("TRAILING", TRAILING);
361 352 : failed += keywords_insert("BOTH", BOTH);
362 :
363 352 : failed += keywords_insert("CASE", CASE);
364 352 : failed += keywords_insert("WHEN", WHEN);
365 352 : failed += keywords_insert("THEN", THEN);
366 352 : failed += keywords_insert("ELSE", ELSE);
367 352 : failed += keywords_insert("END", END);
368 352 : failed += keywords_insert("NULLIF", NULLIF);
369 352 : failed += keywords_insert("COALESCE", COALESCE);
370 352 : failed += keywords_insert("ELSEIF", ELSEIF);
371 352 : failed += keywords_insert("IF", IF);
372 352 : failed += keywords_insert("WHILE", WHILE);
373 352 : failed += keywords_insert("DO", DO);
374 :
375 352 : failed += keywords_insert("COMMIT", COMMIT);
376 352 : failed += keywords_insert("ROLLBACK", ROLLBACK);
377 352 : failed += keywords_insert("SAVEPOINT", SAVEPOINT);
378 352 : failed += keywords_insert("RELEASE", RELEASE);
379 352 : failed += keywords_insert("WORK", WORK);
380 352 : failed += keywords_insert("CHAIN", CHAIN);
381 352 : failed += keywords_insert("PRESERVE", PRESERVE);
382 352 : failed += keywords_insert("ROWS", ROWS);
383 352 : failed += keywords_insert("NO", NO);
384 352 : failed += keywords_insert("START", START);
385 352 : failed += keywords_insert("TRANSACTION", TRANSACTION);
386 352 : failed += keywords_insert("READ", READ);
387 352 : failed += keywords_insert("WRITE", WRITE);
388 352 : failed += keywords_insert("ONLY", ONLY);
389 352 : failed += keywords_insert("ISOLATION", ISOLATION);
390 352 : failed += keywords_insert("LEVEL", LEVEL);
391 352 : failed += keywords_insert("UNCOMMITTED", UNCOMMITTED);
392 352 : failed += keywords_insert("COMMITTED", COMMITTED);
393 352 : failed += keywords_insert("REPEATABLE", sqlREPEATABLE);
394 352 : failed += keywords_insert("SNAPSHOT", SNAPSHOT);
395 352 : failed += keywords_insert("SERIALIZABLE", SERIALIZABLE);
396 352 : failed += keywords_insert("DIAGNOSTICS", DIAGNOSTICS);
397 352 : failed += keywords_insert("SIZE", sqlSIZE);
398 352 : failed += keywords_insert("STORAGE", STORAGE);
399 :
400 352 : failed += keywords_insert("TYPE", TYPE);
401 352 : failed += keywords_insert("PROCEDURE", PROCEDURE);
402 352 : failed += keywords_insert("FUNCTION", FUNCTION);
403 352 : failed += keywords_insert("LOADER", sqlLOADER);
404 352 : failed += keywords_insert("REPLACE", REPLACE);
405 :
406 352 : failed += keywords_insert("FIELD", FIELD);
407 352 : failed += keywords_insert("FILTER", FILTER);
408 352 : failed += keywords_insert("AGGREGATE", AGGREGATE);
409 352 : failed += keywords_insert("RETURNS", RETURNS);
410 352 : failed += keywords_insert("EXTERNAL", EXTERNAL);
411 352 : failed += keywords_insert("NAME", sqlNAME);
412 352 : failed += keywords_insert("RETURN", RETURN);
413 352 : failed += keywords_insert("CALL", CALL);
414 352 : failed += keywords_insert("LANGUAGE", LANGUAGE);
415 :
416 352 : failed += keywords_insert("ANALYZE", ANALYZE);
417 352 : failed += keywords_insert("EXPLAIN", SQL_EXPLAIN);
418 352 : failed += keywords_insert("PLAN", SQL_PLAN);
419 352 : failed += keywords_insert("TRACE", SQL_TRACE);
420 352 : failed += keywords_insert("PREPARE", PREPARE);
421 352 : failed += keywords_insert("PREP", PREP);
422 352 : failed += keywords_insert("EXECUTE", EXECUTE);
423 352 : failed += keywords_insert("EXEC", EXEC);
424 352 : failed += keywords_insert("DEALLOCATE", DEALLOCATE);
425 :
426 352 : failed += keywords_insert("INDEX", INDEX);
427 :
428 352 : failed += keywords_insert("SEQUENCE", SEQUENCE);
429 352 : failed += keywords_insert("RESTART", RESTART);
430 352 : failed += keywords_insert("INCREMENT", INCREMENT);
431 352 : failed += keywords_insert("MAXVALUE", MAXVALUE);
432 352 : failed += keywords_insert("MINVALUE", MINVALUE);
433 352 : failed += keywords_insert("CYCLE", CYCLE);
434 352 : failed += keywords_insert("CACHE", CACHE);
435 352 : failed += keywords_insert("NEXT", NEXT);
436 352 : failed += keywords_insert("VALUE", VALUE);
437 352 : failed += keywords_insert("GENERATED", GENERATED);
438 352 : failed += keywords_insert("ALWAYS", ALWAYS);
439 352 : failed += keywords_insert("IDENTITY", IDENTITY);
440 352 : failed += keywords_insert("SERIAL", SERIAL);
441 352 : failed += keywords_insert("BIGSERIAL", BIGSERIAL);
442 352 : failed += keywords_insert("AUTO_INCREMENT", AUTO_INCREMENT);
443 352 : failed += keywords_insert("CONTINUE", CONTINUE);
444 :
445 352 : failed += keywords_insert("TRIGGER", TRIGGER);
446 352 : failed += keywords_insert("ATOMIC", ATOMIC);
447 352 : failed += keywords_insert("BEGIN", BEGIN);
448 352 : failed += keywords_insert("OF", OF);
449 352 : failed += keywords_insert("BEFORE", BEFORE);
450 352 : failed += keywords_insert("AFTER", AFTER);
451 352 : failed += keywords_insert("ROW", ROW);
452 352 : failed += keywords_insert("STATEMENT", STATEMENT);
453 352 : failed += keywords_insert("NEW", sqlNEW);
454 352 : failed += keywords_insert("OLD", OLD);
455 352 : failed += keywords_insert("EACH", EACH);
456 352 : failed += keywords_insert("REFERENCING", REFERENCING);
457 :
458 352 : failed += keywords_insert("RANGE", RANGE);
459 352 : failed += keywords_insert("UNBOUNDED", UNBOUNDED);
460 352 : failed += keywords_insert("PRECEDING", PRECEDING);
461 352 : failed += keywords_insert("FOLLOWING", FOLLOWING);
462 352 : failed += keywords_insert("CURRENT", CURRENT);
463 352 : failed += keywords_insert("EXCLUDE", EXCLUDE);
464 352 : failed += keywords_insert("OTHERS", OTHERS);
465 352 : failed += keywords_insert("TIES", TIES);
466 352 : failed += keywords_insert("GROUPS", GROUPS);
467 352 : failed += keywords_insert("WINDOW", WINDOW);
468 :
469 : /* special SQL/XML keywords */
470 352 : failed += keywords_insert("XMLCOMMENT", XMLCOMMENT);
471 352 : failed += keywords_insert("XMLCONCAT", XMLCONCAT);
472 352 : failed += keywords_insert("XMLDOCUMENT", XMLDOCUMENT);
473 352 : failed += keywords_insert("XMLELEMENT", XMLELEMENT);
474 352 : failed += keywords_insert("XMLATTRIBUTES", XMLATTRIBUTES);
475 352 : failed += keywords_insert("XMLFOREST", XMLFOREST);
476 352 : failed += keywords_insert("XMLPARSE", XMLPARSE);
477 352 : failed += keywords_insert("STRIP", STRIP);
478 352 : failed += keywords_insert("WHITESPACE", WHITESPACE);
479 352 : failed += keywords_insert("XMLPI", XMLPI);
480 352 : failed += keywords_insert("XMLQUERY", XMLQUERY);
481 352 : failed += keywords_insert("PASSING", PASSING);
482 352 : failed += keywords_insert("XMLTEXT", XMLTEXT);
483 352 : failed += keywords_insert("NIL", NIL);
484 352 : failed += keywords_insert("REF", REF);
485 352 : failed += keywords_insert("ABSENT", ABSENT);
486 352 : failed += keywords_insert("DOCUMENT", DOCUMENT);
487 352 : failed += keywords_insert("ELEMENT", ELEMENT);
488 352 : failed += keywords_insert("CONTENT", CONTENT);
489 352 : failed += keywords_insert("XMLNAMESPACES", XMLNAMESPACES);
490 352 : failed += keywords_insert("NAMESPACE", NAMESPACE);
491 352 : failed += keywords_insert("XMLVALIDATE", XMLVALIDATE);
492 352 : failed += keywords_insert("RETURNING", RETURNING);
493 352 : failed += keywords_insert("RECURSIVE", RECURSIVE);
494 352 : failed += keywords_insert("LOCATION", LOCATION);
495 352 : failed += keywords_insert("ID", ID);
496 352 : failed += keywords_insert("ACCORDING", ACCORDING);
497 352 : failed += keywords_insert("XMLSCHEMA", XMLSCHEMA);
498 352 : failed += keywords_insert("URI", URI);
499 352 : failed += keywords_insert("XMLAGG", XMLAGG);
500 :
501 : /* keywords for opengis */
502 352 : failed += keywords_insert("GEOMETRY", GEOMETRY);
503 :
504 352 : failed += keywords_insert("POINT", GEOMETRYSUBTYPE);
505 352 : failed += keywords_insert("LINESTRING", GEOMETRYSUBTYPE);
506 352 : failed += keywords_insert("POLYGON", GEOMETRYSUBTYPE);
507 352 : failed += keywords_insert("MULTIPOINT", GEOMETRYSUBTYPE);
508 352 : failed += keywords_insert("MULTILINESTRING", GEOMETRYSUBTYPE);
509 352 : failed += keywords_insert("MULTIPOLYGON", GEOMETRYSUBTYPE);
510 352 : failed += keywords_insert("GEOMETRYCOLLECTION", GEOMETRYSUBTYPE);
511 :
512 352 : failed += keywords_insert("POINTZ", GEOMETRYSUBTYPE);
513 352 : failed += keywords_insert("LINESTRINGZ", GEOMETRYSUBTYPE);
514 352 : failed += keywords_insert("POLYGONZ", GEOMETRYSUBTYPE);
515 352 : failed += keywords_insert("MULTIPOINTZ", GEOMETRYSUBTYPE);
516 352 : failed += keywords_insert("MULTILINESTRINGZ", GEOMETRYSUBTYPE);
517 352 : failed += keywords_insert("MULTIPOLYGONZ", GEOMETRYSUBTYPE);
518 352 : failed += keywords_insert("GEOMETRYCOLLECTIONZ", GEOMETRYSUBTYPE);
519 :
520 352 : failed += keywords_insert("POINTM", GEOMETRYSUBTYPE);
521 352 : failed += keywords_insert("LINESTRINGM", GEOMETRYSUBTYPE);
522 352 : failed += keywords_insert("POLYGONM", GEOMETRYSUBTYPE);
523 352 : failed += keywords_insert("MULTIPOINTM", GEOMETRYSUBTYPE);
524 352 : failed += keywords_insert("MULTILINESTRINGM", GEOMETRYSUBTYPE);
525 352 : failed += keywords_insert("MULTIPOLYGONM", GEOMETRYSUBTYPE);
526 352 : failed += keywords_insert("GEOMETRYCOLLECTIONM", GEOMETRYSUBTYPE);
527 :
528 352 : failed += keywords_insert("POINTZM", GEOMETRYSUBTYPE);
529 352 : failed += keywords_insert("LINESTRINGZM", GEOMETRYSUBTYPE);
530 352 : failed += keywords_insert("POLYGONZM", GEOMETRYSUBTYPE);
531 352 : failed += keywords_insert("MULTIPOINTZM", GEOMETRYSUBTYPE);
532 352 : failed += keywords_insert("MULTILINESTRINGZM", GEOMETRYSUBTYPE);
533 352 : failed += keywords_insert("MULTIPOLYGONZM", GEOMETRYSUBTYPE);
534 352 : failed += keywords_insert("GEOMETRYCOLLECTIONZM", GEOMETRYSUBTYPE);
535 352 : failed += keywords_insert("LOGIN", LOGIN);
536 : // odbc keywords
537 352 : failed += keywords_insert("d", ODBC_DATE_ESCAPE_PREFIX);
538 352 : failed += keywords_insert("t", ODBC_TIME_ESCAPE_PREFIX);
539 352 : failed += keywords_insert("ts", ODBC_TIMESTAMP_ESCAPE_PREFIX);
540 352 : failed += keywords_insert("guid", ODBC_GUID_ESCAPE_PREFIX);
541 352 : failed += keywords_insert("fn", ODBC_FUNC_ESCAPE_PREFIX);
542 352 : failed += keywords_insert("oj", ODBC_OJ_ESCAPE_PREFIX);
543 352 : failed += keywords_insert("DAYNAME", DAYNAME);
544 352 : failed += keywords_insert("IFNULL", IFNULL);
545 352 : failed += keywords_insert("MONTHNAME", MONTHNAME);
546 352 : failed += keywords_insert("TIMESTAMPADD", TIMESTAMPADD);
547 352 : failed += keywords_insert("TIMESTAMPDIFF", TIMESTAMPDIFF);
548 352 : failed += keywords_insert("SQL_BIGINT", SQL_BIGINT);
549 352 : failed += keywords_insert("SQL_BINARY", SQL_BINARY);
550 352 : failed += keywords_insert("SQL_BIT", SQL_BIT);
551 352 : failed += keywords_insert("SQL_CHAR", SQL_CHAR);
552 352 : failed += keywords_insert("SQL_DATE", SQL_DATE);
553 352 : failed += keywords_insert("SQL_DECIMAL", SQL_DECIMAL);
554 352 : failed += keywords_insert("SQL_DOUBLE", SQL_DOUBLE);
555 352 : failed += keywords_insert("SQL_FLOAT", SQL_FLOAT);
556 352 : failed += keywords_insert("SQL_GUID", SQL_GUID);
557 352 : failed += keywords_insert("SQL_HUGEINT", SQL_HUGEINT);
558 352 : failed += keywords_insert("SQL_INTEGER", SQL_INTEGER);
559 352 : failed += keywords_insert("SQL_INTERVAL_DAY", SQL_INTERVAL_DAY);
560 352 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_HOUR", SQL_INTERVAL_DAY_TO_HOUR);
561 352 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_MINUTE", SQL_INTERVAL_DAY_TO_MINUTE);
562 352 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_SECOND", SQL_INTERVAL_DAY_TO_SECOND);
563 352 : failed += keywords_insert("SQL_INTERVAL_HOUR", SQL_INTERVAL_HOUR);
564 352 : failed += keywords_insert("SQL_INTERVAL_HOUR_TO_MINUTE", SQL_INTERVAL_HOUR_TO_MINUTE);
565 352 : failed += keywords_insert("SQL_INTERVAL_HOUR_TO_SECOND", SQL_INTERVAL_HOUR_TO_SECOND);
566 352 : failed += keywords_insert("SQL_INTERVAL_MINUTE", SQL_INTERVAL_MINUTE);
567 352 : failed += keywords_insert("SQL_INTERVAL_MINUTE_TO_SECOND", SQL_INTERVAL_MINUTE_TO_SECOND);
568 352 : failed += keywords_insert("SQL_INTERVAL_MONTH", SQL_INTERVAL_MONTH);
569 352 : failed += keywords_insert("SQL_INTERVAL_SECOND", SQL_INTERVAL_SECOND);
570 352 : failed += keywords_insert("SQL_INTERVAL_YEAR", SQL_INTERVAL_YEAR);
571 352 : failed += keywords_insert("SQL_INTERVAL_YEAR_TO_MONTH", SQL_INTERVAL_YEAR_TO_MONTH);
572 352 : failed += keywords_insert("SQL_LONGVARBINARY", SQL_LONGVARBINARY);
573 352 : failed += keywords_insert("SQL_LONGVARCHAR", SQL_LONGVARCHAR);
574 352 : failed += keywords_insert("SQL_NUMERIC", SQL_NUMERIC);
575 352 : failed += keywords_insert("SQL_REAL", SQL_REAL);
576 352 : failed += keywords_insert("SQL_SMALLINT", SQL_SMALLINT);
577 352 : failed += keywords_insert("SQL_TIME", SQL_TIME);
578 352 : failed += keywords_insert("SQL_TIMESTAMP", SQL_TIMESTAMP);
579 352 : failed += keywords_insert("SQL_TINYINT", SQL_TINYINT);
580 352 : failed += keywords_insert("SQL_VARBINARY", SQL_VARBINARY);
581 352 : failed += keywords_insert("SQL_VARCHAR", SQL_VARCHAR);
582 352 : failed += keywords_insert("SQL_WCHAR", SQL_WCHAR);
583 352 : failed += keywords_insert("SQL_WLONGVARCHAR", SQL_WLONGVARCHAR);
584 352 : failed += keywords_insert("SQL_WVARCHAR", SQL_WVARCHAR);
585 352 : failed += keywords_insert("SQL_TSI_FRAC_SECOND", SQL_TSI_FRAC_SECOND);
586 352 : failed += keywords_insert("SQL_TSI_SECOND", SQL_TSI_SECOND);
587 352 : failed += keywords_insert("SQL_TSI_MINUTE", SQL_TSI_MINUTE);
588 352 : failed += keywords_insert("SQL_TSI_HOUR", SQL_TSI_HOUR);
589 352 : failed += keywords_insert("SQL_TSI_DAY", SQL_TSI_DAY);
590 352 : failed += keywords_insert("SQL_TSI_WEEK", SQL_TSI_WEEK);
591 352 : failed += keywords_insert("SQL_TSI_MONTH", SQL_TSI_MONTH);
592 352 : failed += keywords_insert("SQL_TSI_QUARTER", SQL_TSI_QUARTER);
593 352 : failed += keywords_insert("SQL_TSI_YEAR", SQL_TSI_YEAR);
594 :
595 352 : failed += keywords_insert("LEAST", MARGFUNC);
596 352 : failed += keywords_insert("GREATEST", MARGFUNC);
597 352 : return failed;
598 : }
599 :
/* Look up the keyword that starts S bytes past the current read position
 * of the scanner LC.  The arguments are parenthesised so that arbitrary
 * expressions can be passed for LC and S. */
#define find_keyword_bs(lc, s) find_keyword((lc)->rs->buf + (lc)->rs->pos + (s))
601 :
602 : void
603 248586 : scanner_init(struct scanner *s, bstream *rs, stream *ws)
604 : {
605 497172 : *s = (struct scanner) {
606 : .rs = rs,
607 : .ws = ws,
608 : .mode = LINE_N,
609 248586 : .raw_string_mode = GDKgetenv_istrue("raw_strings"),
610 : .aborted = false,
611 : };
612 248586 : }
613 :
614 : void
615 1321283 : scanner_query_processed(struct scanner *s)
616 : {
617 1321283 : int cur;
618 :
619 1321283 : if (s->yybak) {
620 515750 : s->rs->buf[s->rs->pos + s->yycur] = s->yybak;
621 515750 : s->yybak = 0;
622 : }
623 1321283 : if (s->rs) {
624 1321283 : s->rs->pos += s->yycur;
625 : /* completely eat the query including white space after the ; */
626 2482194 : while (s->rs->pos < s->rs->len &&
627 2145384 : (cur = s->rs->buf[s->rs->pos], iswspace(cur))) {
628 1160911 : s->rs->pos++;
629 : }
630 : }
631 : /*assert(s->rs->pos <= s->rs->len);*/
632 1321283 : s->yycur = 0;
633 1321283 : s->started = 0;
634 1321283 : s->as = 0;
635 1321283 : s->schema = NULL;
636 1321283 : }
637 :
638 : static int
639 33 : scanner_error(mvc *lc, int cur)
640 : {
641 33 : switch (cur) {
642 0 : case EOF:
643 0 : (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input");
644 0 : return EOF;
645 33 : default:
646 : /* on Windows at least, iswcntrl returns TRUE for
647 : * U+FEFF, but we just want consistent error
648 : * messages */
649 33 : (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected%s character (U+%04X)", iswcntrl(cur) && cur != 0xFEFF ? " control" : "", (unsigned) cur);
650 : }
651 33 : return LEX_ERROR;
652 : }
653 :
654 :
655 : /*
656 : UTF-8 encoding is as follows:
657 : U-00000000 - U-0000007F: 0xxxxxxx
658 : U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
659 : U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
660 : U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
661 : U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
662 : U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
663 : */
664 : /* To be correctly coded UTF-8, the sequence should be the shortest
665 : possible encoding of the value being encoded. This means that for
666 : an encoding of length n+1 (1 <= n <= 5), at least one of the bits in
667 : utf8chkmsk[n] should be non-zero (else the encoding could be
668 : shorter).
669 : */
/* Indexed by the number of continuation bytes n (0..5): a decoded value
 * must have at least one bit of utf8chkmsk[n] set, otherwise a shorter
 * encoding existed. */
static const int utf8chkmsk[6] = {
	0x7F,		/* 1-byte form */
	0x780,		/* 2-byte form */
	0xF800,		/* 3-byte form */
	0x1F0000,	/* 4-byte form */
	0x3E00000,	/* 5-byte form */
	0x7C000000,	/* 6-byte form */
};
678 :
679 : static void
680 32035939 : utf8_putchar(struct scanner *lc, int ch)
681 : {
682 32035939 : if ((ch) < 0x80) {
683 32035934 : lc->yycur--;
684 5 : } else if ((ch) < 0x800) {
685 0 : lc->yycur -= 2;
686 5 : } else if ((ch) < 0x10000) {
687 5 : lc->yycur -= 3;
688 : } else {
689 0 : lc->yycur -= 4;
690 : }
691 32035939 : }
692 :
/*
 * Make sure that at least n bytes are available in the read buffer
 * beyond the current scan position (rs->pos + yycur), reading more
 * input from the stream if necessary.
 *
 * Returns 1 when the bytes are available.  Returns EOF when the scanner
 * was already aborted, when more input would be needed but no query is
 * in progress (or the mode is LINE_1), when reading fails, or when a
 * request for more data yields none.
 */
static inline int
scanner_read_more(struct scanner *lc, size_t n)
{
	bstream *b = lc->rs;
	bool more = false;	/* set once we explicitly asked for more input */


	if (lc->aborted)
		return EOF;
	while (b->len < b->pos + lc->yycur + n) {

		/* nothing started yet, or single-line mode: do not block
		 * waiting for more input */
		if (lc->mode == LINE_1 || !lc->started)
			return EOF;

		/* query is not finished ask for more */
		if (b->eof || !isa_block_stream(b->s)) {
			/* NOTE(review): presumably bstream_getoob() reports an
			 * out-of-band abort request -- confirm */
			if (bstream_getoob(b)) {
				lc->aborted = true;
				return EOF;
			}
			/* send the continuation prompt and flush it if it was
			 * written */
			if (mnstr_write(lc->ws, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1)
				mnstr_flush(lc->ws, MNSTR_FLUSH_DATA);
			b->eof = false;
			more = true;
		}
		/* we need more query text */
		if (bstream_next(b) < 0) {
			if (mnstr_errnr(b->s) == MNSTR_INTERRUPT) {
				// now what?
				lc->errstr = "Query aborted";
				lc->aborted = true;
				mnstr_clearerr(b->s);
			}
			return EOF;
		} else if (/* we asked for more data but didn't get any */
			   (more && b->eof && b->len < b->pos + lc->yycur + n))
			return EOF;
		/* the only new data is the two bytes '\200' '\n': treat this
		 * as an abort and remove both bytes from the buffer */
		if (more && b->pos + lc->yycur + 2 == b->len && b->buf[b->pos + lc->yycur] == '\200' && b->buf[b->pos + lc->yycur + 1] == '\n') {
			lc->errstr = "Query aborted";
			b->len -= 2;
			b->buf[b->len] = 0;
			return EOF;
		}
	}
	return 1;
}
739 :
/*
 * Read the next UTF-8 encoded character at the scan position and return
 * its code point, advancing lc->yycur over the bytes consumed.
 *
 * Returns EOF when no more input is available (scanner_read_more()
 * failed) or when the byte sequence is not valid UTF-8; in the latter
 * case lc->errstr describes the problem.  On a successful read of the
 * first byte lc->errstr is cleared.
 */
static inline int
scanner_getc(struct scanner *lc)
{
	bstream *b = lc->rs;
	unsigned char *s = NULL;
	int c, m, n, mask;

	if (scanner_read_more(lc, 1) == EOF) {
		//lc->errstr = SQLSTATE(42000) "end of input stream";
		return EOF;
	}
	lc->errstr = NULL;

	s = (unsigned char *) b->buf + b->pos + lc->yycur++;
	if (((c = *s) & 0x80) == 0) {
		/* 7-bit char */
		return c;
	}
	/* count the leading 1 bits after the first one */
	for (n = 0, m = 0x40; c & m; n++, m >>= 1)
		;
	/* n now is number of 10xxxxxx bytes that should follow */
	/* NOTE(review): the bound (b->pos + n) > b->len does not include
	 * lc->yycur; availability of the n bytes is checked by the
	 * scanner_read_more() call below */
	if (n == 0 || n >= 6 || (b->pos + n) > b->len) {
		/* incorrect UTF-8 sequence */
		/* n==0: c == 10xxxxxx */
		/* n>=6: c == 1111111x */
		lc->errstr = SQLSTATE(42000) "invalid start of UTF-8 sequence";
		goto error;
	}

	if (scanner_read_more(lc, (size_t) n) == EOF)
		return EOF;
	/* recompute the pointer from b->buf after the read above */
	s = (unsigned char *) b->buf + b->pos + lc->yycur;

	mask = utf8chkmsk[n];
	c &= ~(0xFFC0 >> n);	/* remove non-x bits */
	while (--n >= 0) {
		c <<= 6;
		lc->yycur++;
		if (((m = *s++) & 0xC0) != 0x80) {
			/* incorrect UTF-8 sequence: byte is not 10xxxxxx */
			/* this includes end-of-string (m == 0) */
			lc->errstr = SQLSTATE(42000) "invalid continuation in UTF-8 sequence";
			goto error;
		}
		c |= m & 0x3F;
	}
	if ((c & mask) == 0) {
		/* incorrect UTF-8 sequence: not shortest possible */
		lc->errstr = SQLSTATE(42000) "not shortest possible UTF-8 sequence";
		goto error;
	}

	return c;

error:
	if (b->pos + lc->yycur < b->len)	/* skip bogus char */
		lc->yycur++;
	return EOF;
}
799 :
800 : static int
801 28612864 : scanner_token(struct scanner *lc, int token)
802 : {
803 28612864 : lc->yybak = lc->rs->buf[lc->rs->pos + lc->yycur];
804 28612864 : lc->rs->buf[lc->rs->pos + lc->yycur] = 0;
805 28612864 : lc->yyval = token;
806 28612864 : return lc->yyval;
807 : }
808 :
/*
 * Scan a quoted string or quoted identifier whose opening quote
 * character (quote) has already been consumed.
 *
 * When escapes is true a backslash escapes the following character, so
 * an escaped quote does not end the string.  A doubled quote character
 * is always accepted as part of the string.
 *
 * Returns the result of scanner_token(lc, STRING) on success,
 * LEX_ERROR for a string that is too long, contains a NUL byte or (for
 * '"') a U+FEFF character, and EOF when the input ends or cannot be
 * decoded before the closing quote.
 */
static int
scanner_string(mvc *c, int quote, bool escapes)
{
	struct scanner *lc = &c->scanner;
	bstream *rs = lc->rs;
	int cur = quote;
	bool escape = false;
	/* limit on the number of bytes handled by one run of the inner
	 * loop below (pos restarts at 0 for every outer iteration) */
	const size_t limit = quote == '"' ? 1 << 11 : 1 << 30;

	lc->started = 1;
	while (cur != EOF) {
		size_t pos = 0;
		const size_t yycur = rs->pos + lc->yycur;

		/* fast path: walk over plain 7-bit bytes straight from the
		 * buffer until a NUL, a byte with the high bit set, or an
		 * unescaped quote is found */
		while (cur != EOF && (quote != '"' || cur != 0xFEFF) && pos < limit &&
		       (((cur = rs->buf[yycur + pos++]) & 0x80) == 0) &&
		       cur && (cur != quote || escape)) {
			if (escapes && cur == '\\')
				escape = !escape;
			else
				escape = false;
		}
		if (pos == limit) {
			(void) sql_error(c, 2, SQLSTATE(42000) "string too long");
			return LEX_ERROR;
		}
		/* BOM character not allowed as an identifier */
		if (cur == EOF || (quote == '"' && cur == 0xFEFF))
			return scanner_error(c, cur);
		lc->yycur += pos;
		/* check for quote escaped quote: Obscure SQL Rule */
		if (cur == quote && rs->buf[yycur + pos] == quote) {
			lc->yycur++;
			continue;
		}
		assert(yycur + pos <= rs->len + 1);
		if (cur == quote && !escape) {
			return scanner_token(lc, STRING);
		}
		lc->yycur--;	/* go back to current (possibly invalid) char */
		/* long utf8, if correct isn't the quote */
		if (!cur) {
			/* a NUL inside the filled part of the buffer is an
			 * error; otherwise we ran off the end and need more
			 * input (cur becomes 1 or EOF) */
			if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
				(void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
				return LEX_ERROR;
			}
			cur = scanner_read_more(lc, 1);
		} else {
			/* byte with the high bit set: decode (and validate)
			 * the full UTF-8 character */
			cur = scanner_getc(lc);
		}
	}
	(void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "Unexpected end of input");
	return EOF;
}
863 :
864 : /* scan a structure {blah} into a string. We only count the matching {}
865 : * unless escaped. We do not consider embeddings in string literals yet
866 : */
867 :
/*
 * Scan a brace-delimited body whose opening '{' has already been
 * consumed (asserted below) up to the matching '}'.
 *
 * Braces are counted in blk; a backslash toggles the escape flag, which
 * keeps the scan going past a closing brace.
 *
 * Returns the result of scanner_token(lc, X_BODY) on success,
 * LEX_ERROR when a NUL byte is found inside the buffer, and EOF when
 * the input ends first.
 */
static int
scanner_body(mvc *c)
{
	struct scanner *lc = &c->scanner;
	bstream *rs = lc->rs;
	int cur = (int) 'x';	/* any value other than EOF, to enter the loop */
	int blk = 1;		/* nesting depth; the opening '{' counts as 1 */
	bool escape = false;

	lc->started = 1;
	assert(rs->buf[rs->pos + lc->yycur-1] == '{');
	while (cur != EOF) {
		size_t pos = rs->pos + lc->yycur;

		/* walk over plain 7-bit bytes straight from the buffer */
		while ((((cur = rs->buf[pos++]) & 0x80) == 0) && cur && (blk || escape)) {
			if (cur != '\\')
				escape = false;
			else
				escape = !escape;
			blk += cur =='{';
			blk -= cur =='}';
		}
		lc->yycur = pos - rs->pos;
		assert(pos <= rs->len + 1);
		if (blk == 0 && !escape){
			lc->yycur--;	/* go back to current (possibly invalid) char */
			return scanner_token(lc, X_BODY);
		}
		lc->yycur--;	/* go back to current (possibly invalid) char */
		if (!cur) {
			/* a NUL inside the filled part of the buffer is an
			 * error; otherwise ask for more input */
			if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
				(void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
				return LEX_ERROR;
			}
			cur = scanner_read_more(lc, 1);
		} else {
			/* byte with the high bit set: decode it as UTF-8 */
			cur = scanner_getc(lc);
		}
	}
	(void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input");
	return EOF;
}
910 :
911 : static int
912 13748732 : keyword_or_ident(mvc * c, int cur)
913 : {
914 13748732 : struct scanner *lc = &c->scanner;
915 13748732 : keyword *k = NULL;
916 13748732 : size_t s;
917 :
918 13748732 : lc->started = 1;
919 13748732 : utf8_putchar(lc, cur);
920 13748718 : s = lc->yycur;
921 13748718 : lc->yyval = IDENT;
922 82229891 : while ((cur = scanner_getc(lc)) != EOF) {
923 82229783 : if (!iswalnum(cur) && cur != '_') {
924 13748610 : utf8_putchar(lc, cur);
925 13748642 : (void)scanner_token(lc, IDENT);
926 13748642 : if ((k = find_keyword_bs(lc,s)))
927 8431231 : lc->yyval = k->token;
928 13748909 : return lc->yyval;
929 : }
930 : }
931 : if (cur < 0)
932 : return cur;
933 : (void)scanner_token(lc, IDENT);
934 : if ((k = find_keyword_bs(lc,s)))
935 : lc->yyval = k->token;
936 : return lc->yyval;
937 : }
938 :
939 : static int
940 14354737 : skip_white_space(struct scanner * lc)
941 : {
942 17963539 : int cur;
943 :
944 17963539 : do {
945 17963539 : lc->yysval = lc->yycur;
946 17963539 : } while ((cur = scanner_getc(lc)) != EOF && iswspace(cur));
947 14353715 : return cur;
948 : }
949 :
950 : static int
951 70455 : skip_c_comment(struct scanner * lc)
952 : {
953 70455 : int cur;
954 70455 : int prev = 0;
955 70455 : int started = lc->started;
956 70455 : int depth = 1;
957 :
958 70455 : lc->started = 1;
959 1420738 : while (depth > 0 && (cur = scanner_getc(lc)) != EOF) {
960 1350283 : if (prev == '*' && cur == '/')
961 70455 : depth--;
962 1279828 : else if (prev == '/' && cur == '*') {
963 : /* block comments can nest */
964 0 : cur = 0; /* prevent slash-star-slash from matching */
965 0 : depth++;
966 : }
967 : prev = cur;
968 : }
969 70455 : lc->yysval = lc->yycur;
970 70455 : lc->started = started;
971 : /* a comment is equivalent to a newline */
972 70455 : return cur == EOF ? cur : '\n';
973 : }
974 :
975 : static int
976 3324 : skip_sql_comment(struct scanner * lc)
977 : {
978 3324 : int cur;
979 3324 : int started = lc->started;
980 :
981 3324 : lc->started = 1;
982 835132 : while ((cur = scanner_getc(lc)) != EOF && (cur != '\n'))
983 : ;
984 3324 : lc->yysval = lc->yycur;
985 3324 : lc->started = started;
986 : /* a comment is equivalent to a newline */
987 3324 : return cur;
988 : }
989 :
990 : static int tokenize(mvc * lc, int cur);
991 :
/* Digit predicates, one per number base; each takes a character code. */

/* true for a decimal digit */
static inline bool
is_valid_decimal_digit(int cur)
{
	return iswdigit(cur) != 0;
}

/* true for '0' and '1' */
static inline bool
is_valid_binary_digit(int cur)
{
	return cur < '2' && iswdigit(cur) != 0;
}

/* true for '0' through '7' */
static inline bool
is_valid_octal_digit(int cur)
{
	return cur < '8' && iswdigit(cur) != 0;
}

/* true for a hexadecimal digit */
static inline bool
is_valid_hexadecimal_digit(int cur)
{
	return iswxdigit(cur) != 0;
}
996 :
997 1926537 : static inline int check_validity_number(mvc* c, int pcur, bool initial_underscore_allowed, int *token, int type) {
998 1926537 : struct scanner *lc = &c->scanner;
999 1926537 : bool (*is_valid_n_ary_digit)(int);
1000 :
1001 1926537 : if (pcur == '_' && !initial_underscore_allowed) /* ERROR: initial underscore not allowed */ {
1002 0 : *token = 0;
1003 0 : return '_';
1004 : }
1005 :
1006 1926537 : switch (type) {
1007 : case BINARYNUM:
1008 : is_valid_n_ary_digit = &is_valid_binary_digit;
1009 : break;
1010 3 : case OCTALNUM:
1011 3 : is_valid_n_ary_digit = &is_valid_octal_digit;
1012 3 : break;
1013 280 : case HEXADECIMALNUM:
1014 280 : is_valid_n_ary_digit = &is_valid_hexadecimal_digit;
1015 280 : break;
1016 1926252 : default:
1017 1926252 : is_valid_n_ary_digit = &is_valid_decimal_digit;
1018 1926252 : break;
1019 : }
1020 :
1021 1926537 : if ( !(pcur == '_' || is_valid_n_ary_digit(pcur)) ) /* ERROR: first digit is not valid */ {
1022 18 : *token = 0;
1023 18 : return pcur;
1024 : }
1025 :
1026 1926672 : int cur = scanner_getc(lc);
1027 1927196 : *token = type;
1028 3864059 : while (cur != EOF) {
1029 3863999 : if (cur == '_') {
1030 25 : if (pcur == '_') /* ERROR: multiple consecutive underscores */ {
1031 2 : *token = 0;
1032 2 : return '_';
1033 : }
1034 : }
1035 3863974 : else if (!is_valid_n_ary_digit(cur))
1036 : break;
1037 1937484 : pcur = cur;
1038 1937484 : cur = scanner_getc(lc);
1039 : }
1040 :
1041 1926495 : if (pcur == '_') {
1042 3 : *token = 0;
1043 3 : if (iswalnum(cur)) /* ERROR: not a valid digit */
1044 : return cur;
1045 : else /* ERROR: number ends with underscore */
1046 : return '_';
1047 : }
1048 :
1049 : return cur;
1050 : }
1051 :
1052 : static int
1053 1912979 : number(mvc * c, int cur)
1054 : {
1055 1912979 : struct scanner *lc = &c->scanner;
1056 1912979 : int token = sqlINT;
1057 :
1058 : /* a number has one of these forms (expressed in regular expressions):
1059 : * 0x[0-9A-Fa-f]+ -- (hexadecimal) INTEGER
1060 : * \.[0-9]+ -- DECIMAL
1061 : * [0-9]+\.[0-9]* -- DECIMAL
1062 : * [0-9]+@0 -- OID
1063 : * [0-9]*\.[0-9]+[eE][-+]?[0-9]+ -- REAL
1064 : * [0-9]+(\.[0-9]*)?[eE][-+]?[0-9]+ -- REAL
1065 : * [0-9]+ -- (decimal) INTEGER
1066 : */
1067 1912979 : lc->started = 1;
1068 1912979 : if (cur == '0') {
1069 317825 : switch ((cur = scanner_getc(lc))) {
1070 2 : case 'b':
1071 2 : cur = scanner_getc(lc);
1072 2 : if ((cur = check_validity_number(c, cur, true, &token, BINARYNUM)) == EOF) return cur;
1073 : break;
1074 3 : case 'o':
1075 3 : cur = scanner_getc(lc);
1076 3 : if ((cur = check_validity_number(c, cur, true, &token, OCTALNUM)) == EOF) return cur;
1077 : break;
1078 280 : case 'x':
1079 280 : cur = scanner_getc(lc);
1080 280 : if ((cur = check_validity_number(c, cur, true, &token, HEXADECIMALNUM)) == EOF) return cur;
1081 : break;
1082 317542 : default:
1083 317542 : utf8_putchar(lc, cur);
1084 317542 : cur = '0';
1085 : }
1086 : }
1087 1912980 : if (token == sqlINT) {
1088 1913020 : if ((cur = check_validity_number(c, cur, false, &token, sqlINT)) == EOF) return cur;
1089 1912893 : if (cur == '@') {
1090 0 : if (token == sqlINT) {
1091 0 : cur = scanner_getc(lc);
1092 0 : if (cur == EOF)
1093 : return cur;
1094 0 : if (cur == '0') {
1095 0 : cur = scanner_getc(lc);
1096 0 : if (cur == EOF)
1097 : return cur;
1098 0 : token = OIDNUM;
1099 : } else {
1100 : /* number + '@' not followed by 0: show '@' as erroneous */
1101 0 : utf8_putchar(lc, cur);
1102 0 : cur = '@';
1103 0 : token = 0;
1104 : }
1105 : }
1106 : } else {
1107 1912893 : if (cur == '.') {
1108 11120 : cur = scanner_getc(lc);
1109 11120 : if (iswalnum(cur)) /* early exit for numerical forms with final . e.g. 10. */
1110 11113 : if ((cur = check_validity_number(c, cur, false, &token, INTNUM)) == EOF) return cur;
1111 : }
1112 1912893 : if (token != 0)
1113 1912635 : if (cur == 'e' || cur == 'E') {
1114 2226 : cur = scanner_getc(lc);
1115 2226 : if (cur == '+' || cur == '-')
1116 2111 : cur = scanner_getc(lc);
1117 2226 : if ((cur = check_validity_number(c, cur, false, &token, APPROXNUM)) == EOF) return cur;
1118 : }
1119 : }
1120 : }
1121 :
1122 1910627 : assert(cur != EOF);
1123 :
1124 1912853 : if (iswalnum(cur)) /* ERROR: not a valid digit */
1125 6 : token = 0;
1126 :
1127 1912853 : utf8_putchar(lc, cur);
1128 :
1129 1912871 : if (token) {
1130 1912861 : return scanner_token(lc, token);
1131 : } else {
1132 10 : (void)sql_error( c, 2, SQLSTATE(42000) "Unexpected symbol %lc", (wint_t) cur);
1133 10 : return LEX_ERROR;
1134 : }
1135 : }
1136 :
1137 : static
1138 13018014 : int scanner_symbol(mvc * c, int cur)
1139 : {
1140 13018014 : struct scanner *lc = &c->scanner;
1141 13018014 : int next = 0;
1142 13018014 : int started = lc->started;
1143 :
1144 13018014 : switch (cur) {
1145 73105 : case '/':
1146 73105 : lc->started = 1;
1147 73105 : next = scanner_getc(lc);
1148 73105 : if (next < 0)
1149 : return EOF;
1150 73105 : if (next == '*') {
1151 70455 : lc->started = started;
1152 70455 : cur = skip_c_comment(lc);
1153 70455 : if (cur < 0)
1154 : return EOF;
1155 70455 : return tokenize(c, cur);
1156 : } else {
1157 2650 : utf8_putchar(lc, next);
1158 2650 : return scanner_token(lc, cur);
1159 : }
1160 0 : case '0':
1161 : case '1':
1162 : case '2':
1163 : case '3':
1164 : case '4':
1165 : case '5':
1166 : case '6':
1167 : case '7':
1168 : case '8':
1169 : case '9':
1170 0 : return number(c, cur);
1171 8 : case '#':
1172 8 : if ((cur = skip_sql_comment(lc)) == EOF)
1173 : return cur;
1174 8 : return tokenize(c, cur);
1175 814454 : case '\'':
1176 814454 : if (lc->raw_string_mode || lc->next_string_is_raw)
1177 50 : return scanner_string(c, cur, false);
1178 814404 : return scanner_string(c, cur, true);
1179 1312913 : case '"':
1180 1312913 : return scanner_string(c, cur, false);
1181 500 : case '{':
1182 : // if previous tokens like LANGUAGE IDENT
1183 : // TODO checking on IDENT only may not be enough
1184 500 : if (lc->yylast == IDENT)
1185 234 : return scanner_body(c);
1186 266 : lc->started = 1;
1187 266 : return scanner_token(lc, cur);
1188 266 : case '}':
1189 266 : lc->started = 1;
1190 266 : return scanner_token(lc, cur);
1191 30325 : case '-':
1192 30325 : lc->started = 1;
1193 30325 : next = scanner_getc(lc);
1194 30325 : if (next < 0)
1195 : return EOF;
1196 30324 : if (next == '-') {
1197 3316 : lc->started = started;
1198 3316 : if ((cur = skip_sql_comment(lc)) == EOF)
1199 : return cur;
1200 3316 : return tokenize(c, cur);
1201 : }
1202 27008 : lc->started = 1;
1203 27008 : utf8_putchar(lc, next);
1204 27008 : return scanner_token(lc, cur);
1205 12 : case '~': /* binary not */
1206 12 : lc->started = 1;
1207 12 : next = scanner_getc(lc);
1208 12 : if (next < 0)
1209 : return EOF;
1210 12 : if (next == '=')
1211 5 : return scanner_token(lc, GEOM_MBR_EQUAL);
1212 7 : utf8_putchar(lc, next);
1213 7 : return scanner_token(lc, cur);
1214 7275887 : case '^': /* binary xor */
1215 : case '*':
1216 : case '?':
1217 : case ':':
1218 : case '%':
1219 : case '+':
1220 : case '(':
1221 : case ')':
1222 : case ',':
1223 : case '=':
1224 : case '[':
1225 : case ']':
1226 7275887 : lc->started = 1;
1227 7275887 : return scanner_token(lc, cur);
1228 6325 : case '&':
1229 6325 : lc->started = 1;
1230 6325 : cur = scanner_getc(lc);
1231 6325 : if (cur < 0)
1232 : return EOF;
1233 6325 : if (cur < 0)
1234 : return EOF;
1235 6325 : if(cur == '<') {
1236 3 : next = scanner_getc(lc);
1237 3 : if (next < 0)
1238 : return EOF;
1239 3 : if(next == '|') {
1240 0 : return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);
1241 : } else {
1242 3 : utf8_putchar(lc, next); //put the char back
1243 3 : return scanner_token(lc, GEOM_OVERLAP_OR_LEFT);
1244 : }
1245 6322 : } else if(cur == '>')
1246 3 : return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT);
1247 6319 : else if(cur == '&')
1248 3 : return scanner_token(lc, GEOM_OVERLAP);
1249 : else {/* binary and */
1250 6316 : utf8_putchar(lc, cur); //put the char back
1251 6316 : return scanner_token(lc, '&');
1252 : }
1253 19 : case '@':
1254 19 : lc->started = 1;
1255 19 : return scanner_token(lc, AT);
1256 1000649 : case ';':
1257 1000649 : lc->started = 0;
1258 1000649 : return scanner_token(lc, SCOLON);
1259 27 : case '!':
1260 27 : lc->started = 1;
1261 27 : cur = scanner_getc(lc);
1262 27 : if (cur < 0)
1263 : return EOF;
1264 27 : else if (cur == '=') {
1265 21 : lc->rs->buf[lc->rs->pos + lc->yycur - 2] = '<';
1266 21 : lc->rs->buf[lc->rs->pos + lc->yycur - 1] = '>';
1267 21 : return scanner_token( lc, COMPARISON);
1268 : } else {
1269 6 : utf8_putchar(lc, cur); //put the char back
1270 : }
1271 6 : return scanner_token(lc, '!');
1272 52291 : case '<':
1273 52291 : lc->started = 1;
1274 52291 : cur = scanner_getc(lc);
1275 52291 : if (cur < 0)
1276 : return EOF;
1277 52291 : if (cur == '=') {
1278 3128 : return scanner_token( lc, COMPARISON);
1279 49163 : } else if (cur == '>') {
1280 35507 : return scanner_token( lc, COMPARISON);
1281 13656 : } else if (cur == '<') {
1282 44 : next = scanner_getc(lc);
1283 44 : if (next < 0)
1284 : return EOF;
1285 44 : if (next == '=') {
1286 4 : return scanner_token( lc, LEFT_SHIFT_ASSIGN);
1287 40 : } else if (next == '|') {
1288 1 : return scanner_token(lc, GEOM_BELOW);
1289 : } else {
1290 39 : utf8_putchar(lc, next); //put the char back
1291 39 : return scanner_token( lc, LEFT_SHIFT);
1292 : }
1293 13612 : } else if(cur == '-') {
1294 19 : next = scanner_getc(lc);
1295 19 : if (next < 0)
1296 : return EOF;
1297 19 : if(next == '>') {
1298 7 : return scanner_token(lc, GEOM_DIST);
1299 : } else {
1300 : //put the characters back and fall in the next possible case
1301 12 : utf8_putchar(lc, next);
1302 12 : utf8_putchar(lc, cur);
1303 12 : return scanner_token( lc, COMPARISON);
1304 : }
1305 : } else {
1306 13593 : utf8_putchar(lc, cur);
1307 13593 : return scanner_token( lc, COMPARISON);
1308 : }
1309 47489 : case '>':
1310 47489 : lc->started = 1;
1311 47489 : cur = scanner_getc(lc);
1312 47489 : if (cur < 0)
1313 : return EOF;
1314 47489 : if (cur == '>') {
1315 2683 : cur = scanner_getc(lc);
1316 2683 : if (cur < 0)
1317 : return EOF;
1318 2683 : if (cur == '=')
1319 3 : return scanner_token( lc, RIGHT_SHIFT_ASSIGN);
1320 2680 : utf8_putchar(lc, cur);
1321 2680 : return scanner_token( lc, RIGHT_SHIFT);
1322 44806 : } else if (cur != '=') {
1323 42548 : utf8_putchar(lc, cur);
1324 42548 : return scanner_token( lc, COMPARISON);
1325 : } else {
1326 2258 : return scanner_token( lc, COMPARISON);
1327 : }
1328 2217126 : case '.':
1329 2217126 : lc->started = 1;
1330 2217126 : cur = scanner_getc(lc);
1331 2217126 : if (cur < 0)
1332 : return EOF;
1333 2217125 : if (!iswdigit(cur)) {
1334 2217111 : utf8_putchar(lc, cur);
1335 2217111 : return scanner_token( lc, '.');
1336 : } else {
1337 14 : utf8_putchar(lc, cur);
1338 14 : cur = '.';
1339 14 : return number(c, cur);
1340 : }
1341 186608 : case '|': /* binary or or string concat */
1342 186608 : lc->started = 1;
1343 186608 : cur = scanner_getc(lc);
1344 186608 : if (cur < 0)
1345 : return EOF;
1346 186608 : if (cur == '|') {
1347 186583 : return scanner_token(lc, CONCATSTRING);
1348 25 : } else if (cur == '&') {
1349 0 : next = scanner_getc(lc);
1350 0 : if (next < 0)
1351 : return EOF;
1352 0 : if(next == '>') {
1353 0 : return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);
1354 : } else {
1355 0 : utf8_putchar(lc, next); //put the char back
1356 0 : utf8_putchar(lc, cur); //put the char back
1357 0 : return scanner_token(lc, '|');
1358 : }
1359 25 : } else if (cur == '>') {
1360 1 : next = scanner_getc(lc);
1361 1 : if (next < 0)
1362 : return EOF;
1363 1 : if(next == '>') {
1364 1 : return scanner_token(lc, GEOM_ABOVE);
1365 : } else {
1366 0 : utf8_putchar(lc, next); //put the char back
1367 0 : utf8_putchar(lc, cur); //put the char back
1368 0 : return scanner_token(lc, '|');
1369 : }
1370 : } else {
1371 24 : utf8_putchar(lc, cur);
1372 24 : return scanner_token(lc, '|');
1373 : }
1374 : }
1375 10 : (void)sql_error( c, 3, SQLSTATE(42000) "Unexpected symbol (%lc)", (wint_t) cur);
1376 10 : return LEX_ERROR;
1377 : }
1378 :
1379 : static int
1380 28705103 : tokenize(mvc * c, int cur)
1381 : {
1382 28705103 : struct scanner *lc = &c->scanner;
1383 57369655 : while (1) {
1384 43037379 : if (cur == 0xFEFF) {
1385 : /* on Linux at least, iswpunct returns TRUE
1386 : * for U+FEFF, but we don't want that, we just
1387 : * want to go to the scanner_error case
1388 : * below */
1389 : ;
1390 43037995 : } else if (iswspace(cur)) {
1391 14350336 : if ((cur = skip_white_space(lc)) == EOF)
1392 : return cur;
1393 14332276 : continue; /* try again */
1394 28687659 : } else if (iswdigit(cur)) {
1395 1913611 : return number(c, cur);
1396 26774048 : } else if (iswalpha(cur) || cur == '_') {
1397 13719137 : switch (cur) {
1398 657864 : case 'e': /* string with escapes */
1399 : case 'E':
1400 657864 : if (scanner_read_more(lc, 1) != EOF &&
1401 657864 : lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1402 3885 : return scanner_string(c, scanner_getc(lc), true);
1403 : }
1404 : break;
1405 419551 : case 'x': /* blob */
1406 : case 'X':
1407 : case 'r': /* raw string */
1408 : case 'R':
1409 419551 : if (scanner_read_more(lc, 1) != EOF &&
1410 419551 : lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1411 3280 : return scanner_string(c, scanner_getc(lc), false);
1412 : }
1413 : break;
1414 159854 : case 'u': /* unicode string */
1415 : case 'U':
1416 159854 : if (scanner_read_more(lc, 1) != EOF &&
1417 159871 : lc->rs->buf[lc->rs->pos + lc->yycur] == '&' &&
1418 17 : scanner_read_more(lc, 2) != EOF &&
1419 17 : (lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '\'' ||
1420 : lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '"')) {
1421 17 : cur = scanner_getc(lc); /* '&' */
1422 17 : return scanner_string(c, scanner_getc(lc), false);
1423 : }
1424 : break;
1425 : default:
1426 : break;
1427 : }
1428 13748775 : return keyword_or_ident(c, cur);
1429 13018091 : } else if (iswpunct(cur)) {
1430 13017443 : return scanner_symbol(c, cur);
1431 : }
1432 32 : if (cur == EOF) {
1433 0 : if (lc->mode == LINE_1 || !lc->started )
1434 : return cur;
1435 0 : return scanner_error(c, cur);
1436 : }
1437 : /* none of the above: error */
1438 32 : return scanner_error(c, cur);
1439 : }
1440 : }
1441 :
1442 : /* SQL 'quoted' idents consist of a set of any character of
1443 : * the source language character set other than a 'quote'
1444 : *
1445 : * MonetDB has 3 restrictions:
1446 : * 1 we disallow '%' as the first character.
1447 : * 2 the length is limited to 1024 characters
1448 : * 3 the identifier 'TID%' is not allowed
1449 : */
1450 : static bool
1451 1312902 : valid_ident(const char *restrict s, char *restrict dst)
1452 : {
1453 1312902 : int p = 0;
1454 :
1455 1312902 : if (*s == '%')
1456 : return false;
1457 :
1458 9708597 : while (*s) {
1459 8395695 : if ((dst[p++] = *s++) == '"' && *s == '"')
1460 68 : s++;
1461 8395695 : if (p >= 1024)
1462 : return false;
1463 : }
1464 1312902 : dst[p] = '\0';
1465 1312902 : if (strcmp(dst, TID + 1) == 0) /* an index named 'TID%' could interfere with '%TID%' */
1466 : return false;
1467 : return true;
1468 : }
1469 :
/*
 * Produce the next token and its semantic value (yylval->sval).
 *
 * A token stored in lc->yynext is returned first.  Otherwise the byte
 * overwritten by the previous scanner_token() call is restored, the
 * next token is scanned with tokenize(), and yylval->sval is set:
 *  - for IDENT, COMPARISON, RANK, aTYPE and MARGFUNC to a copy of the
 *    token text made with sa_strndup();
 *  - for STRING to a newly allocated, unquoted/unescaped copy, with the
 *    token changed to IDENT, USTRING, UIDENT or XSTRING depending on
 *    the first character of the token text;
 *  - otherwise to the token text inside the read buffer.
 *
 * Returns the token, EOF at end of input, or LEX_ERROR when a string or
 * identifier is invalid.
 */
static inline int
sql_get_next_token(YYSTYPE *yylval, void *parm)
{
	mvc *c = (mvc*)parm;
	struct scanner *lc = &c->scanner;
	int token = 0, cur = 0;

	if (lc->rs->buf == NULL) /* malloc failure */
		return EOF;

	/* a token that scanner() read ahead and stored for later */
	if (lc->yynext) {
		int next = lc->yynext;

		lc->yynext = 0;
		return(next);
	}

	/* undo the NUL that terminated the previous token */
	if (lc->yybak) {
		lc->rs->buf[lc->rs->pos + lc->yycur] = lc->yybak;
		lc->yybak = 0;
	}

	lc->yysval = lc->yycur;
	lc->yylast = lc->yyval;
	cur = scanner_getc(lc);
	if (cur < 0)
		return EOF;
	token = tokenize(c, cur);

	yylval->sval = (lc->rs->buf + lc->rs->pos + lc->yysval);

	if (token == KW_TYPE)
		token = aTYPE;

	if (token == IDENT || token == COMPARISON ||
	    token == RANK || token == aTYPE || token == MARGFUNC) {
		yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval);
		lc->next_string_is_raw = false;
	} else if (token == STRING) {
		/* first character of the token text: the opening quote or
		 * the prefix letter of a prefixed string */
		char quote = *yylval->sval;
		/* NOTE(review): the results of sa_alloc() here and of
		 * sa_strndup() above are used without a NULL check --
		 * confirm the allocator cannot return NULL */
		char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 );
		char *dst;

		assert(quote == '"' || quote == '\'' || quote == 'E' || quote == 'e' || quote == 'U' || quote == 'u' || quote == 'X' || quote == 'x' || quote == 'R' || quote == 'r');

		/* temporarily cut off the closing quote; it is put back at
		 * the end of this branch (NOTE(review): not on the
		 * LEX_ERROR returns below) */
		lc->rs->buf[lc->rs->pos + lc->yycur - 1] = 0;
		switch (quote) {
		case '"':
			/* quoted identifier */
			if (valid_ident(yylval->sval+1,str)) {
				token = IDENT;
			} else {
				sql_error(c, 1, SQLSTATE(42000) "Invalid identifier '%s'", yylval->sval+1);
				return LEX_ERROR;
			}
			break;
		case 'e':
		case 'E':
			/* string with escapes: converted by GDKstrFromStr() */
			assert(yylval->sval[1] == '\'');
			if (GDKstrFromStr((unsigned char *) str,
					  (unsigned char *) yylval->sval + 2,
					  lc->yycur-lc->yysval - 2, '\'') < 0) {
				/* strip the GDKERROR prefix (or a leading '!')
				 * from the message */
				char *err = GDKerrbuf;
				if (strncmp(err, GDKERROR, strlen(GDKERROR)) == 0)
					err += strlen(GDKERROR);
				else if (*err == '!')
					err++;
				sql_error(c, 1, SQLSTATE(42000) "%s", err);
				return LEX_ERROR;
			}
			quote = '\'';
			break;
		case 'u':
		case 'U':
			/* unicode string or identifier: copied verbatim after
			 * the u& prefix and opening quote */
			assert(yylval->sval[1] == '&');
			assert(yylval->sval[2] == '\'' || yylval->sval[2] == '"');
			strcpy(str, yylval->sval + 3);
			token = yylval->sval[2] == '\'' ? USTRING : UIDENT;
			quote = yylval->sval[2];
			lc->next_string_is_raw = true;
			break;
		case 'x':
		case 'X':
			/* blob: copy, turning doubled quotes into single ones */
			assert(yylval->sval[1] == '\'');
			dst = str;
			for (char *src = yylval->sval + 2; *src; dst++)
				if ((*dst = *src++) == '\'' && *src == '\'')
					src++;
			*dst = 0;
			quote = '\'';
			token = XSTRING;
			lc->next_string_is_raw = true;
			break;
		case 'r':
		case 'R':
			/* raw string: copy, turning doubled quotes into single
			 * ones */
			assert(yylval->sval[1] == '\'');
			dst = str;
			for (char *src = yylval->sval + 2; *src; dst++)
				if ((*dst = *src++) == '\'' && *src == '\'')
					src++;
			quote = '\'';
			*dst = 0;
			break;
		default:
			/* plain '...' string: raw copy or escape processing,
			 * depending on the scanner's string mode */
			if (lc->raw_string_mode || lc->next_string_is_raw) {
				dst = str;
				for (char *src = yylval->sval + 1; *src; dst++)
					if ((*dst = *src++) == '\'' && *src == '\'')
						src++;
				*dst = 0;
			} else {
				if (GDKstrFromStr((unsigned char *)str,
						  (unsigned char *)yylval->sval + 1,
						  lc->yycur - lc->yysval - 1,
						  '\'') < 0) {
					sql_error(c, 1, SQLSTATE(42000) "%s", GDKerrbuf);
					return LEX_ERROR;
				}
			}
			break;
		}
		yylval->sval = str;

		/* reset original */
		lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote;
	} else {
		lc->next_string_is_raw = false;
	}

	return(token);
}
1600 :
1601 : static int scanner( YYSTYPE *yylval, void *m, bool log);
1602 :
1603 : static int
1604 28665582 : scanner(YYSTYPE * yylval, void *parm, bool log)
1605 : {
1606 28665582 : int token;
1607 28665582 : mvc *c = (mvc *) parm;
1608 28665582 : struct scanner *lc = &c->scanner;
1609 28665582 : size_t pos;
1610 :
1611 : /* store position for when view's query ends */
1612 28665582 : pos = lc->rs->pos + lc->yycur;
1613 :
1614 28665582 : token = sql_get_next_token(yylval, parm);
1615 :
1616 28659073 : if (token == NOT) {
1617 77052 : int next = scanner(yylval, parm, false);
1618 :
1619 77052 : if (next == NOT) {
1620 2 : return scanner(yylval, parm, false);
1621 : } else if (next == EXISTS) {
1622 : token = NOT_EXISTS;
1623 : } else if (next == BETWEEN) {
1624 : token = NOT_BETWEEN;
1625 : } else if (next == sqlIN) {
1626 : token = NOT_IN;
1627 : } else if (next == LIKE) {
1628 : token = NOT_LIKE;
1629 : } else if (next == ILIKE) {
1630 : token = NOT_ILIKE;
1631 : } else {
1632 62693 : lc->yynext = next;
1633 : }
1634 28582021 : } else if (token == SCOLON) {
1635 : /* ignore semi-colon(s) following a semi-colon */
1636 1000680 : if (lc->yylast == SCOLON) {
1637 131729 : size_t prev = lc->yycur;
1638 131730 : while ((token = sql_get_next_token(yylval, parm)) == SCOLON)
1639 1 : prev = lc->yycur;
1640 :
1641 : /* skip the skipped stuff also in the buffer */
1642 131641 : lc->rs->pos += prev;
1643 131641 : lc->yycur -= prev;
1644 : }
1645 : }
1646 :
1647 28658983 : if (lc->log && log)
1648 0 : mnstr_write(lc->log, lc->rs->buf+pos, lc->rs->pos + lc->yycur - pos, 1);
1649 :
1650 28658983 : lc->started += (token != EOF);
1651 28658983 : return token;
1652 : }
1653 :
1654 : /* also see sql_parser.y */
1655 : extern int sqllex(YYSTYPE * yylval, void *parm);
1656 :
1657 : int
1658 28589896 : sqllex(YYSTYPE * yylval, void *parm)
1659 : {
1660 28589896 : return scanner(yylval, parm, true);
1661 : }
|