Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include <wctype.h>
15 : #include "sql_mem.h"
16 : #include "sql_scan.h"
17 : #include "sql_types.h"
18 : #include "sql_symbol.h"
19 : #include "sql_mvc.h"
20 : #include "sql_parser.tab.h"
21 : #include "sql_semantic.h"
22 : #include "sql_parser.h" /* for sql_error() */
23 :
24 : #include "stream.h"
25 : #include "mapi_prompt.h"
26 : #include <unistd.h>
27 : #include <string.h>
28 : #include <ctype.h>
29 : #include "sql_keyword.h"
30 :
31 : /**
32 : * Removes all comments before the query. In query comments are kept.
33 : */
34 : char *
35 401909 : query_cleaned(allocator *sa, const char *query)
36 : {
37 401909 : char *q, *r, *c = NULL;
38 401909 : int lines = 0;
39 401909 : int quote = 0; /* inside quotes ('..', "..", {..}) */
40 401909 : bool bs = false; /* seen a backslash in a quoted string */
41 401909 : bool incomment1 = false; /* inside traditional C style comment */
42 401909 : bool incomment2 = false; /* inside comment starting with -- */
43 401909 : bool inline_comment = false;
44 :
45 401909 : r = SA_NEW_ARRAY(sa, char, strlen(query) + 1);
46 401906 : if(!r)
47 : return NULL;
48 :
49 67919755 : for (q = r; *query; query++) {
50 67517849 : if (incomment1) {
51 16116 : if (*query == '/' && query[-1] == '*') {
52 233 : incomment1 = false;
53 233 : if (c == r && lines > 0) {
54 225 : q = r; // reset to beginning
55 225 : lines = 0;
56 225 : continue;
57 : }
58 : }
59 15891 : if (*query == '\n') lines++;
60 15891 : *q++ = *query;
61 67501733 : } else if (incomment2) {
62 827681 : if (*query == '\n') {
63 2834 : incomment2 = false;
64 2834 : inline_comment = false;
65 : /* add newline only if comment doesn't
66 : * occupy whole line */
67 2834 : if (q > r && q[-1] != '\n'){
68 939 : *q++ = '\n';
69 939 : lines++;
70 : }
71 824847 : } else if (inline_comment){
72 20650 : *q++ = *query; // preserve in line query comments
73 : }
74 66674052 : } else if (quote) {
75 22272060 : if (bs) {
76 : bs = false;
77 22268763 : } else if (*query == '\\') {
78 : bs = true;
79 22265466 : } else if (*query == quote) {
80 675117 : quote = 0;
81 : }
82 22272060 : *q++ = *query;
83 44401992 : } else if (*query == '"' || *query == '\'') {
84 674649 : quote = *query;
85 674649 : *q++ = *query;
86 43727343 : } else if (*query == '{') {
87 513 : quote = '}';
88 513 : *q++ = *query;
89 43726830 : } else if (*query == '-' && query[1] == '-') {
90 2834 : if (q > r && q[-1] != '\n') {
91 939 : inline_comment = true;
92 939 : *q++ = *query; // preserve in line query comments
93 : }
94 : incomment2 = true;
95 43723996 : } else if (*query == '/' && query[1] == '*') {
96 233 : incomment1 = true;
97 233 : c = q;
98 233 : *q++ = *query;
99 43723763 : } else if (*query == '\n') {
100 : /* collapse newlines */
101 865107 : if (q > r && q[-1] != '\n') {
102 823130 : *q++ = '\n';
103 823130 : lines++;
104 : }
105 42858656 : } else if (*query == ' ' || *query == '\t') {
106 : /* collapse white space */
107 7059519 : if (q > r && q[-1] != ' ')
108 5574464 : *q++ = ' ';
109 : } else {
110 35799137 : *q++ = *query;
111 : }
112 : }
113 401906 : *q = 0;
114 401906 : return r;
115 : }
116 :
117 : int
118 331 : scanner_init_keywords(void)
119 : {
120 331 : int failed = 0;
121 :
122 331 : failed += keywords_insert("false", BOOL_FALSE);
123 331 : failed += keywords_insert("true", BOOL_TRUE);
124 331 : failed += keywords_insert("bool", sqlBOOL);
125 :
126 331 : failed += keywords_insert("ALTER", ALTER);
127 331 : failed += keywords_insert("ADD", ADD);
128 331 : failed += keywords_insert("AND", AND);
129 :
130 331 : failed += keywords_insert("RANK", RANK);
131 331 : failed += keywords_insert("DENSE_RANK", RANK);
132 331 : failed += keywords_insert("PERCENT_RANK", RANK);
133 331 : failed += keywords_insert("CUME_DIST", RANK);
134 331 : failed += keywords_insert("ROW_NUMBER", RANK);
135 331 : failed += keywords_insert("NTILE", RANK);
136 331 : failed += keywords_insert("LAG", RANK);
137 331 : failed += keywords_insert("LEAD", RANK);
138 331 : failed += keywords_insert("FETCH", FETCH);
139 331 : failed += keywords_insert("FIRST_VALUE", RANK);
140 331 : failed += keywords_insert("LAST_VALUE", RANK);
141 331 : failed += keywords_insert("NTH_VALUE", RANK);
142 :
143 331 : failed += keywords_insert("BEST", BEST);
144 331 : failed += keywords_insert("EFFORT", EFFORT);
145 :
146 331 : failed += keywords_insert("AS", AS);
147 331 : failed += keywords_insert("ASC", ASC);
148 331 : failed += keywords_insert("AUTHORIZATION", AUTHORIZATION);
149 331 : failed += keywords_insert("BETWEEN", BETWEEN);
150 331 : failed += keywords_insert("SYMMETRIC", SYMMETRIC);
151 331 : failed += keywords_insert("ASYMMETRIC", ASYMMETRIC);
152 331 : failed += keywords_insert("BY", BY);
153 331 : failed += keywords_insert("CAST", CAST);
154 331 : failed += keywords_insert("CONVERT", CONVERT);
155 331 : failed += keywords_insert("CHARACTER", CHARACTER);
156 331 : failed += keywords_insert("CHAR", CHARACTER);
157 331 : failed += keywords_insert("VARYING", VARYING);
158 331 : failed += keywords_insert("VARCHAR", VARCHAR);
159 331 : failed += keywords_insert("BINARY", BINARY);
160 331 : failed += keywords_insert("LARGE", LARGE);
161 331 : failed += keywords_insert("OBJECT", OBJECT);
162 331 : failed += keywords_insert("CLOB", CLOB);
163 331 : failed += keywords_insert("BLOB", sqlBLOB);
164 331 : failed += keywords_insert("TEXT", sqlTEXT);
165 331 : failed += keywords_insert("TINYTEXT", sqlTEXT);
166 331 : failed += keywords_insert("STRING", CLOB); /* ? */
167 331 : failed += keywords_insert("CHECK", CHECK);
168 331 : failed += keywords_insert("CLIENT", CLIENT);
169 331 : failed += keywords_insert("SERVER", SERVER);
170 331 : failed += keywords_insert("COMMENT", COMMENT);
171 331 : failed += keywords_insert("CONSTRAINT", CONSTRAINT);
172 331 : failed += keywords_insert("CREATE", CREATE);
173 331 : failed += keywords_insert("CROSS", CROSS);
174 331 : failed += keywords_insert("COPY", COPY);
175 331 : failed += keywords_insert("RECORDS", RECORDS);
176 331 : failed += keywords_insert("DELIMITERS", DELIMITERS);
177 331 : failed += keywords_insert("STDIN", STDIN);
178 331 : failed += keywords_insert("STDOUT", STDOUT);
179 :
180 331 : failed += keywords_insert("TINYINT", TINYINT);
181 331 : failed += keywords_insert("SMALLINT", SMALLINT);
182 331 : failed += keywords_insert("INTEGER", sqlINTEGER);
183 331 : failed += keywords_insert("INT", sqlINTEGER);
184 331 : failed += keywords_insert("MEDIUMINT", sqlINTEGER);
185 331 : failed += keywords_insert("BIGINT", BIGINT);
186 : #ifdef HAVE_HGE
187 331 : failed += keywords_insert("HUGEINT", HUGEINT);
188 : #endif
189 331 : failed += keywords_insert("DEC", sqlDECIMAL);
190 331 : failed += keywords_insert("DECIMAL", sqlDECIMAL);
191 331 : failed += keywords_insert("NUMERIC", sqlDECIMAL);
192 331 : failed += keywords_insert("DECLARE", DECLARE);
193 331 : failed += keywords_insert("DEFAULT", DEFAULT);
194 331 : failed += keywords_insert("DESC", DESC);
195 331 : failed += keywords_insert("DISTINCT", DISTINCT);
196 331 : failed += keywords_insert("DOUBLE", sqlDOUBLE);
197 331 : failed += keywords_insert("REAL", sqlREAL);
198 331 : failed += keywords_insert("DROP", DROP);
199 331 : failed += keywords_insert("ESCAPE", ESCAPE);
200 331 : failed += keywords_insert("EXISTS", EXISTS);
201 331 : failed += keywords_insert("UESCAPE", UESCAPE);
202 331 : failed += keywords_insert("EXTRACT", EXTRACT);
203 331 : failed += keywords_insert("FLOAT", sqlFLOAT);
204 331 : failed += keywords_insert("FOR", FOR);
205 331 : failed += keywords_insert("FOREIGN", FOREIGN);
206 331 : failed += keywords_insert("FROM", FROM);
207 331 : failed += keywords_insert("FWF", FWF);
208 :
209 331 : failed += keywords_insert("BIG", BIG);
210 331 : failed += keywords_insert("LITTLE", LITTLE);
211 331 : failed += keywords_insert("NATIVE", NATIVE);
212 331 : failed += keywords_insert("ENDIAN", ENDIAN);
213 :
214 331 : failed += keywords_insert("REFERENCES", REFERENCES);
215 :
216 331 : failed += keywords_insert("MATCH", MATCH);
217 331 : failed += keywords_insert("FULL", FULL);
218 331 : failed += keywords_insert("PARTIAL", PARTIAL);
219 331 : failed += keywords_insert("SIMPLE", SIMPLE);
220 :
221 331 : failed += keywords_insert("INSERT", INSERT);
222 331 : failed += keywords_insert("UPDATE", UPDATE);
223 331 : failed += keywords_insert("DELETE", sqlDELETE);
224 331 : failed += keywords_insert("TRUNCATE", TRUNCATE);
225 331 : failed += keywords_insert("MATCHED", MATCHED);
226 :
227 331 : failed += keywords_insert("ACTION", ACTION);
228 331 : failed += keywords_insert("CASCADE", CASCADE);
229 331 : failed += keywords_insert("RESTRICT", RESTRICT);
230 331 : failed += keywords_insert("FIRST", FIRST);
231 331 : failed += keywords_insert("GLOBAL", GLOBAL);
232 331 : failed += keywords_insert("GROUP", sqlGROUP);
233 331 : failed += keywords_insert("GROUPING", GROUPING);
234 331 : failed += keywords_insert("ROLLUP", ROLLUP);
235 331 : failed += keywords_insert("CUBE", CUBE);
236 331 : failed += keywords_insert("HAVING", HAVING);
237 331 : failed += keywords_insert("ILIKE", ILIKE);
238 331 : failed += keywords_insert("IMPRINTS", IMPRINTS);
239 331 : failed += keywords_insert("IN", sqlIN);
240 331 : failed += keywords_insert("INNER", INNER);
241 331 : failed += keywords_insert("INTO", INTO);
242 331 : failed += keywords_insert("IS", IS);
243 331 : failed += keywords_insert("JOIN", JOIN);
244 331 : failed += keywords_insert("KEY", KEY);
245 331 : failed += keywords_insert("LATERAL", LATERAL);
246 331 : failed += keywords_insert("LEFT", LEFT);
247 331 : failed += keywords_insert("LIKE", LIKE);
248 331 : failed += keywords_insert("LIMIT", LIMIT);
249 331 : failed += keywords_insert("SAMPLE", SAMPLE);
250 331 : failed += keywords_insert("SEED", SEED);
251 331 : failed += keywords_insert("LAST", LAST);
252 331 : failed += keywords_insert("LOCAL", LOCAL);
253 331 : failed += keywords_insert("NATURAL", NATURAL);
254 331 : failed += keywords_insert("NOT", NOT);
255 331 : failed += keywords_insert("NULL", sqlNULL);
256 331 : failed += keywords_insert("NULLS", NULLS);
257 331 : failed += keywords_insert("OFFSET", OFFSET);
258 331 : failed += keywords_insert("ON", ON);
259 331 : failed += keywords_insert("OPTIONS", OPTIONS);
260 331 : failed += keywords_insert("OPTION", OPTION);
261 331 : failed += keywords_insert("OR", OR);
262 331 : failed += keywords_insert("ORDER", ORDER);
263 331 : failed += keywords_insert("ORDERED", ORDERED);
264 331 : failed += keywords_insert("OUTER", OUTER);
265 331 : failed += keywords_insert("OVER", OVER);
266 331 : failed += keywords_insert("PARTITION", PARTITION);
267 331 : failed += keywords_insert("PATH", PATH);
268 331 : failed += keywords_insert("PRECISION", PRECISION);
269 331 : failed += keywords_insert("PRIMARY", PRIMARY);
270 :
271 331 : failed += keywords_insert("USER", USER);
272 331 : failed += keywords_insert("RENAME", RENAME);
273 331 : failed += keywords_insert("UNENCRYPTED", UNENCRYPTED);
274 331 : failed += keywords_insert("ENCRYPTED", ENCRYPTED);
275 331 : failed += keywords_insert("PASSWORD", PASSWORD);
276 331 : failed += keywords_insert("GRANT", GRANT);
277 331 : failed += keywords_insert("REVOKE", REVOKE);
278 331 : failed += keywords_insert("ROLE", ROLE);
279 331 : failed += keywords_insert("ADMIN", ADMIN);
280 331 : failed += keywords_insert("PRIVILEGES", PRIVILEGES);
281 331 : failed += keywords_insert("PUBLIC", PUBLIC);
282 331 : failed += keywords_insert("CURRENT_USER", CURRENT_USER);
283 331 : failed += keywords_insert("CURRENT_ROLE", CURRENT_ROLE);
284 331 : failed += keywords_insert("SESSION_USER", SESSION_USER);
285 331 : failed += keywords_insert("CURRENT_SCHEMA", CURRENT_SCHEMA);
286 331 : failed += keywords_insert("SESSION", sqlSESSION);
287 331 : failed += keywords_insert("MAX_MEMORY", MAX_MEMORY);
288 331 : failed += keywords_insert("MAX_WORKERS", MAX_WORKERS);
289 331 : failed += keywords_insert("OPTIMIZER", OPTIMIZER);
290 :
291 331 : failed += keywords_insert("RIGHT", RIGHT);
292 331 : failed += keywords_insert("SCHEMA", SCHEMA);
293 331 : failed += keywords_insert("SELECT", SELECT);
294 331 : failed += keywords_insert("SET", SET);
295 331 : failed += keywords_insert("SETS", SETS);
296 331 : failed += keywords_insert("AUTO_COMMIT", AUTO_COMMIT);
297 :
298 331 : failed += keywords_insert("ALL", ALL);
299 331 : failed += keywords_insert("ANY", ANY);
300 331 : failed += keywords_insert("SOME", SOME);
301 331 : failed += keywords_insert("EVERY", ANY);
302 : /*
303 : failed += keywords_insert("SQLCODE", SQLCODE );
304 : */
305 331 : failed += keywords_insert("COLUMN", COLUMN);
306 331 : failed += keywords_insert("TABLE", TABLE);
307 331 : failed += keywords_insert("TEMPORARY", TEMPORARY);
308 331 : failed += keywords_insert("TEMP", TEMP);
309 331 : failed += keywords_insert("REMOTE", REMOTE);
310 331 : failed += keywords_insert("MERGE", MERGE);
311 331 : failed += keywords_insert("REPLICA", REPLICA);
312 331 : failed += keywords_insert("UNLOGGED", UNLOGGED);
313 331 : failed += keywords_insert("TO", TO);
314 331 : failed += keywords_insert("UNION", UNION);
315 331 : failed += keywords_insert("EXCEPT", EXCEPT);
316 331 : failed += keywords_insert("INTERSECT", INTERSECT);
317 331 : failed += keywords_insert("CORRESPONDING", CORRESPONDING);
318 331 : failed += keywords_insert("UNIQUE", UNIQUE);
319 331 : failed += keywords_insert("USING", USING);
320 331 : failed += keywords_insert("VALUES", VALUES);
321 331 : failed += keywords_insert("VIEW", VIEW);
322 331 : failed += keywords_insert("WHERE", WHERE);
323 331 : failed += keywords_insert("WITH", WITH);
324 331 : failed += keywords_insert("WITHOUT", WITHOUT);
325 331 : failed += keywords_insert("DATA", DATA);
326 :
327 331 : failed += keywords_insert("DATE", sqlDATE);
328 331 : failed += keywords_insert("TIME", TIME);
329 331 : failed += keywords_insert("TIMESTAMP", TIMESTAMP);
330 331 : failed += keywords_insert("INTERVAL", INTERVAL);
331 331 : failed += keywords_insert("CURRENT_DATE", CURRENT_DATE);
332 331 : failed += keywords_insert("CURRENT_TIME", CURRENT_TIME);
333 331 : failed += keywords_insert("CURRENT_TIMESTAMP", CURRENT_TIMESTAMP);
334 331 : failed += keywords_insert("CURRENT_TIMEZONE", CURRENT_TIMEZONE);
335 331 : failed += keywords_insert("NOW", CURRENT_TIMESTAMP);
336 331 : failed += keywords_insert("LOCALTIME", LOCALTIME);
337 331 : failed += keywords_insert("LOCALTIMESTAMP", LOCALTIMESTAMP);
338 331 : failed += keywords_insert("ZONE", ZONE);
339 :
340 331 : failed += keywords_insert("CENTURY", CENTURY);
341 331 : failed += keywords_insert("DECADE", DECADE);
342 331 : failed += keywords_insert("YEAR", YEAR);
343 331 : failed += keywords_insert("QUARTER", QUARTER);
344 331 : failed += keywords_insert("MONTH", MONTH);
345 331 : failed += keywords_insert("WEEK", WEEK);
346 331 : failed += keywords_insert("DOW", DOW);
347 331 : failed += keywords_insert("DOY", DOY);
348 331 : failed += keywords_insert("DAY", DAY);
349 331 : failed += keywords_insert("HOUR", HOUR);
350 331 : failed += keywords_insert("MINUTE", MINUTE);
351 331 : failed += keywords_insert("SECOND", SECOND);
352 331 : failed += keywords_insert("EPOCH", EPOCH);
353 :
354 331 : failed += keywords_insert("POSITION", POSITION);
355 331 : failed += keywords_insert("SUBSTRING", SUBSTRING);
356 331 : failed += keywords_insert("SPLIT_PART", SPLIT_PART);
357 331 : failed += keywords_insert("TRIM", TRIM);
358 331 : failed += keywords_insert("LEADING", LEADING);
359 331 : failed += keywords_insert("TRAILING", TRAILING);
360 331 : failed += keywords_insert("BOTH", BOTH);
361 :
362 331 : failed += keywords_insert("CASE", CASE);
363 331 : failed += keywords_insert("WHEN", WHEN);
364 331 : failed += keywords_insert("THEN", THEN);
365 331 : failed += keywords_insert("ELSE", ELSE);
366 331 : failed += keywords_insert("END", END);
367 331 : failed += keywords_insert("NULLIF", NULLIF);
368 331 : failed += keywords_insert("COALESCE", COALESCE);
369 331 : failed += keywords_insert("ELSEIF", ELSEIF);
370 331 : failed += keywords_insert("IF", IF);
371 331 : failed += keywords_insert("WHILE", WHILE);
372 331 : failed += keywords_insert("DO", DO);
373 :
374 331 : failed += keywords_insert("COMMIT", COMMIT);
375 331 : failed += keywords_insert("ROLLBACK", ROLLBACK);
376 331 : failed += keywords_insert("SAVEPOINT", SAVEPOINT);
377 331 : failed += keywords_insert("RELEASE", RELEASE);
378 331 : failed += keywords_insert("WORK", WORK);
379 331 : failed += keywords_insert("CHAIN", CHAIN);
380 331 : failed += keywords_insert("PRESERVE", PRESERVE);
381 331 : failed += keywords_insert("ROWS", ROWS);
382 331 : failed += keywords_insert("NO", NO);
383 331 : failed += keywords_insert("START", START);
384 331 : failed += keywords_insert("TRANSACTION", TRANSACTION);
385 331 : failed += keywords_insert("READ", READ);
386 331 : failed += keywords_insert("WRITE", WRITE);
387 331 : failed += keywords_insert("ONLY", ONLY);
388 331 : failed += keywords_insert("ISOLATION", ISOLATION);
389 331 : failed += keywords_insert("LEVEL", LEVEL);
390 331 : failed += keywords_insert("UNCOMMITTED", UNCOMMITTED);
391 331 : failed += keywords_insert("COMMITTED", COMMITTED);
392 331 : failed += keywords_insert("REPEATABLE", sqlREPEATABLE);
393 331 : failed += keywords_insert("SNAPSHOT", SNAPSHOT);
394 331 : failed += keywords_insert("SERIALIZABLE", SERIALIZABLE);
395 331 : failed += keywords_insert("DIAGNOSTICS", DIAGNOSTICS);
396 331 : failed += keywords_insert("SIZE", sqlSIZE);
397 331 : failed += keywords_insert("STORAGE", STORAGE);
398 :
399 331 : failed += keywords_insert("TYPE", TYPE);
400 331 : failed += keywords_insert("PROCEDURE", PROCEDURE);
401 331 : failed += keywords_insert("FUNCTION", FUNCTION);
402 331 : failed += keywords_insert("LOADER", sqlLOADER);
403 331 : failed += keywords_insert("REPLACE", REPLACE);
404 :
405 331 : failed += keywords_insert("FIELD", FIELD);
406 331 : failed += keywords_insert("FILTER", FILTER);
407 331 : failed += keywords_insert("AGGREGATE", AGGREGATE);
408 331 : failed += keywords_insert("RETURNS", RETURNS);
409 331 : failed += keywords_insert("EXTERNAL", EXTERNAL);
410 331 : failed += keywords_insert("NAME", sqlNAME);
411 331 : failed += keywords_insert("RETURN", RETURN);
412 331 : failed += keywords_insert("CALL", CALL);
413 331 : failed += keywords_insert("LANGUAGE", LANGUAGE);
414 :
415 331 : failed += keywords_insert("ANALYZE", ANALYZE);
416 331 : failed += keywords_insert("EXPLAIN", SQL_EXPLAIN);
417 331 : failed += keywords_insert("PLAN", SQL_PLAN);
418 331 : failed += keywords_insert("TRACE", SQL_TRACE);
419 331 : failed += keywords_insert("PREPARE", PREPARE);
420 331 : failed += keywords_insert("PREP", PREP);
421 331 : failed += keywords_insert("EXECUTE", EXECUTE);
422 331 : failed += keywords_insert("EXEC", EXEC);
423 331 : failed += keywords_insert("DEALLOCATE", DEALLOCATE);
424 :
425 331 : failed += keywords_insert("INDEX", INDEX);
426 :
427 331 : failed += keywords_insert("SEQUENCE", SEQUENCE);
428 331 : failed += keywords_insert("RESTART", RESTART);
429 331 : failed += keywords_insert("INCREMENT", INCREMENT);
430 331 : failed += keywords_insert("MAXVALUE", MAXVALUE);
431 331 : failed += keywords_insert("MINVALUE", MINVALUE);
432 331 : failed += keywords_insert("CYCLE", CYCLE);
433 331 : failed += keywords_insert("CACHE", CACHE);
434 331 : failed += keywords_insert("NEXT", NEXT);
435 331 : failed += keywords_insert("VALUE", VALUE);
436 331 : failed += keywords_insert("GENERATED", GENERATED);
437 331 : failed += keywords_insert("ALWAYS", ALWAYS);
438 331 : failed += keywords_insert("IDENTITY", IDENTITY);
439 331 : failed += keywords_insert("SERIAL", SERIAL);
440 331 : failed += keywords_insert("BIGSERIAL", BIGSERIAL);
441 331 : failed += keywords_insert("AUTO_INCREMENT", AUTO_INCREMENT);
442 331 : failed += keywords_insert("CONTINUE", CONTINUE);
443 :
444 331 : failed += keywords_insert("TRIGGER", TRIGGER);
445 331 : failed += keywords_insert("ATOMIC", ATOMIC);
446 331 : failed += keywords_insert("BEGIN", BEGIN);
447 331 : failed += keywords_insert("OF", OF);
448 331 : failed += keywords_insert("BEFORE", BEFORE);
449 331 : failed += keywords_insert("AFTER", AFTER);
450 331 : failed += keywords_insert("ROW", ROW);
451 331 : failed += keywords_insert("STATEMENT", STATEMENT);
452 331 : failed += keywords_insert("NEW", sqlNEW);
453 331 : failed += keywords_insert("OLD", OLD);
454 331 : failed += keywords_insert("EACH", EACH);
455 331 : failed += keywords_insert("REFERENCING", REFERENCING);
456 :
457 331 : failed += keywords_insert("RANGE", RANGE);
458 331 : failed += keywords_insert("UNBOUNDED", UNBOUNDED);
459 331 : failed += keywords_insert("PRECEDING", PRECEDING);
460 331 : failed += keywords_insert("FOLLOWING", FOLLOWING);
461 331 : failed += keywords_insert("CURRENT", CURRENT);
462 331 : failed += keywords_insert("EXCLUDE", EXCLUDE);
463 331 : failed += keywords_insert("OTHERS", OTHERS);
464 331 : failed += keywords_insert("TIES", TIES);
465 331 : failed += keywords_insert("GROUPS", GROUPS);
466 331 : failed += keywords_insert("WINDOW", WINDOW);
467 :
468 : /* special SQL/XML keywords */
469 331 : failed += keywords_insert("XMLCOMMENT", XMLCOMMENT);
470 331 : failed += keywords_insert("XMLCONCAT", XMLCONCAT);
471 331 : failed += keywords_insert("XMLDOCUMENT", XMLDOCUMENT);
472 331 : failed += keywords_insert("XMLELEMENT", XMLELEMENT);
473 331 : failed += keywords_insert("XMLATTRIBUTES", XMLATTRIBUTES);
474 331 : failed += keywords_insert("XMLFOREST", XMLFOREST);
475 331 : failed += keywords_insert("XMLPARSE", XMLPARSE);
476 331 : failed += keywords_insert("STRIP", STRIP);
477 331 : failed += keywords_insert("WHITESPACE", WHITESPACE);
478 331 : failed += keywords_insert("XMLPI", XMLPI);
479 331 : failed += keywords_insert("XMLQUERY", XMLQUERY);
480 331 : failed += keywords_insert("PASSING", PASSING);
481 331 : failed += keywords_insert("XMLTEXT", XMLTEXT);
482 331 : failed += keywords_insert("NIL", NIL);
483 331 : failed += keywords_insert("REF", REF);
484 331 : failed += keywords_insert("ABSENT", ABSENT);
485 331 : failed += keywords_insert("DOCUMENT", DOCUMENT);
486 331 : failed += keywords_insert("ELEMENT", ELEMENT);
487 331 : failed += keywords_insert("CONTENT", CONTENT);
488 331 : failed += keywords_insert("XMLNAMESPACES", XMLNAMESPACES);
489 331 : failed += keywords_insert("NAMESPACE", NAMESPACE);
490 331 : failed += keywords_insert("XMLVALIDATE", XMLVALIDATE);
491 331 : failed += keywords_insert("RETURNING", RETURNING);
492 331 : failed += keywords_insert("LOCATION", LOCATION);
493 331 : failed += keywords_insert("ID", ID);
494 331 : failed += keywords_insert("ACCORDING", ACCORDING);
495 331 : failed += keywords_insert("XMLSCHEMA", XMLSCHEMA);
496 331 : failed += keywords_insert("URI", URI);
497 331 : failed += keywords_insert("XMLAGG", XMLAGG);
498 :
499 : /* keywords for opengis */
500 331 : failed += keywords_insert("GEOMETRY", GEOMETRY);
501 :
502 331 : failed += keywords_insert("POINT", GEOMETRYSUBTYPE);
503 331 : failed += keywords_insert("LINESTRING", GEOMETRYSUBTYPE);
504 331 : failed += keywords_insert("POLYGON", GEOMETRYSUBTYPE);
505 331 : failed += keywords_insert("MULTIPOINT", GEOMETRYSUBTYPE);
506 331 : failed += keywords_insert("MULTILINESTRING", GEOMETRYSUBTYPE);
507 331 : failed += keywords_insert("MULTIPOLYGON", GEOMETRYSUBTYPE);
508 331 : failed += keywords_insert("GEOMETRYCOLLECTION", GEOMETRYSUBTYPE);
509 :
510 331 : failed += keywords_insert("POINTZ", GEOMETRYSUBTYPE);
511 331 : failed += keywords_insert("LINESTRINGZ", GEOMETRYSUBTYPE);
512 331 : failed += keywords_insert("POLYGONZ", GEOMETRYSUBTYPE);
513 331 : failed += keywords_insert("MULTIPOINTZ", GEOMETRYSUBTYPE);
514 331 : failed += keywords_insert("MULTILINESTRINGZ", GEOMETRYSUBTYPE);
515 331 : failed += keywords_insert("MULTIPOLYGONZ", GEOMETRYSUBTYPE);
516 331 : failed += keywords_insert("GEOMETRYCOLLECTIONZ", GEOMETRYSUBTYPE);
517 :
518 331 : failed += keywords_insert("POINTM", GEOMETRYSUBTYPE);
519 331 : failed += keywords_insert("LINESTRINGM", GEOMETRYSUBTYPE);
520 331 : failed += keywords_insert("POLYGONM", GEOMETRYSUBTYPE);
521 331 : failed += keywords_insert("MULTIPOINTM", GEOMETRYSUBTYPE);
522 331 : failed += keywords_insert("MULTILINESTRINGM", GEOMETRYSUBTYPE);
523 331 : failed += keywords_insert("MULTIPOLYGONM", GEOMETRYSUBTYPE);
524 331 : failed += keywords_insert("GEOMETRYCOLLECTIONM", GEOMETRYSUBTYPE);
525 :
526 331 : failed += keywords_insert("POINTZM", GEOMETRYSUBTYPE);
527 331 : failed += keywords_insert("LINESTRINGZM", GEOMETRYSUBTYPE);
528 331 : failed += keywords_insert("POLYGONZM", GEOMETRYSUBTYPE);
529 331 : failed += keywords_insert("MULTIPOINTZM", GEOMETRYSUBTYPE);
530 331 : failed += keywords_insert("MULTILINESTRINGZM", GEOMETRYSUBTYPE);
531 331 : failed += keywords_insert("MULTIPOLYGONZM", GEOMETRYSUBTYPE);
532 331 : failed += keywords_insert("GEOMETRYCOLLECTIONZM", GEOMETRYSUBTYPE);
533 331 : failed += keywords_insert("LOGIN", LOGIN);
534 : // odbc keywords
535 331 : failed += keywords_insert("d", ODBC_DATE_ESCAPE_PREFIX);
536 331 : failed += keywords_insert("t", ODBC_TIME_ESCAPE_PREFIX);
537 331 : failed += keywords_insert("ts", ODBC_TIMESTAMP_ESCAPE_PREFIX);
538 331 : failed += keywords_insert("guid", ODBC_GUID_ESCAPE_PREFIX);
539 331 : failed += keywords_insert("fn", ODBC_FUNC_ESCAPE_PREFIX);
540 331 : failed += keywords_insert("oj", ODBC_OJ_ESCAPE_PREFIX);
541 331 : failed += keywords_insert("DAYNAME", DAYNAME);
542 331 : failed += keywords_insert("IFNULL", IFNULL);
543 331 : failed += keywords_insert("MONTHNAME", MONTHNAME);
544 331 : failed += keywords_insert("TIMESTAMPADD", TIMESTAMPADD);
545 331 : failed += keywords_insert("TIMESTAMPDIFF", TIMESTAMPDIFF);
546 331 : failed += keywords_insert("SQL_BIGINT", SQL_BIGINT);
547 331 : failed += keywords_insert("SQL_BINARY", SQL_BINARY);
548 331 : failed += keywords_insert("SQL_BIT", SQL_BIT);
549 331 : failed += keywords_insert("SQL_CHAR", SQL_CHAR);
550 331 : failed += keywords_insert("SQL_DATE", SQL_DATE);
551 331 : failed += keywords_insert("SQL_DECIMAL", SQL_DECIMAL);
552 331 : failed += keywords_insert("SQL_DOUBLE", SQL_DOUBLE);
553 331 : failed += keywords_insert("SQL_FLOAT", SQL_FLOAT);
554 331 : failed += keywords_insert("SQL_GUID", SQL_GUID);
555 331 : failed += keywords_insert("SQL_HUGEINT", SQL_HUGEINT);
556 331 : failed += keywords_insert("SQL_INTEGER", SQL_INTEGER);
557 331 : failed += keywords_insert("SQL_INTERVAL_DAY", SQL_INTERVAL_DAY);
558 331 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_HOUR", SQL_INTERVAL_DAY_TO_HOUR);
559 331 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_MINUTE", SQL_INTERVAL_DAY_TO_MINUTE);
560 331 : failed += keywords_insert("SQL_INTERVAL_DAY_TO_SECOND", SQL_INTERVAL_DAY_TO_SECOND);
561 331 : failed += keywords_insert("SQL_INTERVAL_HOUR", SQL_INTERVAL_HOUR);
562 331 : failed += keywords_insert("SQL_INTERVAL_HOUR_TO_MINUTE", SQL_INTERVAL_HOUR_TO_MINUTE);
563 331 : failed += keywords_insert("SQL_INTERVAL_HOUR_TO_SECOND", SQL_INTERVAL_HOUR_TO_SECOND);
564 331 : failed += keywords_insert("SQL_INTERVAL_MINUTE", SQL_INTERVAL_MINUTE);
565 331 : failed += keywords_insert("SQL_INTERVAL_MINUTE_TO_SECOND", SQL_INTERVAL_MINUTE_TO_SECOND);
566 331 : failed += keywords_insert("SQL_INTERVAL_MONTH", SQL_INTERVAL_MONTH);
567 331 : failed += keywords_insert("SQL_INTERVAL_SECOND", SQL_INTERVAL_SECOND);
568 331 : failed += keywords_insert("SQL_INTERVAL_YEAR", SQL_INTERVAL_YEAR);
569 331 : failed += keywords_insert("SQL_INTERVAL_YEAR_TO_MONTH", SQL_INTERVAL_YEAR_TO_MONTH);
570 331 : failed += keywords_insert("SQL_LONGVARBINARY", SQL_LONGVARBINARY);
571 331 : failed += keywords_insert("SQL_LONGVARCHAR", SQL_LONGVARCHAR);
572 331 : failed += keywords_insert("SQL_NUMERIC", SQL_NUMERIC);
573 331 : failed += keywords_insert("SQL_REAL", SQL_REAL);
574 331 : failed += keywords_insert("SQL_SMALLINT", SQL_SMALLINT);
575 331 : failed += keywords_insert("SQL_TIME", SQL_TIME);
576 331 : failed += keywords_insert("SQL_TIMESTAMP", SQL_TIMESTAMP);
577 331 : failed += keywords_insert("SQL_TINYINT", SQL_TINYINT);
578 331 : failed += keywords_insert("SQL_VARBINARY", SQL_VARBINARY);
579 331 : failed += keywords_insert("SQL_VARCHAR", SQL_VARCHAR);
580 331 : failed += keywords_insert("SQL_WCHAR", SQL_WCHAR);
581 331 : failed += keywords_insert("SQL_WLONGVARCHAR", SQL_WLONGVARCHAR);
582 331 : failed += keywords_insert("SQL_WVARCHAR", SQL_WVARCHAR);
583 331 : failed += keywords_insert("SQL_TSI_FRAC_SECOND", SQL_TSI_FRAC_SECOND);
584 331 : failed += keywords_insert("SQL_TSI_SECOND", SQL_TSI_SECOND);
585 331 : failed += keywords_insert("SQL_TSI_MINUTE", SQL_TSI_MINUTE);
586 331 : failed += keywords_insert("SQL_TSI_HOUR", SQL_TSI_HOUR);
587 331 : failed += keywords_insert("SQL_TSI_DAY", SQL_TSI_DAY);
588 331 : failed += keywords_insert("SQL_TSI_WEEK", SQL_TSI_WEEK);
589 331 : failed += keywords_insert("SQL_TSI_MONTH", SQL_TSI_MONTH);
590 331 : failed += keywords_insert("SQL_TSI_QUARTER", SQL_TSI_QUARTER);
591 331 : failed += keywords_insert("SQL_TSI_YEAR", SQL_TSI_YEAR);
592 :
593 331 : failed += keywords_insert("LEAST", MARGFUNC);
594 331 : failed += keywords_insert("GREATEST", MARGFUNC);
595 331 : return failed;
596 : }
597 :
/* Look up the keyword starting `s` bytes past the current read position
 * of scanner `lc`'s input buffer.  Arguments are parenthesized so that
 * expressions such as `&scanner` or `a ? b : c` expand correctly. */
#define find_keyword_bs(lc, s) find_keyword((lc)->rs->buf + (lc)->rs->pos + (s))
599 :
600 : void
601 246161 : scanner_init(struct scanner *s, bstream *rs, stream *ws)
602 : {
603 492322 : *s = (struct scanner) {
604 : .rs = rs,
605 : .ws = ws,
606 : .mode = LINE_N,
607 246161 : .raw_string_mode = GDKgetenv_istrue("raw_strings"),
608 : .aborted = false,
609 : };
610 246161 : }
611 :
612 : void
613 1316678 : scanner_query_processed(struct scanner *s)
614 : {
615 1316678 : int cur;
616 :
617 1316678 : if (s->yybak) {
618 513977 : s->rs->buf[s->rs->pos + s->yycur] = s->yybak;
619 513977 : s->yybak = 0;
620 : }
621 1316678 : if (s->rs) {
622 1316678 : s->rs->pos += s->yycur;
623 : /* completely eat the query including white space after the ; */
624 2475773 : while (s->rs->pos < s->rs->len &&
625 2141410 : (cur = s->rs->buf[s->rs->pos], iswspace(cur))) {
626 1159095 : s->rs->pos++;
627 : }
628 : }
629 : /*assert(s->rs->pos <= s->rs->len);*/
630 1316678 : s->yycur = 0;
631 1316678 : s->started = 0;
632 1316678 : s->as = 0;
633 1316678 : s->schema = NULL;
634 1316678 : }
635 :
636 : static int
637 33 : scanner_error(mvc *lc, int cur)
638 : {
639 33 : switch (cur) {
640 0 : case EOF:
641 0 : (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected end of input");
642 0 : return EOF;
643 33 : default:
644 : /* on Windows at least, iswcntrl returns TRUE for
645 : * U+FEFF, but we just want consistent error
646 : * messages */
647 33 : (void) sql_error(lc, 1, SQLSTATE(42000) "Unexpected%s character (U+%04X)", iswcntrl(cur) && cur != 0xFEFF ? " control" : "", (unsigned) cur);
648 : }
649 33 : return LEX_ERROR;
650 : }
651 :
652 :
653 : /*
654 : UTF-8 encoding is as follows:
655 : U-00000000 - U-0000007F: 0xxxxxxx
656 : U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
657 : U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
658 : U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
659 : U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
660 : U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
661 : */
662 : /* To be correctly coded UTF-8, the sequence should be the shortest
663 : possible encoding of the value being encoded. This means that for
664 : an encoding of length n+1 (1 <= n <= 5), at least one of the bits in
665 : utf8chkmsk[n] should be non-zero (else the encoding could be
666 : shorter).
667 : */
668 : static const int utf8chkmsk[] = {
669 : 0x0000007f, /* bits 0-6: the payload of a 1-byte sequence */
670 : 0x00000780, /* bits 7-10: at least one set in a minimal 2-byte sequence */
671 : 0x0000f800, /* bits 11-15: at least one set in a minimal 3-byte sequence */
672 : 0x001f0000, /* bits 16-20: at least one set in a minimal 4-byte sequence */
673 : 0x03e00000, /* bits 21-25: at least one set in a minimal 5-byte sequence */
674 : 0x7c000000 /* bits 26-30: at least one set in a minimal 6-byte sequence */
675 : };
676 :
677 : static void
678 31660571 : utf8_putchar(struct scanner *lc, int ch)
679 : {
680 31660571 : if ((ch) < 0x80) {
681 31660566 : lc->yycur--;
682 5 : } else if ((ch) < 0x800) {
683 0 : lc->yycur -= 2;
684 5 : } else if ((ch) < 0x10000) {
685 5 : lc->yycur -= 3;
686 : } else {
687 0 : lc->yycur -= 4;
688 : }
689 31660571 : }
690 :
/* Make sure at least `n` bytes are available in the input buffer at
 * the current scan position (rs->pos + yycur), reading more from the
 * stream when necessary.
 *
 * Returns 1 when the bytes are available and EOF otherwise.  EOF is
 * returned when the scanner was already aborted, when the buffer is
 * short and either the mode is LINE_1 or no token of the current
 * statement has been started, when reading fails, or when the client
 * did not deliver more data.  lc->errstr is set to "Query aborted" on
 * an interrupt and on the two-byte marker handled at the end of the
 * loop.
 */
static inline int
scanner_read_more(struct scanner *lc, size_t n)
{
	bstream *b = lc->rs;
	bool more = false;	/* set once we prompted the client for more input */


	if (lc->aborted)
		return EOF;
	while (b->len < b->pos + lc->yycur + n) {

		if (lc->mode == LINE_1 || !lc->started)
			return EOF;

		/* query is not finished ask for more */
		if (b->eof || !isa_block_stream(b->s)) {
			if (bstream_getoob(b)) {
				lc->aborted = true;
				return EOF;
			}
			/* send the continuation prompt; only flush when the
			 * write succeeded */
			if (mnstr_write(lc->ws, PROMPT2, sizeof(PROMPT2) - 1, 1) == 1)
				mnstr_flush(lc->ws, MNSTR_FLUSH_DATA);
			b->eof = false;
			more = true;
		}
		/* we need more query text */
		if (bstream_next(b) < 0) {
			if (mnstr_errnr(b->s) == MNSTR_INTERRUPT) {
				// now what?
				lc->errstr = "Query aborted";
				lc->aborted = true;
				mnstr_clearerr(b->s);
			}
			return EOF;
		} else if (/* we asked for more data but didn't get any */
			   (more && b->eof && b->len < b->pos + lc->yycur + n))
			return EOF;
		/* the newly received data is exactly the two bytes '\200' '\n':
		 * treat it as an abort request and drop it from the buffer
		 * (presumably a marker sent by the client -- TODO confirm) */
		if (more && b->pos + lc->yycur + 2 == b->len && b->buf[b->pos + lc->yycur] == '\200' && b->buf[b->pos + lc->yycur + 1] == '\n') {
			lc->errstr = "Query aborted";
			b->len -= 2;
			b->buf[b->len] = 0;
			return EOF;
		}
	}
	return 1;
}
737 :
/* Read the next character from the input and advance lc->yycur past
 * it.
 *
 * A 7-bit byte is returned as is.  A multi-byte UTF-8 sequence is
 * decoded and its code point returned.  EOF is returned when no more
 * input is available or when the sequence is malformed; in the latter
 * case lc->errstr describes the problem.  lc->errstr is cleared
 * whenever at least one byte could be read.
 */
static inline int
scanner_getc(struct scanner *lc)
{
	bstream *b = lc->rs;
	unsigned char *s = NULL;
	int c, m, n, mask;

	if (scanner_read_more(lc, 1) == EOF) {
		//lc->errstr = SQLSTATE(42000) "end of input stream";
		return EOF;
	}
	lc->errstr = NULL;

	/* fetch the lead byte and step over it */
	s = (unsigned char *) b->buf + b->pos + lc->yycur++;
	if (((c = *s) & 0x80) == 0) {
		/* 7-bit char */
		return c;
	}
	/* count the 1 bits that follow the top bit of the lead byte */
	for (n = 0, m = 0x40; c & m; n++, m >>= 1)
		;
	/* n now is number of 10xxxxxx bytes that should follow */
	/* NOTE(review): the length test below uses b->pos + n without
	 * lc->yycur and runs before more input is requested -- confirm
	 * that this is intended */
	if (n == 0 || n >= 6 || (b->pos + n) > b->len) {
		/* incorrect UTF-8 sequence */
		/* n==0: c == 10xxxxxx */
		/* n>=6: c == 1111111x */
		lc->errstr = SQLSTATE(42000) "invalid start of UTF-8 sequence";
		goto error;
	}

	if (scanner_read_more(lc, (size_t) n) == EOF)
		return EOF;
	/* recompute the pointer only after the call above, which may have
	 * read more data into the buffer */
	s = (unsigned char *) b->buf + b->pos + lc->yycur;

	mask = utf8chkmsk[n];
	c &= ~(0xFFC0 >> n);	/* remove non-x bits */
	while (--n >= 0) {
		c <<= 6;
		lc->yycur++;
		if (((m = *s++) & 0xC0) != 0x80) {
			/* incorrect UTF-8 sequence: byte is not 10xxxxxx */
			/* this includes end-of-string (m == 0) */
			lc->errstr = SQLSTATE(42000) "invalid continuation in UTF-8 sequence";
			goto error;
		}
		c |= m & 0x3F;
	}
	if ((c & mask) == 0) {
		/* incorrect UTF-8 sequence: not shortest possible */
		lc->errstr = SQLSTATE(42000) "not shortest possible UTF-8 sequence";
		goto error;
	}

	return c;

error:
	if (b->pos + lc->yycur < b->len)	/* skip bogus char */
		lc->yycur++;
	return EOF;
}
797 :
798 : static int
799 28322250 : scanner_token(struct scanner *lc, int token)
800 : {
801 28322250 : lc->yybak = lc->rs->buf[lc->rs->pos + lc->yycur];
802 28322250 : lc->rs->buf[lc->rs->pos + lc->yycur] = 0;
803 28322250 : lc->yyval = token;
804 28322250 : return lc->yyval;
805 : }
806 :
/* Scan a quoted string or quoted identifier whose opening `quote`
 * character has already been consumed.
 *
 * When `escapes` is true a backslash escapes the following character,
 * so an escaped quote does not end the string.  A doubled quote
 * character is always taken as part of the string.
 *
 * Returns STRING (via scanner_token()) when the closing quote is
 * found, LEX_ERROR when a scan run reaches the length limit or the
 * input contains a NUL byte, the result of scanner_error() when a
 * U+FEFF character occurs inside a double-quoted string, and EOF (after
 * reporting an error) when the input ends first.
 */
static int
scanner_string(mvc *c, int quote, bool escapes)
{
	struct scanner *lc = &c->scanner;
	bstream *rs = lc->rs;
	int cur = quote;
	bool escape = false;	/* previous character was an unescaped backslash */
	/* 2048 bytes for double-quoted text, 1 << 30 otherwise; the count
	 * restarts at every pass of the outer loop */
	const size_t limit = quote == '"' ? 1 << 11 : 1 << 30;

	lc->started = 1;
	while (cur != EOF) {
		size_t pos = 0;
		const size_t yycur = rs->pos + lc->yycur;

		/* fast path: walk over plain 7-bit bytes straight in the
		 * buffer until a NUL, a byte with the high bit set, or an
		 * unescaped quote is seen */
		while (cur != EOF && (quote != '"' || cur != 0xFEFF) && pos < limit &&
		       (((cur = rs->buf[yycur + pos++]) & 0x80) == 0) &&
		       cur && (cur != quote || escape)) {
			if (escapes && cur == '\\')
				escape = !escape;
			else
				escape = false;
		}
		if (pos == limit) {
			(void) sql_error(c, 2, SQLSTATE(42000) "string too long");
			return LEX_ERROR;
		}
		/* BOM character not allowed as an identifier */
		if (cur == EOF || (quote == '"' && cur == 0xFEFF))
			return scanner_error(c, cur);
		lc->yycur += pos;
		/* check for quote escaped quote: Obscure SQL Rule */
		if (cur == quote && rs->buf[yycur + pos] == quote) {
			lc->yycur++;
			continue;
		}
		assert(yycur + pos <= rs->len + 1);
		if (cur == quote && !escape) {
			return scanner_token(lc, STRING);
		}
		lc->yycur--;	/* go back to current (possibly invalid) char */
		/* long utf8, if correct isn't the quote */
		if (!cur) {
			/* a NUL inside the valid part of the buffer is an
			 * error; a NUL at the end means more input is needed */
			if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
				(void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
				return LEX_ERROR;
			}
			cur = scanner_read_more(lc, 1);
		} else {
			/* let scanner_getc() decode and validate the
			 * multi-byte character */
			cur = scanner_getc(lc);
		}
	}
	(void) sql_error(c, 2, "%s", lc->errstr ? lc->errstr : SQLSTATE(42000) "Unexpected end of input");
	return EOF;
}
861 :
/* Scan a brace-delimited body {blah} into a single X_BODY token; the
 * opening '{' has already been consumed.  Only matching { and } are
 * counted, and a backslash toggles an escape state that keeps the scan
 * from stopping at that point.  Braces inside string literals are not
 * treated specially yet.
 *
 * Returns X_BODY (via scanner_token()) when the matching '}' is found,
 * LEX_ERROR when the body contains a NUL byte, and EOF (after
 * reporting an error) when the input ends first.
 */

static int
scanner_body(mvc *c)
{
	struct scanner *lc = &c->scanner;
	bstream *rs = lc->rs;
	int cur = (int) 'x';	/* any value other than EOF, to enter the loop */
	int blk = 1;		/* current brace nesting depth */
	bool escape = false;

	lc->started = 1;
	assert(rs->buf[rs->pos + lc->yycur-1] == '{');
	while (cur != EOF) {
		size_t pos = rs->pos + lc->yycur;

		/* walk over 7-bit bytes until the braces balance, a NUL is
		 * hit, or a byte with the high bit set is seen */
		while ((((cur = rs->buf[pos++]) & 0x80) == 0) && cur && (blk || escape)) {
			if (cur != '\\')
				escape = false;
			else
				escape = !escape;
			blk += cur =='{';
			blk -= cur =='}';
		}
		lc->yycur = pos - rs->pos;
		assert(pos <= rs->len + 1);
		if (blk == 0 && !escape){
			lc->yycur--;	/* go back to current (possibly invalid) char */
			return scanner_token(lc, X_BODY);
		}
		lc->yycur--;	/* go back to current (possibly invalid) char */
		if (!cur) {
			/* a NUL inside the valid part of the buffer is an
			 * error; a NUL at the end means more input is needed */
			if (lc->rs->len >= lc->rs->pos + lc->yycur + 1) {
				(void) sql_error(c, 2, SQLSTATE(42000) "NULL byte in string");
				return LEX_ERROR;
			}
			cur = scanner_read_more(lc, 1);
		} else {
			/* multi-byte character: decode and validate it */
			cur = scanner_getc(lc);
		}
	}
	(void) sql_error(c, 2, SQLSTATE(42000) "Unexpected end of input");
	return EOF;
}
908 :
909 : static int
910 13602378 : keyword_or_ident(mvc * c, int cur)
911 : {
912 13602378 : struct scanner *lc = &c->scanner;
913 13602378 : keyword *k = NULL;
914 13602378 : size_t s;
915 :
916 13602378 : lc->started = 1;
917 13602378 : utf8_putchar(lc, cur);
918 13602364 : s = lc->yycur;
919 13602364 : lc->yyval = IDENT;
920 81345547 : while ((cur = scanner_getc(lc)) != EOF) {
921 81345410 : if (!iswalnum(cur) && cur != '_') {
922 13602227 : utf8_putchar(lc, cur);
923 13602249 : (void)scanner_token(lc, IDENT);
924 13602249 : if ((k = find_keyword_bs(lc,s)))
925 8347425 : lc->yyval = k->token;
926 13602414 : return lc->yyval;
927 : }
928 : }
929 : if (cur < 0)
930 : return cur;
931 : (void)scanner_token(lc, IDENT);
932 : if ((k = find_keyword_bs(lc,s)))
933 : lc->yyval = k->token;
934 : return lc->yyval;
935 : }
936 :
937 : static int
938 14210382 : skip_white_space(struct scanner * lc)
939 : {
940 17836476 : int cur;
941 :
942 17836476 : do {
943 17836476 : lc->yysval = lc->yycur;
944 17836476 : } while ((cur = scanner_getc(lc)) != EOF && iswspace(cur));
945 14210345 : return cur;
946 : }
947 :
948 : static int
949 69402 : skip_c_comment(struct scanner * lc)
950 : {
951 69402 : int cur;
952 69402 : int prev = 0;
953 69402 : int started = lc->started;
954 69402 : int depth = 1;
955 :
956 69402 : lc->started = 1;
957 1399628 : while (depth > 0 && (cur = scanner_getc(lc)) != EOF) {
958 1330226 : if (prev == '*' && cur == '/')
959 69402 : depth--;
960 1260824 : else if (prev == '/' && cur == '*') {
961 : /* block comments can nest */
962 0 : cur = 0; /* prevent slash-star-slash from matching */
963 0 : depth++;
964 : }
965 : prev = cur;
966 : }
967 69402 : lc->yysval = lc->yycur;
968 69402 : lc->started = started;
969 : /* a comment is equivalent to a newline */
970 69402 : return cur == EOF ? cur : '\n';
971 : }
972 :
973 : static int
974 3178 : skip_sql_comment(struct scanner * lc)
975 : {
976 3178 : int cur;
977 3178 : int started = lc->started;
978 :
979 3178 : lc->started = 1;
980 828372 : while ((cur = scanner_getc(lc)) != EOF && (cur != '\n'))
981 : ;
982 3178 : lc->yysval = lc->yycur;
983 3178 : lc->started = started;
984 : /* a comment is equivalent to a newline */
985 3178 : return cur;
986 : }
987 :
988 : static int tokenize(mvc * lc, int cur);
989 :
/* Digit predicates, one per supported number base.  Each takes a
 * character code and tells whether it is a digit in that base. */

/* base 10: 0-9 */
static inline bool
is_valid_decimal_digit(int cur)
{
	return iswdigit(cur);
}

/* base 2: 0-1 */
static inline bool
is_valid_binary_digit(int cur)
{
	if (!iswdigit(cur))
		return false;
	return cur < '2';
}

/* base 8: 0-7 */
static inline bool
is_valid_octal_digit(int cur)
{
	if (!iswdigit(cur))
		return false;
	return cur < '8';
}

/* base 16: 0-9, a-f, A-F */
static inline bool
is_valid_hexadecimal_digit(int cur)
{
	return iswxdigit(cur);
}
994 :
995 1908599 : static inline int check_validity_number(mvc* c, int pcur, bool initial_underscore_allowed, int *token, int type) {
996 1908599 : struct scanner *lc = &c->scanner;
997 1908599 : bool (*is_valid_n_ary_digit)(int);
998 :
999 1908599 : if (pcur == '_' && !initial_underscore_allowed) /* ERROR: initial underscore not allowed */ {
1000 0 : *token = 0;
1001 0 : return '_';
1002 : }
1003 :
1004 1908599 : switch (type) {
1005 : case BINARYNUM:
1006 : is_valid_n_ary_digit = &is_valid_binary_digit;
1007 : break;
1008 3 : case OCTALNUM:
1009 3 : is_valid_n_ary_digit = &is_valid_octal_digit;
1010 3 : break;
1011 280 : case HEXADECIMALNUM:
1012 280 : is_valid_n_ary_digit = &is_valid_hexadecimal_digit;
1013 280 : break;
1014 1908314 : default:
1015 1908314 : is_valid_n_ary_digit = &is_valid_decimal_digit;
1016 1908314 : break;
1017 : }
1018 :
1019 1908599 : if ( !(pcur == '_' || is_valid_n_ary_digit(pcur)) ) /* ERROR: first digit is not valid */ {
1020 17 : *token = 0;
1021 17 : return pcur;
1022 : }
1023 :
1024 1908497 : int cur = scanner_getc(lc);
1025 1908449 : *token = type;
1026 3844648 : while (cur != EOF) {
1027 3844645 : if (cur == '_') {
1028 25 : if (pcur == '_') /* ERROR: multiple consecutive underscores */ {
1029 2 : *token = 0;
1030 2 : return '_';
1031 : }
1032 : }
1033 3844620 : else if (!is_valid_n_ary_digit(cur))
1034 : break;
1035 1936212 : pcur = cur;
1036 1936212 : cur = scanner_getc(lc);
1037 : }
1038 :
1039 1908281 : if (pcur == '_') {
1040 3 : *token = 0;
1041 3 : if (iswalnum(cur)) /* ERROR: not a valid digit */
1042 : return cur;
1043 : else /* ERROR: number ends with underscore */
1044 : return '_';
1045 : }
1046 :
1047 : return cur;
1048 : }
1049 :
/* Scan a numeric literal whose first character `cur` has already been
 * read; `cur` is a digit, or '.' when called for a number that starts
 * with a decimal point.
 *
 * Returns the token for the literal (via scanner_token()), EOF when
 * the input ends inside the literal, or LEX_ERROR after reporting the
 * offending character.  The character that ends the literal is put
 * back before returning.
 */
static int
number(mvc * c, int cur)
{
	struct scanner *lc = &c->scanner;
	int token = sqlINT;

	/* a number has one of these forms (expressed in regular expressions):
	 * 0x[0-9A-Fa-f]+                   -- (hexadecimal) INTEGER
	 * \.[0-9]+                         -- DECIMAL
	 * [0-9]+\.[0-9]*                   -- DECIMAL
	 * [0-9]+@0                         -- OID
	 * [0-9]*\.[0-9]+[eE][-+]?[0-9]+    -- REAL
	 * [0-9]+(\.[0-9]*)?[eE][-+]?[0-9]+ -- REAL
	 * [0-9]+                           -- (decimal) INTEGER
	 * In addition the code below accepts the prefixes 0b (binary) and
	 * 0o (octal), and check_validity_number() accepts '_' between
	 * digits.
	 */
	lc->started = 1;
	if (cur == '0') {
		/* a leading 0 may introduce a base prefix */
		switch ((cur = scanner_getc(lc))) {
		case 'b':
			cur = scanner_getc(lc);
			if ((cur = check_validity_number(c, cur, true, &token, BINARYNUM)) == EOF) return cur;
			break;
		case 'o':
			cur = scanner_getc(lc);
			if ((cur = check_validity_number(c, cur, true, &token, OCTALNUM)) == EOF) return cur;
			break;
		case 'x':
			cur = scanner_getc(lc);
			if ((cur = check_validity_number(c, cur, true, &token, HEXADECIMALNUM)) == EOF) return cur;
			break;
		default:
			/* no prefix: put the character back and scan the
			 * '0' as the start of a decimal number */
			utf8_putchar(lc, cur);
			cur = '0';
		}
	}
	/* token is still sqlINT when no base prefix was consumed */
	if (token == sqlINT) {
		if ((cur = check_validity_number(c, cur, false, &token, sqlINT)) == EOF) return cur;
		if (cur == '@') {
			if (token == sqlINT) {
				cur = scanner_getc(lc);
				if (cur == EOF)
					return cur;
				if (cur == '0') {
					cur = scanner_getc(lc);
					if (cur == EOF)
						return cur;
					token = OIDNUM;
				} else {
					/* number + '@' not followed by 0: show '@' as erroneous */
					utf8_putchar(lc, cur);
					cur = '@';
					token = 0;
				}
			}
		} else {
			if (cur == '.') {
				/* NOTE(review): an EOF returned here is not
				 * checked and appears to reach the assert
				 * below -- confirm whether that can happen */
				cur = scanner_getc(lc);
				if (iswalnum(cur)) /* early exit for numerical forms with final . e.g. 10. */
					if ((cur = check_validity_number(c, cur, false, &token, INTNUM)) == EOF) return cur;
			}
			if (token != 0)
				if (cur == 'e' || cur == 'E') {
					/* exponent with optional sign */
					cur = scanner_getc(lc);
					if (cur == '+' || cur == '-')
						cur = scanner_getc(lc);
					if ((cur = check_validity_number(c, cur, false, &token, APPROXNUM)) == EOF) return cur;
				}
		}
	}

	assert(cur != EOF);

	if (iswalnum(cur)) /* ERROR: not a valid digit */
		token = 0;

	/* the character after the literal belongs to the next token */
	utf8_putchar(lc, cur);

	if (token) {
		return scanner_token(lc, token);
	} else {
		(void)sql_error( c, 2, SQLSTATE(42000) "Unexpected symbol %lc", (wint_t) cur);
		return LEX_ERROR;
	}
}
1134 :
1135 : static
1136 12890631 : int scanner_symbol(mvc * c, int cur)
1137 : {
1138 12890631 : struct scanner *lc = &c->scanner;
1139 12890631 : int next = 0;
1140 12890631 : int started = lc->started;
1141 :
1142 12890631 : switch (cur) {
1143 72043 : case '/':
1144 72043 : lc->started = 1;
1145 72043 : next = scanner_getc(lc);
1146 72043 : if (next < 0)
1147 : return EOF;
1148 72043 : if (next == '*') {
1149 69402 : lc->started = started;
1150 69402 : cur = skip_c_comment(lc);
1151 69402 : if (cur < 0)
1152 : return EOF;
1153 69402 : return tokenize(c, cur);
1154 : } else {
1155 2641 : utf8_putchar(lc, next);
1156 2641 : return scanner_token(lc, cur);
1157 : }
1158 0 : case '0':
1159 : case '1':
1160 : case '2':
1161 : case '3':
1162 : case '4':
1163 : case '5':
1164 : case '6':
1165 : case '7':
1166 : case '8':
1167 : case '9':
1168 0 : return number(c, cur);
1169 5 : case '#':
1170 5 : if ((cur = skip_sql_comment(lc)) == EOF)
1171 : return cur;
1172 5 : return tokenize(c, cur);
1173 802709 : case '\'':
1174 802709 : if (lc->raw_string_mode || lc->next_string_is_raw)
1175 46 : return scanner_string(c, cur, false);
1176 802663 : return scanner_string(c, cur, true);
1177 1298113 : case '"':
1178 1298113 : return scanner_string(c, cur, false);
1179 500 : case '{':
1180 : // if previous tokens like LANGUAGE IDENT
1181 : // TODO checking on IDENT only may not be enough
1182 500 : if (lc->yylast == IDENT)
1183 234 : return scanner_body(c);
1184 266 : lc->started = 1;
1185 266 : return scanner_token(lc, cur);
1186 266 : case '}':
1187 266 : lc->started = 1;
1188 266 : return scanner_token(lc, cur);
1189 29905 : case '-':
1190 29905 : lc->started = 1;
1191 29905 : next = scanner_getc(lc);
1192 29905 : if (next < 0)
1193 : return EOF;
1194 29904 : if (next == '-') {
1195 3173 : lc->started = started;
1196 3173 : if ((cur = skip_sql_comment(lc)) == EOF)
1197 : return cur;
1198 3173 : return tokenize(c, cur);
1199 : }
1200 26731 : lc->started = 1;
1201 26731 : utf8_putchar(lc, next);
1202 26731 : return scanner_token(lc, cur);
1203 12 : case '~': /* binary not */
1204 12 : lc->started = 1;
1205 12 : next = scanner_getc(lc);
1206 12 : if (next < 0)
1207 : return EOF;
1208 12 : if (next == '=')
1209 5 : return scanner_token(lc, GEOM_MBR_EQUAL);
1210 7 : utf8_putchar(lc, next);
1211 7 : return scanner_token(lc, cur);
1212 7239627 : case '^': /* binary xor */
1213 : case '*':
1214 : case '?':
1215 : case ':':
1216 : case '%':
1217 : case '+':
1218 : case '(':
1219 : case ')':
1220 : case ',':
1221 : case '=':
1222 : case '[':
1223 : case ']':
1224 7239627 : lc->started = 1;
1225 7239627 : return scanner_token(lc, cur);
1226 6253 : case '&':
1227 6253 : lc->started = 1;
1228 6253 : cur = scanner_getc(lc);
1229 6253 : if (cur < 0)
1230 : return EOF;
1231 6253 : if (cur < 0)
1232 : return EOF;
1233 6253 : if(cur == '<') {
1234 3 : next = scanner_getc(lc);
1235 3 : if (next < 0)
1236 : return EOF;
1237 3 : if(next == '|') {
1238 0 : return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);
1239 : } else {
1240 3 : utf8_putchar(lc, next); //put the char back
1241 3 : return scanner_token(lc, GEOM_OVERLAP_OR_LEFT);
1242 : }
1243 6250 : } else if(cur == '>')
1244 3 : return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT);
1245 6247 : else if(cur == '&')
1246 3 : return scanner_token(lc, GEOM_OVERLAP);
1247 : else {/* binary and */
1248 6244 : utf8_putchar(lc, cur); //put the char back
1249 6244 : return scanner_token(lc, '&');
1250 : }
1251 19 : case '@':
1252 19 : lc->started = 1;
1253 19 : return scanner_token(lc, AT);
1254 995931 : case ';':
1255 995931 : lc->started = 0;
1256 995931 : return scanner_token(lc, SCOLON);
1257 27 : case '!':
1258 27 : lc->started = 1;
1259 27 : cur = scanner_getc(lc);
1260 27 : if (cur < 0)
1261 : return EOF;
1262 27 : else if (cur == '=') {
1263 21 : lc->rs->buf[lc->rs->pos + lc->yycur - 2] = '<';
1264 21 : lc->rs->buf[lc->rs->pos + lc->yycur - 1] = '>';
1265 21 : return scanner_token( lc, COMPARISON);
1266 : } else {
1267 6 : utf8_putchar(lc, cur); //put the char back
1268 : }
1269 6 : return scanner_token(lc, '!');
1270 51381 : case '<':
1271 51381 : lc->started = 1;
1272 51381 : cur = scanner_getc(lc);
1273 51381 : if (cur < 0)
1274 : return EOF;
1275 51381 : if (cur == '=') {
1276 3123 : return scanner_token( lc, COMPARISON);
1277 48258 : } else if (cur == '>') {
1278 34804 : return scanner_token( lc, COMPARISON);
1279 13454 : } else if (cur == '<') {
1280 44 : next = scanner_getc(lc);
1281 44 : if (next < 0)
1282 : return EOF;
1283 44 : if (next == '=') {
1284 4 : return scanner_token( lc, LEFT_SHIFT_ASSIGN);
1285 40 : } else if (next == '|') {
1286 1 : return scanner_token(lc, GEOM_BELOW);
1287 : } else {
1288 39 : utf8_putchar(lc, next); //put the char back
1289 39 : return scanner_token( lc, LEFT_SHIFT);
1290 : }
1291 13410 : } else if(cur == '-') {
1292 19 : next = scanner_getc(lc);
1293 19 : if (next < 0)
1294 : return EOF;
1295 19 : if(next == '>') {
1296 7 : return scanner_token(lc, GEOM_DIST);
1297 : } else {
1298 : //put the characters back and fall in the next possible case
1299 12 : utf8_putchar(lc, next);
1300 12 : utf8_putchar(lc, cur);
1301 12 : return scanner_token( lc, COMPARISON);
1302 : }
1303 : } else {
1304 13391 : utf8_putchar(lc, cur);
1305 13391 : return scanner_token( lc, COMPARISON);
1306 : }
1307 46909 : case '>':
1308 46909 : lc->started = 1;
1309 46909 : cur = scanner_getc(lc);
1310 46909 : if (cur < 0)
1311 : return EOF;
1312 46909 : if (cur == '>') {
1313 2657 : cur = scanner_getc(lc);
1314 2657 : if (cur < 0)
1315 : return EOF;
1316 2657 : if (cur == '=')
1317 3 : return scanner_token( lc, RIGHT_SHIFT_ASSIGN);
1318 2654 : utf8_putchar(lc, cur);
1319 2654 : return scanner_token( lc, RIGHT_SHIFT);
1320 44252 : } else if (cur != '=') {
1321 42000 : utf8_putchar(lc, cur);
1322 42000 : return scanner_token( lc, COMPARISON);
1323 : } else {
1324 2252 : return scanner_token( lc, COMPARISON);
1325 : }
1326 2165424 : case '.':
1327 2165424 : lc->started = 1;
1328 2165424 : cur = scanner_getc(lc);
1329 2165424 : if (cur < 0)
1330 : return EOF;
1331 2165423 : if (!iswdigit(cur)) {
1332 2165410 : utf8_putchar(lc, cur);
1333 2165410 : return scanner_token( lc, '.');
1334 : } else {
1335 13 : utf8_putchar(lc, cur);
1336 13 : cur = '.';
1337 13 : return number(c, cur);
1338 : }
1339 181497 : case '|': /* binary or or string concat */
1340 181497 : lc->started = 1;
1341 181497 : cur = scanner_getc(lc);
1342 181497 : if (cur < 0)
1343 : return EOF;
1344 181497 : if (cur == '|') {
1345 181472 : return scanner_token(lc, CONCATSTRING);
1346 25 : } else if (cur == '&') {
1347 0 : next = scanner_getc(lc);
1348 0 : if (next < 0)
1349 : return EOF;
1350 0 : if(next == '>') {
1351 0 : return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);
1352 : } else {
1353 0 : utf8_putchar(lc, next); //put the char back
1354 0 : utf8_putchar(lc, cur); //put the char back
1355 0 : return scanner_token(lc, '|');
1356 : }
1357 25 : } else if (cur == '>') {
1358 1 : next = scanner_getc(lc);
1359 1 : if (next < 0)
1360 : return EOF;
1361 1 : if(next == '>') {
1362 1 : return scanner_token(lc, GEOM_ABOVE);
1363 : } else {
1364 0 : utf8_putchar(lc, next); //put the char back
1365 0 : utf8_putchar(lc, cur); //put the char back
1366 0 : return scanner_token(lc, '|');
1367 : }
1368 : } else {
1369 24 : utf8_putchar(lc, cur);
1370 24 : return scanner_token(lc, '|');
1371 : }
1372 : }
1373 10 : (void)sql_error( c, 3, SQLSTATE(42000) "Unexpected symbol (%lc)", (wint_t) cur);
1374 10 : return LEX_ERROR;
1375 : }
1376 :
1377 : static int
1378 28413752 : tokenize(mvc * c, int cur)
1379 : {
1380 28413752 : struct scanner *lc = &c->scanner;
1381 56791600 : while (1) {
1382 42602676 : if (cur == 0xFEFF) {
1383 : /* on Linux at least, iswpunct returns TRUE
1384 : * for U+FEFF, but we don't want that, we just
1385 : * want to go to the scanner_error case
1386 : * below */
1387 : ;
1388 42602758 : } else if (iswspace(cur)) {
1389 14207588 : if ((cur = skip_white_space(lc)) == EOF)
1390 : return cur;
1391 14188924 : continue; /* try again */
1392 28395170 : } else if (iswdigit(cur)) {
1393 1895252 : return number(c, cur);
1394 26499918 : } else if (iswalpha(cur) || cur == '_') {
1395 13573312 : switch (cur) {
1396 655522 : case 'e': /* string with escapes */
1397 : case 'E':
1398 655522 : if (scanner_read_more(lc, 1) != EOF &&
1399 655522 : lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1400 3747 : return scanner_string(c, scanner_getc(lc), true);
1401 : }
1402 : break;
1403 416836 : case 'x': /* blob */
1404 : case 'X':
1405 : case 'r': /* raw string */
1406 : case 'R':
1407 416836 : if (scanner_read_more(lc, 1) != EOF &&
1408 416836 : lc->rs->buf[lc->rs->pos + lc->yycur] == '\'') {
1409 3276 : return scanner_string(c, scanner_getc(lc), false);
1410 : }
1411 : break;
1412 156235 : case 'u': /* unicode string */
1413 : case 'U':
1414 156235 : if (scanner_read_more(lc, 1) != EOF &&
1415 156252 : lc->rs->buf[lc->rs->pos + lc->yycur] == '&' &&
1416 17 : scanner_read_more(lc, 2) != EOF &&
1417 17 : (lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '\'' ||
1418 : lc->rs->buf[lc->rs->pos + lc->yycur + 1] == '"')) {
1419 17 : cur = scanner_getc(lc); /* '&' */
1420 17 : return scanner_string(c, scanner_getc(lc), false);
1421 : }
1422 : break;
1423 : default:
1424 : break;
1425 : }
1426 13602380 : return keyword_or_ident(c, cur);
1427 12890498 : } else if (iswpunct(cur)) {
1428 12890384 : return scanner_symbol(c, cur);
1429 : }
1430 32 : if (cur == EOF) {
1431 0 : if (lc->mode == LINE_1 || !lc->started )
1432 : return cur;
1433 0 : return scanner_error(c, cur);
1434 : }
1435 : /* none of the above: error */
1436 32 : return scanner_error(c, cur);
1437 : }
1438 : }
1439 :
1440 : /* SQL 'quoted' idents consist of a set of any character of
1441 : * the source language character set other than a 'quote'
1442 : *
1443 : * MonetDB has 3 restrictions:
1444 : * 1 we disallow '%' as the first character.
1445 : * 2 the length is limited to 1024 characters
1446 : * 3 the identifier 'TID%' is not allowed
1447 : */
1448 : static bool
1449 1298102 : valid_ident(const char *restrict s, char *restrict dst)
1450 : {
1451 1298102 : int p = 0;
1452 :
1453 1298102 : if (*s == '%')
1454 : return false;
1455 :
1456 9603760 : while (*s) {
1457 8305658 : if ((dst[p++] = *s++) == '"' && *s == '"')
1458 68 : s++;
1459 8305658 : if (p >= 1024)
1460 : return false;
1461 : }
1462 1298102 : dst[p] = '\0';
1463 1298102 : if (strcmp(dst, TID + 1) == 0) /* an index named 'TID%' could interfere with '%TID%' */
1464 : return false;
1465 : return true;
1466 : }
1467 :
1468 : static inline int
1469 28515070 : sql_get_next_token(YYSTYPE *yylval, void *parm)
1470 : {
1471 28515070 : mvc *c = (mvc*)parm;
1472 28515070 : struct scanner *lc = &c->scanner;
1473 28515070 : int token = 0, cur = 0;
1474 :
1475 28515070 : if (lc->rs->buf == NULL) /* malloc failure */
1476 : return EOF;
1477 :
1478 28515070 : if (lc->yynext) {
1479 61661 : int next = lc->yynext;
1480 :
1481 61661 : lc->yynext = 0;
1482 61661 : return(next);
1483 : }
1484 :
1485 28453409 : if (lc->yybak) {
1486 27420390 : lc->rs->buf[lc->rs->pos + lc->yycur] = lc->yybak;
1487 27420390 : lc->yybak = 0;
1488 : }
1489 :
1490 28453409 : lc->yysval = lc->yycur;
1491 28453409 : lc->yylast = lc->yyval;
1492 28453409 : cur = scanner_getc(lc);
1493 28452781 : if (cur < 0)
1494 : return EOF;
1495 28341464 : token = tokenize(c, cur);
1496 :
1497 28341501 : yylval->sval = (lc->rs->buf + lc->rs->pos + lc->yysval);
1498 :
1499 28341501 : if (token == KW_TYPE)
1500 49488 : token = aTYPE;
1501 :
1502 28341501 : if (token == IDENT || token == COMPARISON ||
1503 22990910 : token == RANK || token == aTYPE || token == MARGFUNC) {
1504 5409957 : yylval->sval = sa_strndup(c->sa, yylval->sval, lc->yycur-lc->yysval);
1505 5409971 : lc->next_string_is_raw = false;
1506 22931544 : } else if (token == STRING) {
1507 2107832 : char quote = *yylval->sval;
1508 2107832 : char *str = sa_alloc( c->sa, (lc->yycur-lc->yysval-2)*2 + 1 );
1509 2107832 : char *dst;
1510 :
1511 2107832 : assert(quote == '"' || quote == '\'' || quote == 'E' || quote == 'e' || quote == 'U' || quote == 'u' || quote == 'X' || quote == 'x' || quote == 'R' || quote == 'r');
1512 :
1513 2107832 : lc->rs->buf[lc->rs->pos + lc->yycur - 1] = 0;
1514 2107832 : switch (quote) {
1515 1298102 : case '"':
1516 1298102 : if (valid_ident(yylval->sval+1,str)) {
1517 : token = IDENT;
1518 : } else {
1519 0 : sql_error(c, 1, SQLSTATE(42000) "Invalid identifier '%s'", yylval->sval+1);
1520 0 : return LEX_ERROR;
1521 : }
1522 : break;
1523 3746 : case 'e':
1524 : case 'E':
1525 3746 : assert(yylval->sval[1] == '\'');
1526 3746 : if (GDKstrFromStr((unsigned char *) str,
1527 : (unsigned char *) yylval->sval + 2,
1528 3746 : lc->yycur-lc->yysval - 2, '\'') < 0) {
1529 1 : char *err = GDKerrbuf;
1530 1 : if (strncmp(err, GDKERROR, strlen(GDKERROR)) == 0)
1531 1 : err += strlen(GDKERROR);
1532 0 : else if (*err == '!')
1533 0 : err++;
1534 1 : sql_error(c, 1, SQLSTATE(42000) "%s", err);
1535 1 : return LEX_ERROR;
1536 : }
1537 : quote = '\'';
1538 : break;
1539 17 : case 'u':
1540 : case 'U':
1541 17 : assert(yylval->sval[1] == '&');
1542 17 : assert(yylval->sval[2] == '\'' || yylval->sval[2] == '"');
1543 17 : strcpy(str, yylval->sval + 3);
1544 17 : token = yylval->sval[2] == '\'' ? USTRING : UIDENT;
1545 17 : quote = yylval->sval[2];
1546 17 : lc->next_string_is_raw = true;
1547 17 : break;
1548 1 : case 'x':
1549 : case 'X':
1550 1 : assert(yylval->sval[1] == '\'');
1551 1 : dst = str;
1552 5 : for (char *src = yylval->sval + 2; *src; dst++)
1553 4 : if ((*dst = *src++) == '\'' && *src == '\'')
1554 0 : src++;
1555 1 : *dst = 0;
1556 1 : quote = '\'';
1557 1 : token = XSTRING;
1558 1 : lc->next_string_is_raw = true;
1559 1 : break;
1560 3268 : case 'r':
1561 : case 'R':
1562 3268 : assert(yylval->sval[1] == '\'');
1563 3268 : dst = str;
1564 449799 : for (char *src = yylval->sval + 2; *src; dst++)
1565 446531 : if ((*dst = *src++) == '\'' && *src == '\'')
1566 2732 : src++;
1567 3268 : quote = '\'';
1568 3268 : *dst = 0;
1569 3268 : break;
1570 802698 : default:
1571 802698 : if (lc->raw_string_mode || lc->next_string_is_raw) {
1572 46 : dst = str;
1573 436 : for (char *src = yylval->sval + 1; *src; dst++)
1574 390 : if ((*dst = *src++) == '\'' && *src == '\'')
1575 1 : src++;
1576 46 : *dst = 0;
1577 : } else {
1578 802652 : if (GDKstrFromStr((unsigned char *)str,
1579 802652 : (unsigned char *)yylval->sval + 1,
1580 802652 : lc->yycur - lc->yysval - 1,
1581 : '\'') < 0) {
1582 1 : sql_error(c, 1, SQLSTATE(42000) "%s", GDKerrbuf);
1583 1 : return LEX_ERROR;
1584 : }
1585 : }
1586 : break;
1587 : }
1588 2107830 : yylval->sval = str;
1589 :
1590 : /* reset original */
1591 2107830 : lc->rs->buf[lc->rs->pos+lc->yycur- 1] = quote;
1592 : } else {
1593 20823712 : lc->next_string_is_raw = false;
1594 : }
1595 :
1596 : return(token);
1597 : }
1598 :
1599 : static int scanner( YYSTYPE *yylval, void *m, bool log);
1600 :
1601 : static int
1602 28383262 : scanner(YYSTYPE * yylval, void *parm, bool log)
1603 : {
1604 28383262 : int token;
1605 28383262 : mvc *c = (mvc *) parm;
1606 28383262 : struct scanner *lc = &c->scanner;
1607 28383262 : size_t pos;
1608 :
1609 : /* store position for when view's query ends */
1610 28383262 : pos = lc->rs->pos + lc->yycur;
1611 :
1612 28383262 : token = sql_get_next_token(yylval, parm);
1613 :
1614 28381695 : if (token == NOT) {
1615 74400 : int next = scanner(yylval, parm, false);
1616 :
1617 74400 : if (next == NOT) {
1618 2 : return scanner(yylval, parm, false);
1619 : } else if (next == EXISTS) {
1620 : token = NOT_EXISTS;
1621 : } else if (next == BETWEEN) {
1622 : token = NOT_BETWEEN;
1623 : } else if (next == sqlIN) {
1624 : token = NOT_IN;
1625 : } else if (next == LIKE) {
1626 : token = NOT_LIKE;
1627 : } else if (next == ILIKE) {
1628 : token = NOT_ILIKE;
1629 : } else {
1630 61661 : lc->yynext = next;
1631 : }
1632 28307295 : } else if (token == SCOLON) {
1633 : /* ignore semi-colon(s) following a semi-colon */
1634 995886 : if (lc->yylast == SCOLON) {
1635 132001 : size_t prev = lc->yycur;
1636 132002 : while ((token = sql_get_next_token(yylval, parm)) == SCOLON)
1637 1 : prev = lc->yycur;
1638 :
1639 : /* skip the skipped stuff also in the buffer */
1640 132001 : lc->rs->pos += prev;
1641 132001 : lc->yycur -= prev;
1642 : }
1643 : }
1644 :
1645 28381693 : if (lc->log && log)
1646 0 : mnstr_write(lc->log, lc->rs->buf+pos, lc->rs->pos + lc->yycur - pos, 1);
1647 :
1648 28381693 : lc->started += (token != EOF);
1649 28381693 : return token;
1650 : }
1651 :
1652 : /* also see sql_parser.y */
1653 : extern int sqllex(YYSTYPE * yylval, void *parm);
1654 :
1655 : int
1656 28309211 : sqllex(YYSTYPE * yylval, void *parm)
1657 : {
1658 28309211 : return scanner(yylval, parm, true);
1659 : }
|